Turn FUNCTION_ARG_PADDING into a target hook
1 /* Subroutines used for code generation on IBM S/390 and zSeries
2 Copyright (C) 1999-2017 Free Software Foundation, Inc.
3 Contributed by Hartmut Penner (hpenner@de.ibm.com) and
4 Ulrich Weigand (uweigand@de.ibm.com) and
5 Andreas Krebbel (Andreas.Krebbel@de.ibm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 3, or (at your option) any later
12 version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "target-globals.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "gimple.h"
32 #include "cfghooks.h"
33 #include "cfgloop.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "expmed.h"
40 #include "optabs.h"
41 #include "regs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "cgraph.h"
45 #include "diagnostic-core.h"
46 #include "diagnostic.h"
47 #include "alias.h"
48 #include "fold-const.h"
49 #include "print-tree.h"
50 #include "stor-layout.h"
51 #include "varasm.h"
52 #include "calls.h"
53 #include "conditions.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "flags.h"
57 #include "except.h"
58 #include "dojump.h"
59 #include "explow.h"
60 #include "stmt.h"
61 #include "expr.h"
62 #include "reload.h"
63 #include "cfgrtl.h"
64 #include "cfganal.h"
65 #include "lcm.h"
66 #include "cfgbuild.h"
67 #include "cfgcleanup.h"
68 #include "debug.h"
69 #include "langhooks.h"
70 #include "internal-fn.h"
71 #include "gimple-fold.h"
72 #include "tree-eh.h"
73 #include "gimplify.h"
74 #include "params.h"
75 #include "opts.h"
76 #include "tree-pass.h"
77 #include "context.h"
78 #include "builtins.h"
79 #include "rtl-iter.h"
80 #include "intl.h"
81 #include "tm-constrs.h"
82 #include "tree-vrp.h"
83 #include "symbol-summary.h"
84 #include "ipa-prop.h"
85 #include "ipa-fnsummary.h"
86
87 /* This file should be included last. */
88 #include "target-def.h"
89
90 static bool s390_hard_regno_mode_ok (unsigned int, machine_mode);
91
92 /* Remember the last target of s390_set_current_function. */
93 static GTY(()) tree s390_previous_fndecl;
94
95 /* Define the specific costs for a given cpu. */
96
97 struct processor_costs
98 {
99 /* multiplication */
100 const int m; /* cost of an M instruction. */
101 const int mghi; /* cost of an MGHI instruction. */
102 const int mh; /* cost of an MH instruction. */
103 const int mhi; /* cost of an MHI instruction. */
104 const int ml; /* cost of an ML instruction. */
105 const int mr; /* cost of an MR instruction. */
106 const int ms; /* cost of an MS instruction. */
107 const int msg; /* cost of an MSG instruction. */
108 const int msgf; /* cost of an MSGF instruction. */
109 const int msgfr; /* cost of an MSGFR instruction. */
110 const int msgr; /* cost of an MSGR instruction. */
111 const int msr; /* cost of an MSR instruction. */
112 const int mult_df; /* cost of multiplication in DFmode. */
113 const int mxbr;
114 /* square root */
115 const int sqxbr; /* cost of square root in TFmode. */
116 const int sqdbr; /* cost of square root in DFmode. */
117 const int sqebr; /* cost of square root in SFmode. */
118 /* multiply and add */
119 const int madbr; /* cost of multiply and add in DFmode. */
120 const int maebr; /* cost of multiply and add in SFmode. */
121 /* division */
122 const int dxbr;
123 const int ddbr;
124 const int debr;
125 const int dlgr;
126 const int dlr;
127 const int dr;
128 const int dsgfr;
129 const int dsgr;
130 };
131
132 #define s390_cost ((const struct processor_costs *)(s390_cost_pointer))
133
134 static const
135 struct processor_costs z900_cost =
136 {
137 COSTS_N_INSNS (5), /* M */
138 COSTS_N_INSNS (10), /* MGHI */
139 COSTS_N_INSNS (5), /* MH */
140 COSTS_N_INSNS (4), /* MHI */
141 COSTS_N_INSNS (5), /* ML */
142 COSTS_N_INSNS (5), /* MR */
143 COSTS_N_INSNS (4), /* MS */
144 COSTS_N_INSNS (15), /* MSG */
145 COSTS_N_INSNS (7), /* MSGF */
146 COSTS_N_INSNS (7), /* MSGFR */
147 COSTS_N_INSNS (10), /* MSGR */
148 COSTS_N_INSNS (4), /* MSR */
149 COSTS_N_INSNS (7), /* multiplication in DFmode */
150 COSTS_N_INSNS (13), /* MXBR */
151 COSTS_N_INSNS (136), /* SQXBR */
152 COSTS_N_INSNS (44), /* SQDBR */
153 COSTS_N_INSNS (35), /* SQEBR */
154 COSTS_N_INSNS (18), /* MADBR */
155 COSTS_N_INSNS (13), /* MAEBR */
156 COSTS_N_INSNS (134), /* DXBR */
157 COSTS_N_INSNS (30), /* DDBR */
158 COSTS_N_INSNS (27), /* DEBR */
159 COSTS_N_INSNS (220), /* DLGR */
160 COSTS_N_INSNS (34), /* DLR */
161 COSTS_N_INSNS (34), /* DR */
162 COSTS_N_INSNS (32), /* DSGFR */
163 COSTS_N_INSNS (32), /* DSGR */
164 };
165
166 static const
167 struct processor_costs z990_cost =
168 {
169 COSTS_N_INSNS (4), /* M */
170 COSTS_N_INSNS (2), /* MGHI */
171 COSTS_N_INSNS (2), /* MH */
172 COSTS_N_INSNS (2), /* MHI */
173 COSTS_N_INSNS (4), /* ML */
174 COSTS_N_INSNS (4), /* MR */
175 COSTS_N_INSNS (5), /* MS */
176 COSTS_N_INSNS (6), /* MSG */
177 COSTS_N_INSNS (4), /* MSGF */
178 COSTS_N_INSNS (4), /* MSGFR */
179 COSTS_N_INSNS (4), /* MSGR */
180 COSTS_N_INSNS (4), /* MSR */
181 COSTS_N_INSNS (1), /* multiplication in DFmode */
182 COSTS_N_INSNS (28), /* MXBR */
183 COSTS_N_INSNS (130), /* SQXBR */
184 COSTS_N_INSNS (66), /* SQDBR */
185 COSTS_N_INSNS (38), /* SQEBR */
186 COSTS_N_INSNS (1), /* MADBR */
187 COSTS_N_INSNS (1), /* MAEBR */
188 COSTS_N_INSNS (60), /* DXBR */
189 COSTS_N_INSNS (40), /* DDBR */
190 COSTS_N_INSNS (26), /* DEBR */
191 COSTS_N_INSNS (176), /* DLGR */
192 COSTS_N_INSNS (31), /* DLR */
193 COSTS_N_INSNS (31), /* DR */
194 COSTS_N_INSNS (31), /* DSGFR */
195 COSTS_N_INSNS (31), /* DSGR */
196 };
197
198 static const
199 struct processor_costs z9_109_cost =
200 {
201 COSTS_N_INSNS (4), /* M */
202 COSTS_N_INSNS (2), /* MGHI */
203 COSTS_N_INSNS (2), /* MH */
204 COSTS_N_INSNS (2), /* MHI */
205 COSTS_N_INSNS (4), /* ML */
206 COSTS_N_INSNS (4), /* MR */
207 COSTS_N_INSNS (5), /* MS */
208 COSTS_N_INSNS (6), /* MSG */
209 COSTS_N_INSNS (4), /* MSGF */
210 COSTS_N_INSNS (4), /* MSGFR */
211 COSTS_N_INSNS (4), /* MSGR */
212 COSTS_N_INSNS (4), /* MSR */
213 COSTS_N_INSNS (1), /* multiplication in DFmode */
214 COSTS_N_INSNS (28), /* MXBR */
215 COSTS_N_INSNS (130), /* SQXBR */
216 COSTS_N_INSNS (66), /* SQDBR */
217 COSTS_N_INSNS (38), /* SQEBR */
218 COSTS_N_INSNS (1), /* MADBR */
219 COSTS_N_INSNS (1), /* MAEBR */
220 COSTS_N_INSNS (60), /* DXBR */
221 COSTS_N_INSNS (40), /* DDBR */
222 COSTS_N_INSNS (26), /* DEBR */
223 COSTS_N_INSNS (30), /* DLGR */
224 COSTS_N_INSNS (23), /* DLR */
225 COSTS_N_INSNS (23), /* DR */
226 COSTS_N_INSNS (24), /* DSGFR */
227 COSTS_N_INSNS (24), /* DSGR */
228 };
229
230 static const
231 struct processor_costs z10_cost =
232 {
233 COSTS_N_INSNS (10), /* M */
234 COSTS_N_INSNS (10), /* MGHI */
235 COSTS_N_INSNS (10), /* MH */
236 COSTS_N_INSNS (10), /* MHI */
237 COSTS_N_INSNS (10), /* ML */
238 COSTS_N_INSNS (10), /* MR */
239 COSTS_N_INSNS (10), /* MS */
240 COSTS_N_INSNS (10), /* MSG */
241 COSTS_N_INSNS (10), /* MSGF */
242 COSTS_N_INSNS (10), /* MSGFR */
243 COSTS_N_INSNS (10), /* MSGR */
244 COSTS_N_INSNS (10), /* MSR */
245 COSTS_N_INSNS (1) , /* multiplication in DFmode */
246 COSTS_N_INSNS (50), /* MXBR */
247 COSTS_N_INSNS (120), /* SQXBR */
248 COSTS_N_INSNS (52), /* SQDBR */
249 COSTS_N_INSNS (38), /* SQEBR */
250 COSTS_N_INSNS (1), /* MADBR */
251 COSTS_N_INSNS (1), /* MAEBR */
252 COSTS_N_INSNS (111), /* DXBR */
253 COSTS_N_INSNS (39), /* DDBR */
254 COSTS_N_INSNS (32), /* DEBR */
255 COSTS_N_INSNS (160), /* DLGR */
256 COSTS_N_INSNS (71), /* DLR */
257 COSTS_N_INSNS (71), /* DR */
258 COSTS_N_INSNS (71), /* DSGFR */
259 COSTS_N_INSNS (71), /* DSGR */
260 };
261
262 static const
263 struct processor_costs z196_cost =
264 {
265 COSTS_N_INSNS (7), /* M */
266 COSTS_N_INSNS (5), /* MGHI */
267 COSTS_N_INSNS (5), /* MH */
268 COSTS_N_INSNS (5), /* MHI */
269 COSTS_N_INSNS (7), /* ML */
270 COSTS_N_INSNS (7), /* MR */
271 COSTS_N_INSNS (6), /* MS */
272 COSTS_N_INSNS (8), /* MSG */
273 COSTS_N_INSNS (6), /* MSGF */
274 COSTS_N_INSNS (6), /* MSGFR */
275 COSTS_N_INSNS (8), /* MSGR */
276 COSTS_N_INSNS (6), /* MSR */
277 COSTS_N_INSNS (1) , /* multiplication in DFmode */
278 COSTS_N_INSNS (40), /* MXBR B+40 */
279 COSTS_N_INSNS (100), /* SQXBR B+100 */
280 COSTS_N_INSNS (42), /* SQDBR B+42 */
281 COSTS_N_INSNS (28), /* SQEBR B+28 */
282 COSTS_N_INSNS (1), /* MADBR B */
283 COSTS_N_INSNS (1), /* MAEBR B */
284 COSTS_N_INSNS (101), /* DXBR B+101 */
285 COSTS_N_INSNS (29), /* DDBR */
286 COSTS_N_INSNS (22), /* DEBR */
287 COSTS_N_INSNS (160), /* DLGR cracked */
288 COSTS_N_INSNS (160), /* DLR cracked */
289 COSTS_N_INSNS (160), /* DR expanded */
290 COSTS_N_INSNS (160), /* DSGFR cracked */
291 COSTS_N_INSNS (160), /* DSGR cracked */
292 };
293
294 static const
295 struct processor_costs zEC12_cost =
296 {
297 COSTS_N_INSNS (7), /* M */
298 COSTS_N_INSNS (5), /* MGHI */
299 COSTS_N_INSNS (5), /* MH */
300 COSTS_N_INSNS (5), /* MHI */
301 COSTS_N_INSNS (7), /* ML */
302 COSTS_N_INSNS (7), /* MR */
303 COSTS_N_INSNS (6), /* MS */
304 COSTS_N_INSNS (8), /* MSG */
305 COSTS_N_INSNS (6), /* MSGF */
306 COSTS_N_INSNS (6), /* MSGFR */
307 COSTS_N_INSNS (8), /* MSGR */
308 COSTS_N_INSNS (6), /* MSR */
309 COSTS_N_INSNS (1) , /* multiplication in DFmode */
310 COSTS_N_INSNS (40), /* MXBR B+40 */
311 COSTS_N_INSNS (100), /* SQXBR B+100 */
312 COSTS_N_INSNS (42), /* SQDBR B+42 */
313 COSTS_N_INSNS (28), /* SQEBR B+28 */
314 COSTS_N_INSNS (1), /* MADBR B */
315 COSTS_N_INSNS (1), /* MAEBR B */
316 COSTS_N_INSNS (131), /* DXBR B+131 */
317 COSTS_N_INSNS (29), /* DDBR */
318 COSTS_N_INSNS (22), /* DEBR */
319 COSTS_N_INSNS (160), /* DLGR cracked */
320 COSTS_N_INSNS (160), /* DLR cracked */
321 COSTS_N_INSNS (160), /* DR expanded */
322 COSTS_N_INSNS (160), /* DSGFR cracked */
323 COSTS_N_INSNS (160), /* DSGR cracked */
324 };
325
326 static struct
327 {
328 /* The preferred name to be used in user visible output. */
329 const char *const name;
330 /* CPU name as it should be passed to Binutils via .machine */
331 const char *const binutils_name;
332 const enum processor_type processor;
333 const struct processor_costs *cost;
334 }
335 const processor_table[] =
336 {
337 { "g5", "g5", PROCESSOR_9672_G5, &z900_cost },
338 { "g6", "g6", PROCESSOR_9672_G6, &z900_cost },
339 { "z900", "z900", PROCESSOR_2064_Z900, &z900_cost },
340 { "z990", "z990", PROCESSOR_2084_Z990, &z990_cost },
341 { "z9-109", "z9-109", PROCESSOR_2094_Z9_109, &z9_109_cost },
342 { "z9-ec", "z9-ec", PROCESSOR_2094_Z9_EC, &z9_109_cost },
343 { "z10", "z10", PROCESSOR_2097_Z10, &z10_cost },
344 { "z196", "z196", PROCESSOR_2817_Z196, &z196_cost },
345 { "zEC12", "zEC12", PROCESSOR_2827_ZEC12, &zEC12_cost },
346 { "z13", "z13", PROCESSOR_2964_Z13, &zEC12_cost },
347 { "z14", "arch12", PROCESSOR_3906_Z14, &zEC12_cost },
348 { "native", "", PROCESSOR_NATIVE, NULL }
349 };
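/* A rough sketch of how this table is presumably used (the option
   handling itself lives further down in this file): a -march=/-mtune=
   name such as "z13" is looked up by NAME, the PROCESSOR field selects
   the CPU generation, and the COST field provides the cost table that
   s390_cost_pointer (dereferenced by the s390_cost macro above) is
   pointed at for the tuned-for CPU.  */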
350
351 extern int reload_completed;
352
353 /* Kept up to date using the SCHED_VARIABLE_ISSUE hook. */
354 static rtx_insn *last_scheduled_insn;
355 #define MAX_SCHED_UNITS 3
356 static int last_scheduled_unit_distance[MAX_SCHED_UNITS];
357
358 /* The maximum score added for an instruction whose unit hasn't been
359 in use for MAX_SCHED_MIX_DISTANCE steps. Increase this value to
360 give instruction mix scheduling more priority over instruction
361 grouping. */
362 #define MAX_SCHED_MIX_SCORE 8
363
364 /* The maximum distance up to which individual scores will be
365 calculated. Everything beyond this gives MAX_SCHED_MIX_SCORE.
366    Increase this with the OOO window size of the machine. */
367 #define MAX_SCHED_MIX_DISTANCE 100
368
369 /* Structure used to hold the components of a S/390 memory
370 address. A legitimate address on S/390 is of the general
371 form
372 base + index + displacement
373 where any of the components is optional.
374
375 base and index are registers of the class ADDR_REGS,
376 displacement is an unsigned 12-bit immediate constant. */
377
378 struct s390_address
379 {
380 rtx base;
381 rtx indx;
382 rtx disp;
383 bool pointer;
384 bool literal_pool;
385 };
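/* Illustrative decomposition (example values, not taken from the code
   above): on a 64-bit target the assembler address 40(%r3,%r11) would
   correspond to base = (reg:DI 11), indx = (reg:DI 3) and
   disp = (const_int 40); a purely symbolic address would leave base
   and indx as NULL_RTX.  */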
386
387 /* The following structure is embedded in the machine
388 specific part of struct function. */
389
390 struct GTY (()) s390_frame_layout
391 {
392 /* Offset within stack frame. */
393 HOST_WIDE_INT gprs_offset;
394 HOST_WIDE_INT f0_offset;
395 HOST_WIDE_INT f4_offset;
396 HOST_WIDE_INT f8_offset;
397 HOST_WIDE_INT backchain_offset;
398
399 /* Number of first and last gpr where slots in the register
400 save area are reserved for. */
401 int first_save_gpr_slot;
402 int last_save_gpr_slot;
403
404 /* Location (FP register number) where GPRs (r0-r15) should
405 be saved to.
406 0 - does not need to be saved at all
407 -1 - stack slot */
408 #define SAVE_SLOT_NONE 0
409 #define SAVE_SLOT_STACK -1
410 signed char gpr_save_slots[16];
411
412 /* Number of first and last gpr to be saved, restored. */
413 int first_save_gpr;
414 int first_restore_gpr;
415 int last_save_gpr;
416 int last_restore_gpr;
417
418 /* Bits standing for floating point registers. Set, if the
419 respective register has to be saved. Starting with reg 16 (f0)
420 at the rightmost bit.
421 Bit 15 14 13 12 11 10 9 8 7 6 5 4 3 2 1 0
422 fpr 15 13 11 9 14 12 10 8 7 5 3 1 6 4 2 0
423 reg 31 30 29 28 27 26 25 24 23 22 21 20 19 18 17 16 */
424 unsigned int fpr_bitmap;
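  /* Worked example of the layout above: if f8 and f15 have to be
     saved, the bitmap contains (1 << 8) | (1 << 15), since f8 is hard
     reg 24, f15 is hard reg 31, and the bit index is
     REGNO - FPR0_REGNUM (see cfun_set_fpr_save below).  */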
425
426 /* Number of floating point registers f8-f15 which must be saved. */
427 int high_fprs;
428
429 /* Set if return address needs to be saved.
430 This flag is set by s390_return_addr_rtx if it could not use
431      the initial value of r14 and therefore depends on r14 being
432      saved to the stack.  */
433 bool save_return_addr_p;
434
435 /* Size of stack frame. */
436 HOST_WIDE_INT frame_size;
437 };
438
439 /* Define the structure for the machine field in struct function. */
440
441 struct GTY(()) machine_function
442 {
443 struct s390_frame_layout frame_layout;
444
445 /* Literal pool base register. */
446 rtx base_reg;
447
448 /* True if we may need to perform branch splitting. */
449 bool split_branches_pending_p;
450
451 bool has_landing_pad_p;
452
453 /* True if the current function may contain a tbegin clobbering
454 FPRs. */
455 bool tbegin_p;
456
457 /* For -fsplit-stack support: A stack local which holds a pointer to
458 the stack arguments for a function with a variable number of
459 arguments. This is set at the start of the function and is used
460 to initialize the overflow_arg_area field of the va_list
461 structure. */
462 rtx split_stack_varargs_pointer;
463 };
464
465 /* A few accessor macros for struct cfun->machine->s390_frame_layout.  */
466
467 #define cfun_frame_layout (cfun->machine->frame_layout)
468 #define cfun_save_high_fprs_p (!!cfun_frame_layout.high_fprs)
469 #define cfun_save_arg_fprs_p (!!(TARGET_64BIT \
470 ? cfun_frame_layout.fpr_bitmap & 0x0f \
471 : cfun_frame_layout.fpr_bitmap & 0x03))
472 #define cfun_gprs_save_area_size ((cfun_frame_layout.last_save_gpr_slot - \
473 cfun_frame_layout.first_save_gpr_slot + 1) * UNITS_PER_LONG)
474 #define cfun_set_fpr_save(REGNO) (cfun->machine->frame_layout.fpr_bitmap |= \
475 (1 << (REGNO - FPR0_REGNUM)))
476 #define cfun_fpr_save_p(REGNO) (!!(cfun->machine->frame_layout.fpr_bitmap & \
477 (1 << (REGNO - FPR0_REGNUM))))
478 #define cfun_gpr_save_slot(REGNO) \
479 cfun->machine->frame_layout.gpr_save_slots[REGNO]
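/* Illustrative use of the accessors above (the real call sites are in
   the frame layout code below and may differ in detail):
   cfun_set_fpr_save (FPR0_REGNUM + 8) marks f8 as needing a save slot,
   cfun_fpr_save_p (FPR0_REGNUM + 8) tests that bit later on, and
   cfun_gpr_save_slot (14) yields SAVE_SLOT_NONE, SAVE_SLOT_STACK or an
   FPR number for the return-address register r14.  */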
480
481 /* Number of GPRs and FPRs used for argument passing. */
482 #define GP_ARG_NUM_REG 5
483 #define FP_ARG_NUM_REG (TARGET_64BIT? 4 : 2)
484 #define VEC_ARG_NUM_REG 8
485
486 /* A couple of shortcuts. */
487 #define CONST_OK_FOR_J(x) \
488 CONST_OK_FOR_CONSTRAINT_P((x), 'J', "J")
489 #define CONST_OK_FOR_K(x) \
490 CONST_OK_FOR_CONSTRAINT_P((x), 'K', "K")
491 #define CONST_OK_FOR_Os(x) \
492 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Os")
493 #define CONST_OK_FOR_Op(x) \
494 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "Op")
495 #define CONST_OK_FOR_On(x) \
496 CONST_OK_FOR_CONSTRAINT_P((x), 'O', "On")
497
498 #define REGNO_PAIR_OK(REGNO, MODE) \
499 (HARD_REGNO_NREGS ((REGNO), (MODE)) == 1 || !((REGNO) & 1))
500
501 /* That's the read ahead of the dynamic branch prediction unit in
502 bytes on a z10 (or higher) CPU. */
503 #define PREDICT_DISTANCE (TARGET_Z10 ? 384 : 2048)
504
505
506 /* Indicate which ABI has been used for passing vector args.
507 0 - no vector type arguments have been passed where the ABI is relevant
508 1 - the old ABI has been used
509 2 - a vector type argument has been passed either in a vector register
510 or on the stack by value */
511 static int s390_vector_abi = 0;
512
513 /* Set the vector ABI marker if TYPE is subject to the vector ABI
514 switch. The vector ABI affects only vector data types. There are
515 two aspects of the vector ABI relevant here:
516
517 1. vectors >= 16 bytes have an alignment of 8 bytes with the new
518 ABI and natural alignment with the old.
519
520    2. vectors <= 16 bytes are passed in VRs or by value on the stack
521 with the new ABI but by reference on the stack with the old.
522
523 If ARG_P is true TYPE is used for a function argument or return
524 value. The ABI marker then is set for all vector data types. If
525 ARG_P is false only type 1 vectors are being checked. */
526
527 static void
528 s390_check_type_for_vector_abi (const_tree type, bool arg_p, bool in_struct_p)
529 {
530 static hash_set<const_tree> visited_types_hash;
531
532 if (s390_vector_abi)
533 return;
534
535 if (type == NULL_TREE || TREE_CODE (type) == ERROR_MARK)
536 return;
537
538 if (visited_types_hash.contains (type))
539 return;
540
541 visited_types_hash.add (type);
542
543 if (VECTOR_TYPE_P (type))
544 {
545 int type_size = int_size_in_bytes (type);
546
547 /* Outside arguments only the alignment is changing and this
548 only happens for vector types >= 16 bytes. */
549 if (!arg_p && type_size < 16)
550 return;
551
552       /* In arguments vector types > 16 bytes are passed as before (GCC
553 never enforced the bigger alignment for arguments which was
554 required by the old vector ABI). However, it might still be
555 ABI relevant due to the changed alignment if it is a struct
556 member. */
557 if (arg_p && type_size > 16 && !in_struct_p)
558 return;
559
560 s390_vector_abi = TARGET_VX_ABI ? 2 : 1;
561 }
562 else if (POINTER_TYPE_P (type) || TREE_CODE (type) == ARRAY_TYPE)
563 {
564 /* ARRAY_TYPE: Since with neither of the ABIs we have more than
565 natural alignment there will never be ABI dependent padding
566 in an array type. That's why we do not set in_struct_p to
567 true here. */
568 s390_check_type_for_vector_abi (TREE_TYPE (type), arg_p, in_struct_p);
569 }
570 else if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
571 {
572 tree arg_chain;
573
574 /* Check the return type. */
575 s390_check_type_for_vector_abi (TREE_TYPE (type), true, false);
576
577 for (arg_chain = TYPE_ARG_TYPES (type);
578 arg_chain;
579 arg_chain = TREE_CHAIN (arg_chain))
580 s390_check_type_for_vector_abi (TREE_VALUE (arg_chain), true, false);
581 }
582 else if (RECORD_OR_UNION_TYPE_P (type))
583 {
584 tree field;
585
586 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
587 {
588 if (TREE_CODE (field) != FIELD_DECL)
589 continue;
590
591 s390_check_type_for_vector_abi (TREE_TYPE (field), arg_p, true);
592 }
593 }
594 }
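/* Two illustrative cases for the function above (hypothetical
   declarations):

     typedef double v2df __attribute__ ((vector_size (16)));
     void f (v2df x);

   A 16-byte vector argument like this falls under aspect 2 and sets
   the ABI marker.  A 32-byte vector is ABI relevant through its
   alignment (aspect 1) when used as data or as a struct member, but is
   filtered out above when passed directly as an argument.  */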
595
596
597 /* System z builtins. */
598
599 #include "s390-builtins.h"
600
601 const unsigned int bflags_builtin[S390_BUILTIN_MAX + 1] =
602 {
603 #undef B_DEF
604 #undef OB_DEF
605 #undef OB_DEF_VAR
606 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, ...) BFLAGS,
607 #define OB_DEF(...)
608 #define OB_DEF_VAR(...)
609 #include "s390-builtins.def"
610 0
611 };
612
613 const unsigned int opflags_builtin[S390_BUILTIN_MAX + 1] =
614 {
615 #undef B_DEF
616 #undef OB_DEF
617 #undef OB_DEF_VAR
618 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, ...) OPFLAGS,
619 #define OB_DEF(...)
620 #define OB_DEF_VAR(...)
621 #include "s390-builtins.def"
622 0
623 };
624
625 const unsigned int bflags_overloaded_builtin[S390_OVERLOADED_BUILTIN_MAX + 1] =
626 {
627 #undef B_DEF
628 #undef OB_DEF
629 #undef OB_DEF_VAR
630 #define B_DEF(...)
631 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, ...) BFLAGS,
632 #define OB_DEF_VAR(...)
633 #include "s390-builtins.def"
634 0
635 };
636
637 const unsigned int
638 bflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
639 {
640 #undef B_DEF
641 #undef OB_DEF
642 #undef OB_DEF_VAR
643 #define B_DEF(...)
644 #define OB_DEF(...)
645 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) FLAGS,
646 #include "s390-builtins.def"
647 0
648 };
649
650 const unsigned int
651 opflags_overloaded_builtin_var[S390_OVERLOADED_BUILTIN_VAR_MAX + 1] =
652 {
653 #undef B_DEF
654 #undef OB_DEF
655 #undef OB_DEF_VAR
656 #define B_DEF(...)
657 #define OB_DEF(...)
658 #define OB_DEF_VAR(NAME, PATTERN, FLAGS, OPFLAGS, FNTYPE) OPFLAGS,
659 #include "s390-builtins.def"
660 0
661 };
662
663 tree s390_builtin_types[BT_MAX];
664 tree s390_builtin_fn_types[BT_FN_MAX];
665 tree s390_builtin_decls[S390_BUILTIN_MAX +
666 S390_OVERLOADED_BUILTIN_MAX +
667 S390_OVERLOADED_BUILTIN_VAR_MAX];
668
669 static enum insn_code const code_for_builtin[S390_BUILTIN_MAX + 1] = {
670 #undef B_DEF
671 #undef OB_DEF
672 #undef OB_DEF_VAR
673 #define B_DEF(NAME, PATTERN, ...) CODE_FOR_##PATTERN,
674 #define OB_DEF(...)
675 #define OB_DEF_VAR(...)
676
677 #include "s390-builtins.def"
678 CODE_FOR_nothing
679 };
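/* Sketch of how the .def expansion above works, using a made-up entry:

     B_DEF (s390_example, example_pattern, 0, B_VX, O_U1, BT_FN_INT_INT)

   would contribute B_VX to bflags_builtin, O_U1 to opflags_builtin and
   CODE_FOR_example_pattern to code_for_builtin, all at the index of
   (presumably) S390_BUILTIN_s390_example; OB_DEF and OB_DEF_VAR fill
   the overloaded-builtin tables in the same way.  */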
680
681 static void
682 s390_init_builtins (void)
683 {
684 /* These definitions are being used in s390-builtins.def. */
685 tree returns_twice_attr = tree_cons (get_identifier ("returns_twice"),
686 NULL, NULL);
687 tree noreturn_attr = tree_cons (get_identifier ("noreturn"), NULL, NULL);
688 tree c_uint64_type_node;
689
690   /* The uint64_type_node from tree.c is not compatible with the C99
691 uint64_t data type. What we want is c_uint64_type_node from
692 c-common.c. But since backend code is not supposed to interface
693 with the frontend we recreate it here. */
694 if (TARGET_64BIT)
695 c_uint64_type_node = long_unsigned_type_node;
696 else
697 c_uint64_type_node = long_long_unsigned_type_node;
698
699 #undef DEF_TYPE
700 #define DEF_TYPE(INDEX, NODE, CONST_P) \
701 if (s390_builtin_types[INDEX] == NULL) \
702 s390_builtin_types[INDEX] = (!CONST_P) ? \
703 (NODE) : build_type_variant ((NODE), 1, 0);
704
705 #undef DEF_POINTER_TYPE
706 #define DEF_POINTER_TYPE(INDEX, INDEX_BASE) \
707 if (s390_builtin_types[INDEX] == NULL) \
708 s390_builtin_types[INDEX] = \
709 build_pointer_type (s390_builtin_types[INDEX_BASE]);
710
711 #undef DEF_DISTINCT_TYPE
712 #define DEF_DISTINCT_TYPE(INDEX, INDEX_BASE) \
713 if (s390_builtin_types[INDEX] == NULL) \
714 s390_builtin_types[INDEX] = \
715 build_distinct_type_copy (s390_builtin_types[INDEX_BASE]);
716
717 #undef DEF_VECTOR_TYPE
718 #define DEF_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
719 if (s390_builtin_types[INDEX] == NULL) \
720 s390_builtin_types[INDEX] = \
721 build_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
722
723 #undef DEF_OPAQUE_VECTOR_TYPE
724 #define DEF_OPAQUE_VECTOR_TYPE(INDEX, INDEX_BASE, ELEMENTS) \
725 if (s390_builtin_types[INDEX] == NULL) \
726 s390_builtin_types[INDEX] = \
727 build_opaque_vector_type (s390_builtin_types[INDEX_BASE], ELEMENTS);
728
729 #undef DEF_FN_TYPE
730 #define DEF_FN_TYPE(INDEX, args...) \
731 if (s390_builtin_fn_types[INDEX] == NULL) \
732 s390_builtin_fn_types[INDEX] = \
733 build_function_type_list (args, NULL_TREE);
734 #undef DEF_OV_TYPE
735 #define DEF_OV_TYPE(...)
736 #include "s390-builtin-types.def"
737
738 #undef B_DEF
739 #define B_DEF(NAME, PATTERN, ATTRS, BFLAGS, OPFLAGS, FNTYPE) \
740 if (s390_builtin_decls[S390_BUILTIN_##NAME] == NULL) \
741 s390_builtin_decls[S390_BUILTIN_##NAME] = \
742 add_builtin_function ("__builtin_" #NAME, \
743 s390_builtin_fn_types[FNTYPE], \
744 S390_BUILTIN_##NAME, \
745 BUILT_IN_MD, \
746 NULL, \
747 ATTRS);
748 #undef OB_DEF
749 #define OB_DEF(NAME, FIRST_VAR_NAME, LAST_VAR_NAME, BFLAGS, FNTYPE) \
750 if (s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] \
751 == NULL) \
752 s390_builtin_decls[S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX] = \
753 add_builtin_function ("__builtin_" #NAME, \
754 s390_builtin_fn_types[FNTYPE], \
755 S390_OVERLOADED_BUILTIN_##NAME + S390_BUILTIN_MAX, \
756 BUILT_IN_MD, \
757 NULL, \
758 0);
759 #undef OB_DEF_VAR
760 #define OB_DEF_VAR(...)
761 #include "s390-builtins.def"
762
763 }
764
765 /* Return true if ARG is appropriate as argument number ARGNUM of
766 builtin DECL. The operand flags from s390-builtins.def have to
767    be passed as OP_FLAGS.  */
768 bool
769 s390_const_operand_ok (tree arg, int argnum, int op_flags, tree decl)
770 {
771 if (O_UIMM_P (op_flags))
772 {
773 int bitwidths[] = { 1, 2, 3, 4, 5, 8, 12, 16, 32 };
774 int bitwidth = bitwidths[op_flags - O_U1];
775
776 if (!tree_fits_uhwi_p (arg)
777 || tree_to_uhwi (arg) > (HOST_WIDE_INT_1U << bitwidth) - 1)
778 {
779 error("constant argument %d for builtin %qF is out of range (0.."
780 HOST_WIDE_INT_PRINT_UNSIGNED ")",
781 argnum, decl,
782 (HOST_WIDE_INT_1U << bitwidth) - 1);
783 return false;
784 }
785 }
786
787 if (O_SIMM_P (op_flags))
788 {
789 int bitwidths[] = { 2, 3, 4, 5, 8, 12, 16, 32 };
790 int bitwidth = bitwidths[op_flags - O_S2];
791
792 if (!tree_fits_shwi_p (arg)
793 || tree_to_shwi (arg) < -(HOST_WIDE_INT_1 << (bitwidth - 1))
794 || tree_to_shwi (arg) > ((HOST_WIDE_INT_1 << (bitwidth - 1)) - 1))
795 {
796 error("constant argument %d for builtin %qF is out of range ("
797 HOST_WIDE_INT_PRINT_DEC ".."
798 HOST_WIDE_INT_PRINT_DEC ")",
799 argnum, decl,
800 -(HOST_WIDE_INT_1 << (bitwidth - 1)),
801 (HOST_WIDE_INT_1 << (bitwidth - 1)) - 1);
802 return false;
803 }
804 }
805 return true;
806 }
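/* Worked example of the range checks above: a 4-bit unsigned operand
   flag accepts 0 .. 15, an 8-bit signed flag accepts -128 .. 127;
   anything outside those bounds triggers the "out of range" error and
   a false return.  (The exact O_U*/O_S* flag values come from
   s390-builtins.def.)  */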
807
808 /* Expand an expression EXP that calls a built-in function,
809 with result going to TARGET if that's convenient
810 (and in mode MODE if that's convenient).
811 SUBTARGET may be used as the target for computing one of EXP's operands.
812 IGNORE is nonzero if the value is to be ignored. */
813
814 static rtx
815 s390_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
816 machine_mode mode ATTRIBUTE_UNUSED,
817 int ignore ATTRIBUTE_UNUSED)
818 {
819 #define MAX_ARGS 6
820
821 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
822 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
823 enum insn_code icode;
824 rtx op[MAX_ARGS], pat;
825 int arity;
826 bool nonvoid;
827 tree arg;
828 call_expr_arg_iterator iter;
829 unsigned int all_op_flags = opflags_for_builtin (fcode);
830 machine_mode last_vec_mode = VOIDmode;
831
832 if (TARGET_DEBUG_ARG)
833 {
834 fprintf (stderr,
835 "s390_expand_builtin, code = %4d, %s, bflags = 0x%x\n",
836 (int)fcode, IDENTIFIER_POINTER (DECL_NAME (fndecl)),
837 bflags_for_builtin (fcode));
838 }
839
840 if (S390_USE_TARGET_ATTRIBUTE)
841 {
842 unsigned int bflags;
843
844 bflags = bflags_for_builtin (fcode);
845 if ((bflags & B_HTM) && !TARGET_HTM)
846 {
847 error ("builtin %qF is not supported without -mhtm "
848 "(default with -march=zEC12 and higher).", fndecl);
849 return const0_rtx;
850 }
851 if (((bflags & B_VX) || (bflags & B_VXE)) && !TARGET_VX)
852 {
853 error ("builtin %qF requires -mvx "
854 "(default with -march=z13 and higher).", fndecl);
855 return const0_rtx;
856 }
857
858 if ((bflags & B_VXE) && !TARGET_VXE)
859 {
860 	  error ("builtin %qF requires z14 or higher.", fndecl);
861 return const0_rtx;
862 }
863 }
864 if (fcode >= S390_OVERLOADED_BUILTIN_VAR_OFFSET
865 && fcode < S390_ALL_BUILTIN_MAX)
866 {
867 gcc_unreachable ();
868 }
869 else if (fcode < S390_OVERLOADED_BUILTIN_OFFSET)
870 {
871 icode = code_for_builtin[fcode];
872 /* Set a flag in the machine specific cfun part in order to support
873 saving/restoring of FPRs. */
874 if (fcode == S390_BUILTIN_tbegin || fcode == S390_BUILTIN_tbegin_retry)
875 cfun->machine->tbegin_p = true;
876 }
877 else if (fcode < S390_OVERLOADED_BUILTIN_VAR_OFFSET)
878 {
879 error ("unresolved overloaded builtin");
880 return const0_rtx;
881 }
882 else
883 internal_error ("bad builtin fcode");
884
885 if (icode == 0)
886 internal_error ("bad builtin icode");
887
888 nonvoid = TREE_TYPE (TREE_TYPE (fndecl)) != void_type_node;
889
890 if (nonvoid)
891 {
892 machine_mode tmode = insn_data[icode].operand[0].mode;
893 if (!target
894 || GET_MODE (target) != tmode
895 || !(*insn_data[icode].operand[0].predicate) (target, tmode))
896 target = gen_reg_rtx (tmode);
897
898 /* There are builtins (e.g. vec_promote) with no vector
899 arguments but an element selector. So we have to also look
900 at the vector return type when emitting the modulo
901 operation. */
902 if (VECTOR_MODE_P (insn_data[icode].operand[0].mode))
903 last_vec_mode = insn_data[icode].operand[0].mode;
904 }
905
906 arity = 0;
907 FOR_EACH_CALL_EXPR_ARG (arg, iter, exp)
908 {
909 rtx tmp_rtx;
910 const struct insn_operand_data *insn_op;
911 unsigned int op_flags = all_op_flags & ((1 << O_SHIFT) - 1);
912
913 all_op_flags = all_op_flags >> O_SHIFT;
914
915 if (arg == error_mark_node)
916 return NULL_RTX;
917 if (arity >= MAX_ARGS)
918 return NULL_RTX;
919
920 if (O_IMM_P (op_flags)
921 && TREE_CODE (arg) != INTEGER_CST)
922 {
923 error ("constant value required for builtin %qF argument %d",
924 fndecl, arity + 1);
925 return const0_rtx;
926 }
927
928 if (!s390_const_operand_ok (arg, arity + 1, op_flags, fndecl))
929 return const0_rtx;
930
931 insn_op = &insn_data[icode].operand[arity + nonvoid];
932 op[arity] = expand_expr (arg, NULL_RTX, insn_op->mode, EXPAND_NORMAL);
933
934 /* expand_expr truncates constants to the target mode only if it
935 is "convenient". However, our checks below rely on this
936 being done. */
937 if (CONST_INT_P (op[arity])
938 && SCALAR_INT_MODE_P (insn_op->mode)
939 && GET_MODE (op[arity]) != insn_op->mode)
940 op[arity] = GEN_INT (trunc_int_for_mode (INTVAL (op[arity]),
941 insn_op->mode));
942
943 /* Wrap the expanded RTX for pointer types into a MEM expr with
944 the proper mode. This allows us to use e.g. (match_operand
945 "memory_operand"..) in the insn patterns instead of (mem
946 	 (match_operand "address_operand")).  This is helpful for
947 patterns not just accepting MEMs. */
948 if (POINTER_TYPE_P (TREE_TYPE (arg))
949 && insn_op->predicate != address_operand)
950 op[arity] = gen_rtx_MEM (insn_op->mode, op[arity]);
951
952       /* Expand the modulo operation required on element selectors.  */
953 if (op_flags == O_ELEM)
954 {
955 gcc_assert (last_vec_mode != VOIDmode);
956 op[arity] = simplify_expand_binop (SImode, code_to_optab (AND),
957 op[arity],
958 GEN_INT (GET_MODE_NUNITS (last_vec_mode) - 1),
959 NULL_RTX, 1, OPTAB_DIRECT);
960 }
961
962 /* Record the vector mode used for an element selector. This assumes:
963 1. There is no builtin with two different vector modes and an element selector
964 2. The element selector comes after the vector type it is referring to.
965 	 This is currently true for all the builtins, but FIXME: we
966 	 should check for that explicitly.  */
967 if (VECTOR_MODE_P (insn_op->mode))
968 last_vec_mode = insn_op->mode;
969
970 if (insn_op->predicate (op[arity], insn_op->mode))
971 {
972 arity++;
973 continue;
974 }
975
976 if (MEM_P (op[arity])
977 && insn_op->predicate == memory_operand
978 && (GET_MODE (XEXP (op[arity], 0)) == Pmode
979 || GET_MODE (XEXP (op[arity], 0)) == VOIDmode))
980 {
981 op[arity] = replace_equiv_address (op[arity],
982 copy_to_mode_reg (Pmode,
983 XEXP (op[arity], 0)));
984 }
985 /* Some of the builtins require different modes/types than the
986 pattern in order to implement a specific API. Instead of
987 adding many expanders which do the mode change we do it here.
988 	 E.g. s390_vec_add_u128, which is required to have vector
989 	 unsigned char arguments, is mapped to addti3.  */
990 else if (insn_op->mode != VOIDmode
991 && GET_MODE (op[arity]) != VOIDmode
992 && GET_MODE (op[arity]) != insn_op->mode
993 && ((tmp_rtx = simplify_gen_subreg (insn_op->mode, op[arity],
994 GET_MODE (op[arity]), 0))
995 != NULL_RTX))
996 {
997 op[arity] = tmp_rtx;
998 }
999 else if (GET_MODE (op[arity]) == insn_op->mode
1000 || GET_MODE (op[arity]) == VOIDmode
1001 || (insn_op->predicate == address_operand
1002 && GET_MODE (op[arity]) == Pmode))
1003 {
1004 /* An address_operand usually has VOIDmode in the expander
1005 so we cannot use this. */
1006 machine_mode target_mode =
1007 (insn_op->predicate == address_operand
1008 ? (machine_mode) Pmode : insn_op->mode);
1009 op[arity] = copy_to_mode_reg (target_mode, op[arity]);
1010 }
1011
1012 if (!insn_op->predicate (op[arity], insn_op->mode))
1013 {
1014 error ("invalid argument %d for builtin %qF", arity + 1, fndecl);
1015 return const0_rtx;
1016 }
1017 arity++;
1018 }
1019
1020 switch (arity)
1021 {
1022 case 0:
1023 pat = GEN_FCN (icode) (target);
1024 break;
1025 case 1:
1026 if (nonvoid)
1027 pat = GEN_FCN (icode) (target, op[0]);
1028 else
1029 pat = GEN_FCN (icode) (op[0]);
1030 break;
1031 case 2:
1032 if (nonvoid)
1033 pat = GEN_FCN (icode) (target, op[0], op[1]);
1034 else
1035 pat = GEN_FCN (icode) (op[0], op[1]);
1036 break;
1037 case 3:
1038 if (nonvoid)
1039 pat = GEN_FCN (icode) (target, op[0], op[1], op[2]);
1040 else
1041 pat = GEN_FCN (icode) (op[0], op[1], op[2]);
1042 break;
1043 case 4:
1044 if (nonvoid)
1045 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3]);
1046 else
1047 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3]);
1048 break;
1049 case 5:
1050 if (nonvoid)
1051 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4]);
1052 else
1053 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4]);
1054 break;
1055 case 6:
1056 if (nonvoid)
1057 pat = GEN_FCN (icode) (target, op[0], op[1], op[2], op[3], op[4], op[5]);
1058 else
1059 pat = GEN_FCN (icode) (op[0], op[1], op[2], op[3], op[4], op[5]);
1060 break;
1061 default:
1062 gcc_unreachable ();
1063 }
1064 if (!pat)
1065 return NULL_RTX;
1066 emit_insn (pat);
1067
1068 if (nonvoid)
1069 return target;
1070 else
1071 return const0_rtx;
1072 }
1073
1074
1075 static const int s390_hotpatch_hw_max = 1000000;
1076 static int s390_hotpatch_hw_before_label = 0;
1077 static int s390_hotpatch_hw_after_label = 0;
1078
1079 /* Check whether the hotpatch attribute is applied to a function and, if it
1080    has arguments, whether the arguments are valid.  */
1081
1082 static tree
1083 s390_handle_hotpatch_attribute (tree *node, tree name, tree args,
1084 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1085 {
1086 tree expr;
1087 tree expr2;
1088 int err;
1089
1090 if (TREE_CODE (*node) != FUNCTION_DECL)
1091 {
1092 warning (OPT_Wattributes, "%qE attribute only applies to functions",
1093 name);
1094 *no_add_attrs = true;
1095 }
1096 if (args != NULL && TREE_CHAIN (args) != NULL)
1097 {
1098 expr = TREE_VALUE (args);
1099 expr2 = TREE_VALUE (TREE_CHAIN (args));
1100 }
1101 if (args == NULL || TREE_CHAIN (args) == NULL)
1102 err = 1;
1103 else if (TREE_CODE (expr) != INTEGER_CST
1104 || !INTEGRAL_TYPE_P (TREE_TYPE (expr))
1105 || wi::gtu_p (expr, s390_hotpatch_hw_max))
1106 err = 1;
1107 else if (TREE_CODE (expr2) != INTEGER_CST
1108 || !INTEGRAL_TYPE_P (TREE_TYPE (expr2))
1109 || wi::gtu_p (expr2, s390_hotpatch_hw_max))
1110 err = 1;
1111 else
1112 err = 0;
1113 if (err)
1114 {
1115 error ("requested %qE attribute is not a comma separated pair of"
1116 " non-negative integer constants or too large (max. %d)", name,
1117 s390_hotpatch_hw_max);
1118 *no_add_attrs = true;
1119 }
1120
1121 return NULL_TREE;
1122 }
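/* Plausible use of the attribute checked above (the numbers are just
   an example):

     void foo (void) __attribute__ ((hotpatch (1, 2)));

   requests one halfword of hotpatch space before and two halfwords
   after the function label; both values must be non-negative integer
   constants no larger than s390_hotpatch_hw_max.  */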
1123
1124 /* Expand the s390_vector_bool type attribute. */
1125
1126 static tree
1127 s390_handle_vectorbool_attribute (tree *node, tree name ATTRIBUTE_UNUSED,
1128 tree args ATTRIBUTE_UNUSED,
1129 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
1130 {
1131 tree type = *node, result = NULL_TREE;
1132 machine_mode mode;
1133
1134 while (POINTER_TYPE_P (type)
1135 || TREE_CODE (type) == FUNCTION_TYPE
1136 || TREE_CODE (type) == METHOD_TYPE
1137 || TREE_CODE (type) == ARRAY_TYPE)
1138 type = TREE_TYPE (type);
1139
1140 mode = TYPE_MODE (type);
1141 switch (mode)
1142 {
1143 case E_DImode: case E_V2DImode:
1144 result = s390_builtin_types[BT_BV2DI];
1145 break;
1146 case E_SImode: case E_V4SImode:
1147 result = s390_builtin_types[BT_BV4SI];
1148 break;
1149 case E_HImode: case E_V8HImode:
1150 result = s390_builtin_types[BT_BV8HI];
1151 break;
1152 case E_QImode: case E_V16QImode:
1153 result = s390_builtin_types[BT_BV16QI];
1154 break;
1155 default:
1156 break;
1157 }
1158
1159 *no_add_attrs = true; /* No need to hang on to the attribute. */
1160
1161 if (result)
1162 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
1163
1164 return NULL_TREE;
1165 }
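/* Illustration of the mapping above (hypothetical typedefs): applying
   the attribute to a 4 x 32-bit vector type, e.g.

     typedef int v4si __attribute__ ((vector_size (16)));
     typedef v4si bv4si __attribute__ ((s390_vector_bool));

   rewrites the type to the BT_BV4SI boolean vector type; types whose
   mode is not handled in the switch are left unchanged.  */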
1166
1167 static const struct attribute_spec s390_attribute_table[] = {
1168 { "hotpatch", 2, 2, true, false, false, s390_handle_hotpatch_attribute, false },
1169 { "s390_vector_bool", 0, 0, false, true, false, s390_handle_vectorbool_attribute, true },
1170 /* End element. */
1171 { NULL, 0, 0, false, false, false, NULL, false }
1172 };
1173
1174 /* Return the alignment for LABEL. We default to the -falign-labels
1175 value except for the literal pool base label. */
1176 int
1177 s390_label_align (rtx_insn *label)
1178 {
1179 rtx_insn *prev_insn = prev_active_insn (label);
1180 rtx set, src;
1181
1182 if (prev_insn == NULL_RTX)
1183 goto old;
1184
1185 set = single_set (prev_insn);
1186
1187 if (set == NULL_RTX)
1188 goto old;
1189
1190 src = SET_SRC (set);
1191
1192 /* Don't align literal pool base labels. */
1193 if (GET_CODE (src) == UNSPEC
1194 && XINT (src, 1) == UNSPEC_MAIN_BASE)
1195 return 0;
1196
1197 old:
1198 return align_labels_log;
1199 }
1200
1201 static GTY(()) rtx got_symbol;
1202
1203 /* Return the GOT table symbol. The symbol will be created when the
1204 function is invoked for the first time. */
1205
1206 static rtx
1207 s390_got_symbol (void)
1208 {
1209 if (!got_symbol)
1210 {
1211 got_symbol = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
1212 SYMBOL_REF_FLAGS (got_symbol) = SYMBOL_FLAG_LOCAL;
1213 }
1214
1215 return got_symbol;
1216 }
1217
1218 static scalar_int_mode
1219 s390_libgcc_cmp_return_mode (void)
1220 {
1221 return TARGET_64BIT ? DImode : SImode;
1222 }
1223
1224 static scalar_int_mode
1225 s390_libgcc_shift_count_mode (void)
1226 {
1227 return TARGET_64BIT ? DImode : SImode;
1228 }
1229
1230 static scalar_int_mode
1231 s390_unwind_word_mode (void)
1232 {
1233 return TARGET_64BIT ? DImode : SImode;
1234 }
1235
1236 /* Return true if the back end supports mode MODE. */
1237 static bool
1238 s390_scalar_mode_supported_p (scalar_mode mode)
1239 {
1240   /* In contrast to the default implementation, reject TImode constants on
1241      31-bit TARGET_ZARCH for ABI compliance.  */
1242 if (!TARGET_64BIT && TARGET_ZARCH && mode == TImode)
1243 return false;
1244
1245 if (DECIMAL_FLOAT_MODE_P (mode))
1246 return default_decimal_float_supported_p ();
1247
1248 return default_scalar_mode_supported_p (mode);
1249 }
1250
1251 /* Return true if the back end supports vector mode MODE. */
1252 static bool
1253 s390_vector_mode_supported_p (machine_mode mode)
1254 {
1255 machine_mode inner;
1256
1257 if (!VECTOR_MODE_P (mode)
1258 || !TARGET_VX
1259 || GET_MODE_SIZE (mode) > 16)
1260 return false;
1261
1262 inner = GET_MODE_INNER (mode);
1263
1264 switch (inner)
1265 {
1266 case E_QImode:
1267 case E_HImode:
1268 case E_SImode:
1269 case E_DImode:
1270 case E_TImode:
1271 case E_SFmode:
1272 case E_DFmode:
1273 case E_TFmode:
1274 return true;
1275 default:
1276 return false;
1277 }
1278 }
1279
1280 /* Set the has_landing_pad_p flag in struct machine_function to VALUE. */
1281
1282 void
1283 s390_set_has_landing_pad_p (bool value)
1284 {
1285 cfun->machine->has_landing_pad_p = value;
1286 }
1287
1288 /* If two condition code modes are compatible, return a condition code
1289 mode which is compatible with both. Otherwise, return
1290 VOIDmode. */
1291
1292 static machine_mode
1293 s390_cc_modes_compatible (machine_mode m1, machine_mode m2)
1294 {
1295 if (m1 == m2)
1296 return m1;
1297
1298 switch (m1)
1299 {
1300 case E_CCZmode:
1301 if (m2 == CCUmode || m2 == CCTmode || m2 == CCZ1mode
1302 || m2 == CCSmode || m2 == CCSRmode || m2 == CCURmode)
1303 return m2;
1304 return VOIDmode;
1305
1306 case E_CCSmode:
1307 case E_CCUmode:
1308 case E_CCTmode:
1309 case E_CCSRmode:
1310 case E_CCURmode:
1311 case E_CCZ1mode:
1312 if (m2 == CCZmode)
1313 return m1;
1314
1315 return VOIDmode;
1316
1317 default:
1318 return VOIDmode;
1319 }
1320 return VOIDmode;
1321 }
1322
1323 /* Return true if SET either doesn't set the CC register, or else
1324 the source and destination have matching CC modes and that
1325 CC mode is at least as constrained as REQ_MODE. */
1326
1327 static bool
1328 s390_match_ccmode_set (rtx set, machine_mode req_mode)
1329 {
1330 machine_mode set_mode;
1331
1332 gcc_assert (GET_CODE (set) == SET);
1333
1334 /* These modes are supposed to be used only in CC consumer
1335 patterns. */
1336 gcc_assert (req_mode != CCVIALLmode && req_mode != CCVIANYmode
1337 && req_mode != CCVFALLmode && req_mode != CCVFANYmode);
1338
1339 if (GET_CODE (SET_DEST (set)) != REG || !CC_REGNO_P (REGNO (SET_DEST (set))))
1340 return 1;
1341
1342 set_mode = GET_MODE (SET_DEST (set));
1343 switch (set_mode)
1344 {
1345 case E_CCZ1mode:
1346 case E_CCSmode:
1347 case E_CCSRmode:
1348 case E_CCUmode:
1349 case E_CCURmode:
1350 case E_CCLmode:
1351 case E_CCL1mode:
1352 case E_CCL2mode:
1353 case E_CCL3mode:
1354 case E_CCT1mode:
1355 case E_CCT2mode:
1356 case E_CCT3mode:
1357 case E_CCVEQmode:
1358 case E_CCVIHmode:
1359 case E_CCVIHUmode:
1360 case E_CCVFHmode:
1361 case E_CCVFHEmode:
1362 if (req_mode != set_mode)
1363 return 0;
1364 break;
1365
1366 case E_CCZmode:
1367 if (req_mode != CCSmode && req_mode != CCUmode && req_mode != CCTmode
1368 && req_mode != CCSRmode && req_mode != CCURmode
1369 && req_mode != CCZ1mode)
1370 return 0;
1371 break;
1372
1373 case E_CCAPmode:
1374 case E_CCANmode:
1375 if (req_mode != CCAmode)
1376 return 0;
1377 break;
1378
1379 default:
1380 gcc_unreachable ();
1381 }
1382
1383 return (GET_MODE (SET_SRC (set)) == set_mode);
1384 }
1385
1386 /* Return true if every SET in INSN that sets the CC register
1387 has source and destination with matching CC modes and that
1388 CC mode is at least as constrained as REQ_MODE.
1389 If REQ_MODE is VOIDmode, always return false. */
1390
1391 bool
1392 s390_match_ccmode (rtx_insn *insn, machine_mode req_mode)
1393 {
1394 int i;
1395
1396 /* s390_tm_ccmode returns VOIDmode to indicate failure. */
1397 if (req_mode == VOIDmode)
1398 return false;
1399
1400 if (GET_CODE (PATTERN (insn)) == SET)
1401 return s390_match_ccmode_set (PATTERN (insn), req_mode);
1402
1403 if (GET_CODE (PATTERN (insn)) == PARALLEL)
1404 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
1405 {
1406 rtx set = XVECEXP (PATTERN (insn), 0, i);
1407 if (GET_CODE (set) == SET)
1408 if (!s390_match_ccmode_set (set, req_mode))
1409 return false;
1410 }
1411
1412 return true;
1413 }
1414
1415 /* If a test-under-mask instruction can be used to implement
1416 (compare (and ... OP1) OP2), return the CC mode required
1417 to do that. Otherwise, return VOIDmode.
1418 MIXED is true if the instruction can distinguish between
1419    CC1 and CC2 for mixed selected bits (TMxx); it is false
1420 if the instruction cannot (TM). */
1421
1422 machine_mode
1423 s390_tm_ccmode (rtx op1, rtx op2, bool mixed)
1424 {
1425 int bit0, bit1;
1426
1427 /* ??? Fixme: should work on CONST_WIDE_INT as well. */
1428 if (GET_CODE (op1) != CONST_INT || GET_CODE (op2) != CONST_INT)
1429 return VOIDmode;
1430
1431 /* Selected bits all zero: CC0.
1432 e.g.: int a; if ((a & (16 + 128)) == 0) */
1433 if (INTVAL (op2) == 0)
1434 return CCTmode;
1435
1436 /* Selected bits all one: CC3.
1437 e.g.: int a; if ((a & (16 + 128)) == 16 + 128) */
1438 if (INTVAL (op2) == INTVAL (op1))
1439 return CCT3mode;
1440
1441 /* Exactly two bits selected, mixed zeroes and ones: CC1 or CC2. e.g.:
1442 int a;
1443 if ((a & (16 + 128)) == 16) -> CCT1
1444 if ((a & (16 + 128)) == 128) -> CCT2 */
1445 if (mixed)
1446 {
1447 bit1 = exact_log2 (INTVAL (op2));
1448 bit0 = exact_log2 (INTVAL (op1) ^ INTVAL (op2));
1449 if (bit0 != -1 && bit1 != -1)
1450 return bit0 > bit1 ? CCT1mode : CCT2mode;
1451 }
1452
1453 return VOIDmode;
1454 }
1455
1456 /* Given a comparison code OP (EQ, NE, etc.) and the operands
1457 OP0 and OP1 of a COMPARE, return the mode to be used for the
1458 comparison. */
1459
1460 machine_mode
1461 s390_select_ccmode (enum rtx_code code, rtx op0, rtx op1)
1462 {
1463 switch (code)
1464 {
1465 case EQ:
1466 case NE:
1467 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1468 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1469 return CCAPmode;
1470 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1471 && CONST_OK_FOR_K (INTVAL (XEXP (op0, 1))))
1472 return CCAPmode;
1473 if ((GET_CODE (op0) == PLUS || GET_CODE (op0) == MINUS
1474 || GET_CODE (op1) == NEG)
1475 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1476 return CCLmode;
1477
1478 if (GET_CODE (op0) == AND)
1479 {
1480 /* Check whether we can potentially do it via TM. */
1481 machine_mode ccmode;
1482 ccmode = s390_tm_ccmode (XEXP (op0, 1), op1, 1);
1483 if (ccmode != VOIDmode)
1484 {
1485 /* Relax CCTmode to CCZmode to allow fall-back to AND
1486 if that turns out to be beneficial. */
1487 return ccmode == CCTmode ? CCZmode : ccmode;
1488 }
1489 }
1490
1491 if (register_operand (op0, HImode)
1492 && GET_CODE (op1) == CONST_INT
1493 && (INTVAL (op1) == -1 || INTVAL (op1) == 65535))
1494 return CCT3mode;
1495 if (register_operand (op0, QImode)
1496 && GET_CODE (op1) == CONST_INT
1497 && (INTVAL (op1) == -1 || INTVAL (op1) == 255))
1498 return CCT3mode;
1499
1500 return CCZmode;
1501
1502 case LE:
1503 case LT:
1504 case GE:
1505 case GT:
1506       /* The only overflow condition of NEG and ABS happens when the
1507 	 operand is the most negative value (INT_MIN); the mathematically
1508 	 positive result then wraps and stays negative.
1509 	 Using CCAP mode the resulting cc can still be used for comparisons.  */
1510 if ((GET_CODE (op0) == NEG || GET_CODE (op0) == ABS)
1511 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1512 return CCAPmode;
1513
1514 /* If constants are involved in an add instruction it is possible to use
1515 	 the resulting cc for comparisons with zero.  If the sign of the
1516 	 constant is known, the overflow behavior is predictable.  e.g.:
1517 int a, b; if ((b = a + c) > 0)
1518 with c as a constant value: c < 0 -> CCAN and c >= 0 -> CCAP */
1519 if (GET_CODE (op0) == PLUS && GET_CODE (XEXP (op0, 1)) == CONST_INT
1520 && (CONST_OK_FOR_K (INTVAL (XEXP (op0, 1)))
1521 || (CONST_OK_FOR_CONSTRAINT_P (INTVAL (XEXP (op0, 1)), 'O', "Os")
1522 /* Avoid INT32_MIN on 32 bit. */
1523 && (!TARGET_ZARCH || INTVAL (XEXP (op0, 1)) != -0x7fffffff - 1))))
1524 {
1525 if (INTVAL (XEXP((op0), 1)) < 0)
1526 return CCANmode;
1527 else
1528 return CCAPmode;
1529 }
1530 /* Fall through. */
1531 case UNORDERED:
1532 case ORDERED:
1533 case UNEQ:
1534 case UNLE:
1535 case UNLT:
1536 case UNGE:
1537 case UNGT:
1538 case LTGT:
1539 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1540 && GET_CODE (op1) != CONST_INT)
1541 return CCSRmode;
1542 return CCSmode;
1543
1544 case LTU:
1545 case GEU:
1546 if (GET_CODE (op0) == PLUS
1547 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1548 return CCL1mode;
1549
1550 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1551 && GET_CODE (op1) != CONST_INT)
1552 return CCURmode;
1553 return CCUmode;
1554
1555 case LEU:
1556 case GTU:
1557 if (GET_CODE (op0) == MINUS
1558 && GET_MODE_CLASS (GET_MODE (op0)) == MODE_INT)
1559 return CCL2mode;
1560
1561 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
1562 && GET_CODE (op1) != CONST_INT)
1563 return CCURmode;
1564 return CCUmode;
1565
1566 default:
1567 gcc_unreachable ();
1568 }
1569 }
1570
1571 /* Replace the comparison OP0 CODE OP1 by a semantically equivalent one
1572 that we can implement more efficiently. */
1573
1574 static void
1575 s390_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1576 bool op0_preserve_value)
1577 {
1578 if (op0_preserve_value)
1579 return;
1580
1581 /* Convert ZERO_EXTRACT back to AND to enable TM patterns. */
1582 if ((*code == EQ || *code == NE)
1583 && *op1 == const0_rtx
1584 && GET_CODE (*op0) == ZERO_EXTRACT
1585 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1586 && GET_CODE (XEXP (*op0, 2)) == CONST_INT
1587 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1588 {
1589 rtx inner = XEXP (*op0, 0);
1590 HOST_WIDE_INT modesize = GET_MODE_BITSIZE (GET_MODE (inner));
1591 HOST_WIDE_INT len = INTVAL (XEXP (*op0, 1));
1592 HOST_WIDE_INT pos = INTVAL (XEXP (*op0, 2));
1593
1594 if (len > 0 && len < modesize
1595 && pos >= 0 && pos + len <= modesize
1596 && modesize <= HOST_BITS_PER_WIDE_INT)
1597 {
1598 unsigned HOST_WIDE_INT block;
1599 block = (HOST_WIDE_INT_1U << len) - 1;
1600 block <<= modesize - pos - len;
1601
1602 *op0 = gen_rtx_AND (GET_MODE (inner), inner,
1603 gen_int_mode (block, GET_MODE (inner)));
1604 }
1605 }
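  /* Concrete instance of the rewrite above (illustrative numbers): for
     a 32-bit INNER, (zero_extract X (const_int 1) (const_int 7))
     compared against zero becomes (and X (const_int 0x1000000)),
     i.e. block = ((1 << 1) - 1) << (32 - 7 - 1), which the TM patterns
     can match.  */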
1606
1607 /* Narrow AND of memory against immediate to enable TM. */
1608 if ((*code == EQ || *code == NE)
1609 && *op1 == const0_rtx
1610 && GET_CODE (*op0) == AND
1611 && GET_CODE (XEXP (*op0, 1)) == CONST_INT
1612 && SCALAR_INT_MODE_P (GET_MODE (XEXP (*op0, 0))))
1613 {
1614 rtx inner = XEXP (*op0, 0);
1615 rtx mask = XEXP (*op0, 1);
1616
1617 /* Ignore paradoxical SUBREGs if all extra bits are masked out. */
1618 if (GET_CODE (inner) == SUBREG
1619 && SCALAR_INT_MODE_P (GET_MODE (SUBREG_REG (inner)))
1620 && (GET_MODE_SIZE (GET_MODE (inner))
1621 >= GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1622 && ((INTVAL (mask)
1623 & GET_MODE_MASK (GET_MODE (inner))
1624 & ~GET_MODE_MASK (GET_MODE (SUBREG_REG (inner))))
1625 == 0))
1626 inner = SUBREG_REG (inner);
1627
1628 /* Do not change volatile MEMs. */
1629 if (MEM_P (inner) && !MEM_VOLATILE_P (inner))
1630 {
1631 int part = s390_single_part (XEXP (*op0, 1),
1632 GET_MODE (inner), QImode, 0);
1633 if (part >= 0)
1634 {
1635 mask = gen_int_mode (s390_extract_part (mask, QImode, 0), QImode);
1636 inner = adjust_address_nv (inner, QImode, part);
1637 *op0 = gen_rtx_AND (QImode, inner, mask);
1638 }
1639 }
1640 }
1641
1642 /* Narrow comparisons against 0xffff to HImode if possible. */
1643 if ((*code == EQ || *code == NE)
1644 && GET_CODE (*op1) == CONST_INT
1645 && INTVAL (*op1) == 0xffff
1646 && SCALAR_INT_MODE_P (GET_MODE (*op0))
1647 && (nonzero_bits (*op0, GET_MODE (*op0))
1648 & ~HOST_WIDE_INT_UC (0xffff)) == 0)
1649 {
1650 *op0 = gen_lowpart (HImode, *op0);
1651 *op1 = constm1_rtx;
1652 }
1653
1654 /* Remove redundant UNSPEC_STRCMPCC_TO_INT conversions if possible. */
1655 if (GET_CODE (*op0) == UNSPEC
1656 && XINT (*op0, 1) == UNSPEC_STRCMPCC_TO_INT
1657 && XVECLEN (*op0, 0) == 1
1658 && GET_MODE (XVECEXP (*op0, 0, 0)) == CCUmode
1659 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1660 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1661 && *op1 == const0_rtx)
1662 {
1663 enum rtx_code new_code = UNKNOWN;
1664 switch (*code)
1665 {
1666 case EQ: new_code = EQ; break;
1667 case NE: new_code = NE; break;
1668 case LT: new_code = GTU; break;
1669 case GT: new_code = LTU; break;
1670 case LE: new_code = GEU; break;
1671 case GE: new_code = LEU; break;
1672 default: break;
1673 }
1674
1675 if (new_code != UNKNOWN)
1676 {
1677 *op0 = XVECEXP (*op0, 0, 0);
1678 *code = new_code;
1679 }
1680 }
1681
1682 /* Remove redundant UNSPEC_CC_TO_INT conversions if possible. */
1683 if (GET_CODE (*op0) == UNSPEC
1684 && XINT (*op0, 1) == UNSPEC_CC_TO_INT
1685 && XVECLEN (*op0, 0) == 1
1686 && GET_CODE (XVECEXP (*op0, 0, 0)) == REG
1687 && REGNO (XVECEXP (*op0, 0, 0)) == CC_REGNUM
1688 && CONST_INT_P (*op1))
1689 {
1690 enum rtx_code new_code = UNKNOWN;
1691 switch (GET_MODE (XVECEXP (*op0, 0, 0)))
1692 {
1693 case E_CCZmode:
1694 case E_CCRAWmode:
1695 switch (*code)
1696 {
1697 case EQ: new_code = EQ; break;
1698 case NE: new_code = NE; break;
1699 default: break;
1700 }
1701 break;
1702 default: break;
1703 }
1704
1705 if (new_code != UNKNOWN)
1706 {
1707 /* For CCRAWmode put the required cc mask into the second
1708 operand. */
1709 if (GET_MODE (XVECEXP (*op0, 0, 0)) == CCRAWmode
1710 && INTVAL (*op1) >= 0 && INTVAL (*op1) <= 3)
1711 *op1 = gen_rtx_CONST_INT (VOIDmode, 1 << (3 - INTVAL (*op1)));
1712 *op0 = XVECEXP (*op0, 0, 0);
1713 *code = new_code;
1714 }
1715 }
1716
1717 /* Simplify cascaded EQ, NE with const0_rtx. */
1718 if ((*code == NE || *code == EQ)
1719 && (GET_CODE (*op0) == EQ || GET_CODE (*op0) == NE)
1720 && GET_MODE (*op0) == SImode
1721 && GET_MODE (XEXP (*op0, 0)) == CCZ1mode
1722 && REG_P (XEXP (*op0, 0))
1723 && XEXP (*op0, 1) == const0_rtx
1724 && *op1 == const0_rtx)
1725 {
1726 if ((*code == EQ && GET_CODE (*op0) == NE)
1727 || (*code == NE && GET_CODE (*op0) == EQ))
1728 *code = EQ;
1729 else
1730 *code = NE;
1731 *op0 = XEXP (*op0, 0);
1732 }
1733
1734 /* Prefer register over memory as first operand. */
1735 if (MEM_P (*op0) && REG_P (*op1))
1736 {
1737 rtx tem = *op0; *op0 = *op1; *op1 = tem;
1738 *code = (int)swap_condition ((enum rtx_code)*code);
1739 }
1740
1741 /* A comparison result is compared against zero. Replace it with
1742 the (perhaps inverted) original comparison.
1743 This probably should be done by simplify_relational_operation. */
1744 if ((*code == EQ || *code == NE)
1745 && *op1 == const0_rtx
1746 && COMPARISON_P (*op0)
1747 && CC_REG_P (XEXP (*op0, 0)))
1748 {
1749 enum rtx_code new_code;
1750
1751 if (*code == EQ)
1752 new_code = reversed_comparison_code_parts (GET_CODE (*op0),
1753 XEXP (*op0, 0),
1754 XEXP (*op1, 0), NULL);
1755 else
1756 new_code = GET_CODE (*op0);
1757
1758 if (new_code != UNKNOWN)
1759 {
1760 *code = new_code;
1761 *op1 = XEXP (*op0, 1);
1762 *op0 = XEXP (*op0, 0);
1763 }
1764 }
1765 }
1766
1767
1768 /* Emit a compare instruction suitable to implement the comparison
1769 OP0 CODE OP1. Return the correct condition RTL to be placed in
1770 the IF_THEN_ELSE of the conditional branch testing the result. */
1771
1772 rtx
1773 s390_emit_compare (enum rtx_code code, rtx op0, rtx op1)
1774 {
1775 machine_mode mode = s390_select_ccmode (code, op0, op1);
1776 rtx cc;
1777
1778 if (GET_MODE_CLASS (GET_MODE (op0)) == MODE_CC)
1779 {
1780 /* Do not output a redundant compare instruction if a
1781 compare_and_swap pattern already computed the result and the
1782 machine modes are compatible. */
1783 gcc_assert (s390_cc_modes_compatible (GET_MODE (op0), mode)
1784 == GET_MODE (op0));
1785 cc = op0;
1786 }
1787 else
1788 {
1789 cc = gen_rtx_REG (mode, CC_REGNUM);
1790 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (mode, op0, op1)));
1791 }
1792
1793 return gen_rtx_fmt_ee (code, VOIDmode, cc, const0_rtx);
1794 }
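/* Illustrative use of s390_emit_compare together with s390_emit_jump
   (defined further below); the variable names are only examples:

     cond = s390_emit_compare (EQ, op0, op1);
     s390_emit_jump (label, cond);

   This emits the compare setting the CC register and a conditional
   jump testing the returned (eq cc 0) condition.  */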
1795
1796 /* Emit a SImode compare and swap instruction setting MEM to NEW_RTX if OLD
1797 matches CMP.
1798 Return the correct condition RTL to be placed in the IF_THEN_ELSE of the
1799 conditional branch testing the result. */
1800
1801 static rtx
1802 s390_emit_compare_and_swap (enum rtx_code code, rtx old, rtx mem,
1803 rtx cmp, rtx new_rtx, machine_mode ccmode)
1804 {
1805 rtx cc;
1806
1807 cc = gen_rtx_REG (ccmode, CC_REGNUM);
1808 switch (GET_MODE (mem))
1809 {
1810 case E_SImode:
1811 emit_insn (gen_atomic_compare_and_swapsi_internal (old, mem, cmp,
1812 new_rtx, cc));
1813 break;
1814 case E_DImode:
1815 emit_insn (gen_atomic_compare_and_swapdi_internal (old, mem, cmp,
1816 new_rtx, cc));
1817 break;
1818 case E_TImode:
1819 emit_insn (gen_atomic_compare_and_swapti_internal (old, mem, cmp,
1820 new_rtx, cc));
1821 break;
1822 case E_QImode:
1823 case E_HImode:
1824 default:
1825 gcc_unreachable ();
1826 }
1827 return s390_emit_compare (code, cc, const0_rtx);
1828 }
1829
1830 /* Emit a jump instruction to TARGET and return it. If COND is
1831 NULL_RTX, emit an unconditional jump, else a conditional jump under
1832 condition COND. */
1833
1834 rtx_insn *
1835 s390_emit_jump (rtx target, rtx cond)
1836 {
1837 rtx insn;
1838
1839 target = gen_rtx_LABEL_REF (VOIDmode, target);
1840 if (cond)
1841 target = gen_rtx_IF_THEN_ELSE (VOIDmode, cond, target, pc_rtx);
1842
1843 insn = gen_rtx_SET (pc_rtx, target);
1844 return emit_jump_insn (insn);
1845 }
1846
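/* For example, a conditional branch to a label LABEL taken when OP0 is
   greater than OP1 can be built from the two helpers above, roughly:

     rtx cond = s390_emit_compare (GT, op0, op1);
     s390_emit_jump (label, cond);

   s390_emit_compare emits the CC-setting compare (unless OP0 already is
   a CC-mode register) and s390_emit_jump wraps the returned condition
   into the IF_THEN_ELSE of the branch.  */
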
1847 /* Return branch condition mask to implement a branch
1848 specified by CODE. Return -1 for invalid comparisons. */
1849
1850 int
1851 s390_branch_condition_mask (rtx code)
1852 {
1853 const int CC0 = 1 << 3;
1854 const int CC1 = 1 << 2;
1855 const int CC2 = 1 << 1;
1856 const int CC3 = 1 << 0;
1857
1858 gcc_assert (GET_CODE (XEXP (code, 0)) == REG);
1859 gcc_assert (REGNO (XEXP (code, 0)) == CC_REGNUM);
1860 gcc_assert (XEXP (code, 1) == const0_rtx
1861 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
1862 && CONST_INT_P (XEXP (code, 1))));
1863
1864
1865 switch (GET_MODE (XEXP (code, 0)))
1866 {
1867 case E_CCZmode:
1868 case E_CCZ1mode:
1869 switch (GET_CODE (code))
1870 {
1871 case EQ: return CC0;
1872 case NE: return CC1 | CC2 | CC3;
1873 default: return -1;
1874 }
1875 break;
1876
1877 case E_CCT1mode:
1878 switch (GET_CODE (code))
1879 {
1880 case EQ: return CC1;
1881 case NE: return CC0 | CC2 | CC3;
1882 default: return -1;
1883 }
1884 break;
1885
1886 case E_CCT2mode:
1887 switch (GET_CODE (code))
1888 {
1889 case EQ: return CC2;
1890 case NE: return CC0 | CC1 | CC3;
1891 default: return -1;
1892 }
1893 break;
1894
1895 case E_CCT3mode:
1896 switch (GET_CODE (code))
1897 {
1898 case EQ: return CC3;
1899 case NE: return CC0 | CC1 | CC2;
1900 default: return -1;
1901 }
1902 break;
1903
1904 case E_CCLmode:
1905 switch (GET_CODE (code))
1906 {
1907 case EQ: return CC0 | CC2;
1908 case NE: return CC1 | CC3;
1909 default: return -1;
1910 }
1911 break;
1912
1913 case E_CCL1mode:
1914 switch (GET_CODE (code))
1915 {
1916 case LTU: return CC2 | CC3; /* carry */
1917 case GEU: return CC0 | CC1; /* no carry */
1918 default: return -1;
1919 }
1920 break;
1921
1922 case E_CCL2mode:
1923 switch (GET_CODE (code))
1924 {
1925 case GTU: return CC0 | CC1; /* borrow */
1926 case LEU: return CC2 | CC3; /* no borrow */
1927 default: return -1;
1928 }
1929 break;
1930
1931 case E_CCL3mode:
1932 switch (GET_CODE (code))
1933 {
1934 case EQ: return CC0 | CC2;
1935 case NE: return CC1 | CC3;
1936 case LTU: return CC1;
1937 case GTU: return CC3;
1938 case LEU: return CC1 | CC2;
1939 case GEU: return CC2 | CC3;
1940 default: return -1;
1941 }
1942
1943 case E_CCUmode:
1944 switch (GET_CODE (code))
1945 {
1946 case EQ: return CC0;
1947 case NE: return CC1 | CC2 | CC3;
1948 case LTU: return CC1;
1949 case GTU: return CC2;
1950 case LEU: return CC0 | CC1;
1951 case GEU: return CC0 | CC2;
1952 default: return -1;
1953 }
1954 break;
1955
1956 case E_CCURmode:
1957 switch (GET_CODE (code))
1958 {
1959 case EQ: return CC0;
1960 case NE: return CC2 | CC1 | CC3;
1961 case LTU: return CC2;
1962 case GTU: return CC1;
1963 case LEU: return CC0 | CC2;
1964 case GEU: return CC0 | CC1;
1965 default: return -1;
1966 }
1967 break;
1968
1969 case E_CCAPmode:
1970 switch (GET_CODE (code))
1971 {
1972 case EQ: return CC0;
1973 case NE: return CC1 | CC2 | CC3;
1974 case LT: return CC1 | CC3;
1975 case GT: return CC2;
1976 case LE: return CC0 | CC1 | CC3;
1977 case GE: return CC0 | CC2;
1978 default: return -1;
1979 }
1980 break;
1981
1982 case E_CCANmode:
1983 switch (GET_CODE (code))
1984 {
1985 case EQ: return CC0;
1986 case NE: return CC1 | CC2 | CC3;
1987 case LT: return CC1;
1988 case GT: return CC2 | CC3;
1989 case LE: return CC0 | CC1;
1990 case GE: return CC0 | CC2 | CC3;
1991 default: return -1;
1992 }
1993 break;
1994
1995 case E_CCSmode:
1996 switch (GET_CODE (code))
1997 {
1998 case EQ: return CC0;
1999 case NE: return CC1 | CC2 | CC3;
2000 case LT: return CC1;
2001 case GT: return CC2;
2002 case LE: return CC0 | CC1;
2003 case GE: return CC0 | CC2;
2004 case UNORDERED: return CC3;
2005 case ORDERED: return CC0 | CC1 | CC2;
2006 case UNEQ: return CC0 | CC3;
2007 case UNLT: return CC1 | CC3;
2008 case UNGT: return CC2 | CC3;
2009 case UNLE: return CC0 | CC1 | CC3;
2010 case UNGE: return CC0 | CC2 | CC3;
2011 case LTGT: return CC1 | CC2;
2012 default: return -1;
2013 }
2014 break;
2015
2016 case E_CCSRmode:
2017 switch (GET_CODE (code))
2018 {
2019 case EQ: return CC0;
2020 case NE: return CC2 | CC1 | CC3;
2021 case LT: return CC2;
2022 case GT: return CC1;
2023 case LE: return CC0 | CC2;
2024 case GE: return CC0 | CC1;
2025 case UNORDERED: return CC3;
2026 case ORDERED: return CC0 | CC2 | CC1;
2027 case UNEQ: return CC0 | CC3;
2028 case UNLT: return CC2 | CC3;
2029 case UNGT: return CC1 | CC3;
2030 case UNLE: return CC0 | CC2 | CC3;
2031 case UNGE: return CC0 | CC1 | CC3;
2032 case LTGT: return CC2 | CC1;
2033 default: return -1;
2034 }
2035 break;
2036
2037 /* Vector comparison modes. */
2038 /* CC2 will never be set. It is, however, part of the negated
2039 masks. */
2040 case E_CCVIALLmode:
2041 switch (GET_CODE (code))
2042 {
2043 case EQ:
2044 case GTU:
2045 case GT:
2046 case GE: return CC0;
2047 /* The inverted modes are in fact *any* modes. */
2048 case NE:
2049 case LEU:
2050 case LE:
2051 case LT: return CC3 | CC1 | CC2;
2052 default: return -1;
2053 }
2054
2055 case E_CCVIANYmode:
2056 switch (GET_CODE (code))
2057 {
2058 case EQ:
2059 case GTU:
2060 case GT:
2061 case GE: return CC0 | CC1;
2062 /* The inverted modes are in fact *all* modes. */
2063 case NE:
2064 case LEU:
2065 case LE:
2066 case LT: return CC3 | CC2;
2067 default: return -1;
2068 }
2069 case E_CCVFALLmode:
2070 switch (GET_CODE (code))
2071 {
2072 case EQ:
2073 case GT:
2074 case GE: return CC0;
2075 /* The inverted modes are in fact *any* modes. */
2076 case NE:
2077 case UNLE:
2078 case UNLT: return CC3 | CC1 | CC2;
2079 default: return -1;
2080 }
2081
2082 case E_CCVFANYmode:
2083 switch (GET_CODE (code))
2084 {
2085 case EQ:
2086 case GT:
2087 case GE: return CC0 | CC1;
2088 /* The inverted modes are in fact *all* modes. */
2089 case NE:
2090 case UNLE:
2091 case UNLT: return CC3 | CC2;
2092 default: return -1;
2093 }
2094
2095 case E_CCRAWmode:
2096 switch (GET_CODE (code))
2097 {
2098 case EQ:
2099 return INTVAL (XEXP (code, 1));
2100 case NE:
2101 return (INTVAL (XEXP (code, 1))) ^ 0xf;
2102 default:
2103 gcc_unreachable ();
2104 }
2105
2106 default:
2107 return -1;
2108 }
2109 }
2110
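/* For example, in CCUmode (unsigned comparison) GTU selects CC2 only,
   so the mask is 1 << 1 == 2, while LEU selects CC0 | CC1 ==
   (1 << 3) | (1 << 2) == 12; CC0 is the most significant of the four
   mask bits.  */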
2111
2112 /* Return branch condition mask to implement a compare and branch
2113 specified by CODE. Return -1 for invalid comparisons. */
2114
2115 int
2116 s390_compare_and_branch_condition_mask (rtx code)
2117 {
2118 const int CC0 = 1 << 3;
2119 const int CC1 = 1 << 2;
2120 const int CC2 = 1 << 1;
2121
2122 switch (GET_CODE (code))
2123 {
2124 case EQ:
2125 return CC0;
2126 case NE:
2127 return CC1 | CC2;
2128 case LT:
2129 case LTU:
2130 return CC1;
2131 case GT:
2132 case GTU:
2133 return CC2;
2134 case LE:
2135 case LEU:
2136 return CC0 | CC1;
2137 case GE:
2138 case GEU:
2139 return CC0 | CC2;
2140 default:
2141 gcc_unreachable ();
2142 }
2143 return -1;
2144 }
2145
2146 /* If INV is false, return assembler mnemonic string to implement
2147 a branch specified by CODE. If INV is true, return mnemonic
2148 for the corresponding inverted branch. */
2149
2150 static const char *
2151 s390_branch_condition_mnemonic (rtx code, int inv)
2152 {
2153 int mask;
2154
2155 static const char *const mnemonic[16] =
2156 {
2157 NULL, "o", "h", "nle",
2158 "l", "nhe", "lh", "ne",
2159 "e", "nlh", "he", "nl",
2160 "le", "nh", "no", NULL
2161 };
2162
2163 if (GET_CODE (XEXP (code, 0)) == REG
2164 && REGNO (XEXP (code, 0)) == CC_REGNUM
2165 && (XEXP (code, 1) == const0_rtx
2166 || (GET_MODE (XEXP (code, 0)) == CCRAWmode
2167 && CONST_INT_P (XEXP (code, 1)))))
2168 mask = s390_branch_condition_mask (code);
2169 else
2170 mask = s390_compare_and_branch_condition_mask (code);
2171
2172 gcc_assert (mask >= 0);
2173
2174 if (inv)
2175 mask ^= 15;
2176
2177 gcc_assert (mask >= 1 && mask <= 14);
2178
2179 return mnemonic[mask];
2180 }
2181
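/* For example, for an unsigned greater-than test in CCUmode the mask is
   CC2 == 2 and mnemonic[2] yields "h"; with INV set the mask becomes
   2 ^ 15 == 13 and mnemonic[13] yields "nh", the inverted branch.  */
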
2182 /* Return the part of OP which has a value different from DEF.
2183 The size of the part is determined by MODE.
2184 Use this function only if you already know that OP really
2185 contains such a part. */
2186
2187 unsigned HOST_WIDE_INT
2188 s390_extract_part (rtx op, machine_mode mode, int def)
2189 {
2190 unsigned HOST_WIDE_INT value = 0;
2191 int max_parts = HOST_BITS_PER_WIDE_INT / GET_MODE_BITSIZE (mode);
2192 int part_bits = GET_MODE_BITSIZE (mode);
2193 unsigned HOST_WIDE_INT part_mask = (HOST_WIDE_INT_1U << part_bits) - 1;
2194 int i;
2195
2196 for (i = 0; i < max_parts; i++)
2197 {
2198 if (i == 0)
2199 value = UINTVAL (op);
2200 else
2201 value >>= part_bits;
2202
2203 if ((value & part_mask) != (def & part_mask))
2204 return value & part_mask;
2205 }
2206
2207 gcc_unreachable ();
2208 }
2209
2210 /* If OP is an integer constant of mode MODE with exactly one
2211 part of mode PART_MODE unequal to DEF, return the number of that
2212 part. Otherwise, return -1. */
2213
2214 int
2215 s390_single_part (rtx op,
2216 machine_mode mode,
2217 machine_mode part_mode,
2218 int def)
2219 {
2220 unsigned HOST_WIDE_INT value = 0;
2221 int n_parts = GET_MODE_SIZE (mode) / GET_MODE_SIZE (part_mode);
2222 unsigned HOST_WIDE_INT part_mask
2223 = (HOST_WIDE_INT_1U << GET_MODE_BITSIZE (part_mode)) - 1;
2224 int i, part = -1;
2225
2226 if (GET_CODE (op) != CONST_INT)
2227 return -1;
2228
2229 for (i = 0; i < n_parts; i++)
2230 {
2231 if (i == 0)
2232 value = UINTVAL (op);
2233 else
2234 value >>= GET_MODE_BITSIZE (part_mode);
2235
2236 if ((value & part_mask) != (def & part_mask))
2237 {
2238 if (part != -1)
2239 return -1;
2240 else
2241 part = i;
2242 }
2243 }
2244 return part == -1 ? -1 : n_parts - 1 - part;
2245 }
2246
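/* For example, for the SImode constant 0x0000ff00 with DEF == 0:
   s390_extract_part (op, QImode, 0) returns 0xff, the only byte
   differing from 0, and s390_single_part (op, SImode, QImode, 0)
   returns 2, i.e. the differing byte is part 2 when parts are counted
   from the most significant byte (part 0) downwards.  */
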
2247 /* Return true if IN contains a contiguous bitfield in the lower SIZE
2248 bits and no other bits are set in (the lower SIZE bits of) IN.
2249
2250 PSTART and PEND can be used to obtain the start and end
2251 position (inclusive) of the bitfield relative to 64
2252 bits. *PSTART / *PEND gives the position of the first/last bit
2253 of the bitfield counting from the highest order bit starting
2254 with zero. */
2255
2256 bool
2257 s390_contiguous_bitmask_nowrap_p (unsigned HOST_WIDE_INT in, int size,
2258 int *pstart, int *pend)
2259 {
2260 int start;
2261 int end = -1;
2262 int lowbit = HOST_BITS_PER_WIDE_INT - 1;
2263 int highbit = HOST_BITS_PER_WIDE_INT - size;
2264 unsigned HOST_WIDE_INT bitmask = HOST_WIDE_INT_1U;
2265
2266 gcc_assert (!!pstart == !!pend);
2267 for (start = lowbit; start >= highbit; bitmask <<= 1, start--)
2268 if (end == -1)
2269 {
2270 /* Look for the rightmost bit of a contiguous range of ones. */
2271 if (bitmask & in)
2272 /* Found it. */
2273 end = start;
2274 }
2275 else
2276 {
2277 /* Look for the first zero bit after the range of ones. */
2278 if (! (bitmask & in))
2279 /* Found it. */
2280 break;
2281 }
2282 /* We're one past the last one-bit. */
2283 start++;
2284
2285 if (end == -1)
2286 /* No one bits found. */
2287 return false;
2288
2289 if (start > highbit)
2290 {
2291 unsigned HOST_WIDE_INT mask;
2292
2293 /* Calculate a mask for all bits beyond the contiguous bits. */
2294 mask = ((~HOST_WIDE_INT_0U >> highbit)
2295 & (~HOST_WIDE_INT_0U << (lowbit - start + 1)));
2296 if (mask & in)
2297 /* There are more bits set beyond the first range of one bits. */
2298 return false;
2299 }
2300
2301 if (pstart)
2302 {
2303 *pstart = start;
2304 *pend = end;
2305 }
2306
2307 return true;
2308 }
2309
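/* For example, for IN == 0x0ff0 and SIZE == 16 this function returns
   true with *PSTART == 52 and *PEND == 59: the ones occupy bits 4..11
   counted from the least significant bit, i.e. positions 52..59 when
   counting from the most significant of the 64 bits.  */
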
2310 /* Same as s390_contiguous_bitmask_nowrap_p but also returns true
2311 if ~IN contains a contiguous bitfield. In that case, *END is <
2312 *START.
2313
2314 If WRAP_P is true, a bitmask that wraps around is also tested.
2315 When a wraparound occurs, *START is greater than *END (given
2316 non-null pointers), and the uppermost (64 - SIZE) bits are thus
2317 part of the range. If WRAP_P is false, no wraparound is
2318 tested. */
2319
2320 bool
2321 s390_contiguous_bitmask_p (unsigned HOST_WIDE_INT in, bool wrap_p,
2322 int size, int *start, int *end)
2323 {
2324 int bs = HOST_BITS_PER_WIDE_INT;
2325 bool b;
2326
2327 gcc_assert (!!start == !!end);
2328 if ((in & ((~HOST_WIDE_INT_0U) >> (bs - size))) == 0)
2329 /* This cannot be expressed as a contiguous bitmask. Exit early because
2330 the second call of s390_contiguous_bitmask_nowrap_p would accept this as
2331 a valid bitmask. */
2332 return false;
2333 b = s390_contiguous_bitmask_nowrap_p (in, size, start, end);
2334 if (b)
2335 return true;
2336 if (! wrap_p)
2337 return false;
2338 b = s390_contiguous_bitmask_nowrap_p (~in, size, start, end);
2339 if (b && start)
2340 {
2341 int s = *start;
2342 int e = *end;
2343
2344 gcc_assert (s >= 1);
2345 *start = ((e + 1) & (bs - 1));
2346 *end = ((s - 1 + bs) & (bs - 1));
2347 }
2348
2349 return b;
2350 }
2351
2352 /* Return true if OP contains the same contiguous bitfield in *all*
2353 its elements. START and END can be used to obtain the start and
2354 end position of the bitfield.
2355
2356 START/END give the position of the first/last bit of the bitfield
2357 counting from the lowest order bit starting with zero. In order to
2358 use these values for S/390 instructions this has to be converted to
2359 "bits big endian" style. */
2360
2361 bool
2362 s390_contiguous_bitmask_vector_p (rtx op, int *start, int *end)
2363 {
2364 unsigned HOST_WIDE_INT mask;
2365 int size;
2366 rtx elt;
2367 bool b;
2368
2369 gcc_assert (!!start == !!end);
2370 if (!const_vec_duplicate_p (op, &elt)
2371 || !CONST_INT_P (elt))
2372 return false;
2373
2374 size = GET_MODE_UNIT_BITSIZE (GET_MODE (op));
2375
2376 /* We cannot deal with V1TI/V1TF. This would require a vgmq. */
2377 if (size > 64)
2378 return false;
2379
2380 mask = UINTVAL (elt);
2381
2382 b = s390_contiguous_bitmask_p (mask, true, size, start, end);
2383 if (b)
2384 {
2385 if (start)
2386 {
2387 *start -= (HOST_BITS_PER_WIDE_INT - size);
2388 *end -= (HOST_BITS_PER_WIDE_INT - size);
2389 }
2390 return true;
2391 }
2392 else
2393 return false;
2394 }
2395
2396 /* Return true if OP consists only of byte chunks that are either 0 or
2397 0xff. If MASK is non-NULL, a byte mask is generated which is
2398 appropriate for the vector generate byte mask instruction. */
2399
2400 bool
2401 s390_bytemask_vector_p (rtx op, unsigned *mask)
2402 {
2403 int i;
2404 unsigned tmp_mask = 0;
2405 int nunit, unit_size;
2406
2407 if (!VECTOR_MODE_P (GET_MODE (op))
2408 || GET_CODE (op) != CONST_VECTOR
2409 || !CONST_INT_P (XVECEXP (op, 0, 0)))
2410 return false;
2411
2412 nunit = GET_MODE_NUNITS (GET_MODE (op));
2413 unit_size = GET_MODE_UNIT_SIZE (GET_MODE (op));
2414
2415 for (i = 0; i < nunit; i++)
2416 {
2417 unsigned HOST_WIDE_INT c;
2418 int j;
2419
2420 if (!CONST_INT_P (XVECEXP (op, 0, i)))
2421 return false;
2422
2423 c = UINTVAL (XVECEXP (op, 0, i));
2424 for (j = 0; j < unit_size; j++)
2425 {
2426 if ((c & 0xff) != 0 && (c & 0xff) != 0xff)
2427 return false;
2428 tmp_mask |= (c & 1) << ((nunit - 1 - i) * unit_size + j);
2429 c = c >> BITS_PER_UNIT;
2430 }
2431 }
2432
2433 if (mask != NULL)
2434 *mask = tmp_mask;
2435
2436 return true;
2437 }
2438
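/* For example, for a V4SImode constant vector { -1, 0, 0, -1 } every
   byte is either 0 or 0xff, so the function returns true and sets
   *MASK to 0xf00f: one mask bit per byte, with the outer elements
   contributing the four highest and the four lowest bits.  */
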
2439 /* Check whether a rotate of ROTL followed by an AND of CONTIG is
2440 equivalent to a shift followed by the AND. In particular, CONTIG
2441 should not overlap the (rotated) bit 0/bit 63 gap. Negative values
2442 for ROTL indicate a rotate to the right. */
2443
2444 bool
2445 s390_extzv_shift_ok (int bitsize, int rotl, unsigned HOST_WIDE_INT contig)
2446 {
2447 int start, end;
2448 bool ok;
2449
2450 ok = s390_contiguous_bitmask_nowrap_p (contig, bitsize, &start, &end);
2451 gcc_assert (ok);
2452
2453 if (rotl >= 0)
2454 return (64 - end >= rotl);
2455 else
2456 {
2457 /* Translate "- rotate right" in BITSIZE mode to "rotate left" in
2458 DImode. */
2459 rotl = -rotl + (64 - bitsize);
2460 return (start >= rotl);
2461 }
2462 }
2463
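/* For example, for CONTIG == 0x00ff0000 the contiguous field ends at
   big-endian bit position 47, so a left rotate of up to 64 - 47 == 17
   bits is fine: s390_extzv_shift_ok (32, 8, 0x00ff0000) returns true,
   whereas ROTL == 20 would make it return false.  */
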
2464 /* Check whether we can (and want to) split a double-word
2465 move in mode MODE from SRC to DST into two single-word
2466 moves, moving the subword FIRST_SUBWORD first. */
2467
2468 bool
2469 s390_split_ok_p (rtx dst, rtx src, machine_mode mode, int first_subword)
2470 {
2471 /* Floating point and vector registers cannot be split. */
2472 if (FP_REG_P (src) || FP_REG_P (dst) || VECTOR_REG_P (src) || VECTOR_REG_P (dst))
2473 return false;
2474
2475 /* Non-offsettable memory references cannot be split. */
2476 if ((GET_CODE (src) == MEM && !offsettable_memref_p (src))
2477 || (GET_CODE (dst) == MEM && !offsettable_memref_p (dst)))
2478 return false;
2479
2480 /* Moving the first subword must not clobber a register
2481 needed to move the second subword. */
2482 if (register_operand (dst, mode))
2483 {
2484 rtx subreg = operand_subword (dst, first_subword, 0, mode);
2485 if (reg_overlap_mentioned_p (subreg, src))
2486 return false;
2487 }
2488
2489 return true;
2490 }
2491
2492 /* Return true if it can be proven that [MEM1, MEM1 + SIZE]
2493 and [MEM2, MEM2 + SIZE] do overlap and false
2494 otherwise. */
2495
2496 bool
2497 s390_overlap_p (rtx mem1, rtx mem2, HOST_WIDE_INT size)
2498 {
2499 rtx addr1, addr2, addr_delta;
2500 HOST_WIDE_INT delta;
2501
2502 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2503 return true;
2504
2505 if (size == 0)
2506 return false;
2507
2508 addr1 = XEXP (mem1, 0);
2509 addr2 = XEXP (mem2, 0);
2510
2511 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2512
2513 /* This overlapping check is used by peepholes merging memory block operations.
2514 Overlapping operations would otherwise be recognized by the S/390 hardware
2515 and would fall back to a slower implementation. Allowing overlapping
2516 operations would lead to slow code but not to wrong code. Therefore we are
2517 somewhat optimistic if we cannot prove that the memory blocks are
2518 overlapping.
2519 That's why we return false here although this may accept operations on
2520 overlapping memory areas. */
2521 if (!addr_delta || GET_CODE (addr_delta) != CONST_INT)
2522 return false;
2523
2524 delta = INTVAL (addr_delta);
2525
2526 if (delta == 0
2527 || (delta > 0 && delta < size)
2528 || (delta < 0 && -delta < size))
2529 return true;
2530
2531 return false;
2532 }
2533
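/* For example, for MEM1 at address (reg R) and MEM2 at address
   (plus (reg R) (const_int 8)) the delta simplifies to 8: with
   SIZE == 16 the blocks provably overlap and true is returned, while
   with SIZE == 8 they do not and false is returned.  */
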
2534 /* Check whether the address of memory reference MEM2 equals exactly
2535 the address of memory reference MEM1 plus DELTA. Return true if
2536 we can prove this to be the case, false otherwise. */
2537
2538 bool
2539 s390_offset_p (rtx mem1, rtx mem2, rtx delta)
2540 {
2541 rtx addr1, addr2, addr_delta;
2542
2543 if (GET_CODE (mem1) != MEM || GET_CODE (mem2) != MEM)
2544 return false;
2545
2546 addr1 = XEXP (mem1, 0);
2547 addr2 = XEXP (mem2, 0);
2548
2549 addr_delta = simplify_binary_operation (MINUS, Pmode, addr2, addr1);
2550 if (!addr_delta || !rtx_equal_p (addr_delta, delta))
2551 return false;
2552
2553 return true;
2554 }
2555
2556 /* Expand logical operator CODE in mode MODE with operands OPERANDS. */
2557
2558 void
2559 s390_expand_logical_operator (enum rtx_code code, machine_mode mode,
2560 rtx *operands)
2561 {
2562 machine_mode wmode = mode;
2563 rtx dst = operands[0];
2564 rtx src1 = operands[1];
2565 rtx src2 = operands[2];
2566 rtx op, clob, tem;
2567
2568 /* If we cannot handle the operation directly, use a temp register. */
2569 if (!s390_logical_operator_ok_p (operands))
2570 dst = gen_reg_rtx (mode);
2571
2572 /* QImode and HImode patterns make sense only if we have a destination
2573 in memory. Otherwise perform the operation in SImode. */
2574 if ((mode == QImode || mode == HImode) && GET_CODE (dst) != MEM)
2575 wmode = SImode;
2576
2577 /* Widen operands if required. */
2578 if (mode != wmode)
2579 {
2580 if (GET_CODE (dst) == SUBREG
2581 && (tem = simplify_subreg (wmode, dst, mode, 0)) != 0)
2582 dst = tem;
2583 else if (REG_P (dst))
2584 dst = gen_rtx_SUBREG (wmode, dst, 0);
2585 else
2586 dst = gen_reg_rtx (wmode);
2587
2588 if (GET_CODE (src1) == SUBREG
2589 && (tem = simplify_subreg (wmode, src1, mode, 0)) != 0)
2590 src1 = tem;
2591 else if (GET_MODE (src1) != VOIDmode)
2592 src1 = gen_rtx_SUBREG (wmode, force_reg (mode, src1), 0);
2593
2594 if (GET_CODE (src2) == SUBREG
2595 && (tem = simplify_subreg (wmode, src2, mode, 0)) != 0)
2596 src2 = tem;
2597 else if (GET_MODE (src2) != VOIDmode)
2598 src2 = gen_rtx_SUBREG (wmode, force_reg (mode, src2), 0);
2599 }
2600
2601 /* Emit the instruction. */
2602 op = gen_rtx_SET (dst, gen_rtx_fmt_ee (code, wmode, src1, src2));
2603 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
2604 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
2605
2606 /* Fix up the destination if needed. */
2607 if (dst != operands[0])
2608 emit_move_insn (operands[0], gen_lowpart (mode, dst));
2609 }
2610
2611 /* Check whether OPERANDS are OK for a logical operation (AND, IOR, XOR). */
2612
2613 bool
2614 s390_logical_operator_ok_p (rtx *operands)
2615 {
2616 /* If the destination operand is in memory, it needs to coincide
2617 with one of the source operands. After reload, it has to be
2618 the first source operand. */
2619 if (GET_CODE (operands[0]) == MEM)
2620 return rtx_equal_p (operands[0], operands[1])
2621 || (!reload_completed && rtx_equal_p (operands[0], operands[2]));
2622
2623 return true;
2624 }
2625
2626 /* Narrow logical operation CODE of memory operand MEMOP with immediate
2627 operand IMMOP to switch from SS to SI type instructions. */
2628
2629 void
2630 s390_narrow_logical_operator (enum rtx_code code, rtx *memop, rtx *immop)
2631 {
2632 int def = code == AND ? -1 : 0;
2633 HOST_WIDE_INT mask;
2634 int part;
2635
2636 gcc_assert (GET_CODE (*memop) == MEM);
2637 gcc_assert (!MEM_VOLATILE_P (*memop));
2638
2639 mask = s390_extract_part (*immop, QImode, def);
2640 part = s390_single_part (*immop, GET_MODE (*memop), QImode, def);
2641 gcc_assert (part >= 0);
2642
2643 *memop = adjust_address (*memop, QImode, part);
2644 *immop = gen_int_mode (mask, QImode);
2645 }
2646
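/* For example, an AND of an SImode memory operand with (const_int -256)
   (0xffffff00) differs from the AND default of all ones only in its
   least significant byte, so the operation is narrowed to a QImode AND
   of byte 3 of the memory operand (big-endian byte numbering) with the
   immediate 0.  */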
2647
2648 /* How to allocate a 'struct machine_function'. */
2649
2650 static struct machine_function *
2651 s390_init_machine_status (void)
2652 {
2653 return ggc_cleared_alloc<machine_function> ();
2654 }
2655
2656 /* Map for smallest class containing reg regno. */
2657
2658 const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER] =
2659 { GENERAL_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 0 */
2660 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 4 */
2661 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 8 */
2662 ADDR_REGS, ADDR_REGS, ADDR_REGS, ADDR_REGS, /* 12 */
2663 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 16 */
2664 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 20 */
2665 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 24 */
2666 FP_REGS, FP_REGS, FP_REGS, FP_REGS, /* 28 */
2667 ADDR_REGS, CC_REGS, ADDR_REGS, ADDR_REGS, /* 32 */
2668 ACCESS_REGS, ACCESS_REGS, VEC_REGS, VEC_REGS, /* 36 */
2669 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 40 */
2670 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 44 */
2671 VEC_REGS, VEC_REGS, VEC_REGS, VEC_REGS, /* 48 */
2672 VEC_REGS, VEC_REGS /* 52 */
2673 };
2674
2675 /* Return attribute type of insn. */
2676
2677 static enum attr_type
2678 s390_safe_attr_type (rtx_insn *insn)
2679 {
2680 if (recog_memoized (insn) >= 0)
2681 return get_attr_type (insn);
2682 else
2683 return TYPE_NONE;
2684 }
2685
2686 /* Return true if DISP is a valid short displacement. */
2687
2688 static bool
2689 s390_short_displacement (rtx disp)
2690 {
2691 /* No displacement is OK. */
2692 if (!disp)
2693 return true;
2694
2695 /* Without the long displacement facility we don't need to
2696 distinguish between long and short displacements. */
2697 if (!TARGET_LONG_DISPLACEMENT)
2698 return true;
2699
2700 /* Integer displacement in range. */
2701 if (GET_CODE (disp) == CONST_INT)
2702 return INTVAL (disp) >= 0 && INTVAL (disp) < 4096;
2703
2704 /* GOT offset is not OK, the GOT can be large. */
2705 if (GET_CODE (disp) == CONST
2706 && GET_CODE (XEXP (disp, 0)) == UNSPEC
2707 && (XINT (XEXP (disp, 0), 1) == UNSPEC_GOT
2708 || XINT (XEXP (disp, 0), 1) == UNSPEC_GOTNTPOFF))
2709 return false;
2710
2711 /* All other symbolic constants are literal pool references,
2712 which are OK as the literal pool must be small. */
2713 if (GET_CODE (disp) == CONST)
2714 return true;
2715
2716 return false;
2717 }
2718
2719 /* Decompose a RTL expression ADDR for a memory address into
2720 its components, returned in OUT.
2721
2722 Returns false if ADDR is not a valid memory address, true
2723 otherwise. If OUT is NULL, don't return the components,
2724 but check for validity only.
2725
2726 Note: Only addresses in canonical form are recognized.
2727 LEGITIMIZE_ADDRESS should convert non-canonical forms to the
2728 canonical form so that they will be recognized. */
2729
2730 static int
2731 s390_decompose_address (rtx addr, struct s390_address *out)
2732 {
2733 HOST_WIDE_INT offset = 0;
2734 rtx base = NULL_RTX;
2735 rtx indx = NULL_RTX;
2736 rtx disp = NULL_RTX;
2737 rtx orig_disp;
2738 bool pointer = false;
2739 bool base_ptr = false;
2740 bool indx_ptr = false;
2741 bool literal_pool = false;
2742
2743 /* We may need to substitute the literal pool base register into the address
2744 below. However, at this point we do not know which register is going to
2745 be used as base, so we substitute the arg pointer register. This is going
2746 to be treated as holding a pointer below -- it shouldn't be used for any
2747 other purpose. */
2748 rtx fake_pool_base = gen_rtx_REG (Pmode, ARG_POINTER_REGNUM);
2749
2750 /* Decompose address into base + index + displacement. */
2751
2752 if (GET_CODE (addr) == REG || GET_CODE (addr) == UNSPEC)
2753 base = addr;
2754
2755 else if (GET_CODE (addr) == PLUS)
2756 {
2757 rtx op0 = XEXP (addr, 0);
2758 rtx op1 = XEXP (addr, 1);
2759 enum rtx_code code0 = GET_CODE (op0);
2760 enum rtx_code code1 = GET_CODE (op1);
2761
2762 if (code0 == REG || code0 == UNSPEC)
2763 {
2764 if (code1 == REG || code1 == UNSPEC)
2765 {
2766 indx = op0; /* index + base */
2767 base = op1;
2768 }
2769
2770 else
2771 {
2772 base = op0; /* base + displacement */
2773 disp = op1;
2774 }
2775 }
2776
2777 else if (code0 == PLUS)
2778 {
2779 indx = XEXP (op0, 0); /* index + base + disp */
2780 base = XEXP (op0, 1);
2781 disp = op1;
2782 }
2783
2784 else
2785 {
2786 return false;
2787 }
2788 }
2789
2790 else
2791 disp = addr; /* displacement */
2792
2793 /* Extract integer part of displacement. */
2794 orig_disp = disp;
2795 if (disp)
2796 {
2797 if (GET_CODE (disp) == CONST_INT)
2798 {
2799 offset = INTVAL (disp);
2800 disp = NULL_RTX;
2801 }
2802 else if (GET_CODE (disp) == CONST
2803 && GET_CODE (XEXP (disp, 0)) == PLUS
2804 && GET_CODE (XEXP (XEXP (disp, 0), 1)) == CONST_INT)
2805 {
2806 offset = INTVAL (XEXP (XEXP (disp, 0), 1));
2807 disp = XEXP (XEXP (disp, 0), 0);
2808 }
2809 }
2810
2811 /* Strip off CONST here to avoid special case tests later. */
2812 if (disp && GET_CODE (disp) == CONST)
2813 disp = XEXP (disp, 0);
2814
2815 /* We can convert literal pool addresses to
2816 displacements by basing them off the base register. */
2817 if (disp && GET_CODE (disp) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (disp))
2818 {
2819 if (base || indx)
2820 return false;
2821
2822 base = fake_pool_base, literal_pool = true;
2823
2824 /* Mark up the displacement. */
2825 disp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, disp),
2826 UNSPEC_LTREL_OFFSET);
2827 }
2828
2829 /* Validate base register. */
2830 if (base)
2831 {
2832 if (GET_CODE (base) == UNSPEC)
2833 switch (XINT (base, 1))
2834 {
2835 case UNSPEC_LTREF:
2836 if (!disp)
2837 disp = gen_rtx_UNSPEC (Pmode,
2838 gen_rtvec (1, XVECEXP (base, 0, 0)),
2839 UNSPEC_LTREL_OFFSET);
2840 else
2841 return false;
2842
2843 base = XVECEXP (base, 0, 1);
2844 break;
2845
2846 case UNSPEC_LTREL_BASE:
2847 if (XVECLEN (base, 0) == 1)
2848 base = fake_pool_base, literal_pool = true;
2849 else
2850 base = XVECEXP (base, 0, 1);
2851 break;
2852
2853 default:
2854 return false;
2855 }
2856
2857 if (!REG_P (base) || GET_MODE (base) != Pmode)
2858 return false;
2859
2860 if (REGNO (base) == STACK_POINTER_REGNUM
2861 || REGNO (base) == FRAME_POINTER_REGNUM
2862 || ((reload_completed || reload_in_progress)
2863 && frame_pointer_needed
2864 && REGNO (base) == HARD_FRAME_POINTER_REGNUM)
2865 || REGNO (base) == ARG_POINTER_REGNUM
2866 || (flag_pic
2867 && REGNO (base) == PIC_OFFSET_TABLE_REGNUM))
2868 pointer = base_ptr = true;
2869
2870 if ((reload_completed || reload_in_progress)
2871 && base == cfun->machine->base_reg)
2872 pointer = base_ptr = literal_pool = true;
2873 }
2874
2875 /* Validate index register. */
2876 if (indx)
2877 {
2878 if (GET_CODE (indx) == UNSPEC)
2879 switch (XINT (indx, 1))
2880 {
2881 case UNSPEC_LTREF:
2882 if (!disp)
2883 disp = gen_rtx_UNSPEC (Pmode,
2884 gen_rtvec (1, XVECEXP (indx, 0, 0)),
2885 UNSPEC_LTREL_OFFSET);
2886 else
2887 return false;
2888
2889 indx = XVECEXP (indx, 0, 1);
2890 break;
2891
2892 case UNSPEC_LTREL_BASE:
2893 if (XVECLEN (indx, 0) == 1)
2894 indx = fake_pool_base, literal_pool = true;
2895 else
2896 indx = XVECEXP (indx, 0, 1);
2897 break;
2898
2899 default:
2900 return false;
2901 }
2902
2903 if (!REG_P (indx) || GET_MODE (indx) != Pmode)
2904 return false;
2905
2906 if (REGNO (indx) == STACK_POINTER_REGNUM
2907 || REGNO (indx) == FRAME_POINTER_REGNUM
2908 || ((reload_completed || reload_in_progress)
2909 && frame_pointer_needed
2910 && REGNO (indx) == HARD_FRAME_POINTER_REGNUM)
2911 || REGNO (indx) == ARG_POINTER_REGNUM
2912 || (flag_pic
2913 && REGNO (indx) == PIC_OFFSET_TABLE_REGNUM))
2914 pointer = indx_ptr = true;
2915
2916 if ((reload_completed || reload_in_progress)
2917 && indx == cfun->machine->base_reg)
2918 pointer = indx_ptr = literal_pool = true;
2919 }
2920
2921 /* Prefer to use pointer as base, not index. */
2922 if (base && indx && !base_ptr
2923 && (indx_ptr || (!REG_POINTER (base) && REG_POINTER (indx))))
2924 {
2925 rtx tmp = base;
2926 base = indx;
2927 indx = tmp;
2928 }
2929
2930 /* Validate displacement. */
2931 if (!disp)
2932 {
2933 /* If virtual registers are involved, the displacement will change later
2934 anyway as the virtual registers get eliminated. This could make a
2935 valid displacement invalid, but it is more likely to make an invalid
2936 displacement valid, because we sometimes access the register save area
2937 via negative offsets to one of those registers.
2938 Thus we don't check the displacement for validity here. If after
2939 elimination the displacement turns out to be invalid after all,
2940 this is fixed up by reload in any case. */
2941 /* LRA always keeps displacements up to date and we need to know
2942 that the displacement is correct during all of LRA, not only at
2943 the final elimination. */
2944 if (lra_in_progress
2945 || (base != arg_pointer_rtx
2946 && indx != arg_pointer_rtx
2947 && base != return_address_pointer_rtx
2948 && indx != return_address_pointer_rtx
2949 && base != frame_pointer_rtx
2950 && indx != frame_pointer_rtx
2951 && base != virtual_stack_vars_rtx
2952 && indx != virtual_stack_vars_rtx))
2953 if (!DISP_IN_RANGE (offset))
2954 return false;
2955 }
2956 else
2957 {
2958 /* All the special cases are pointers. */
2959 pointer = true;
2960
2961 /* In the small-PIC case, the linker converts @GOT
2962 and @GOTNTPOFF offsets to possible displacements. */
2963 if (GET_CODE (disp) == UNSPEC
2964 && (XINT (disp, 1) == UNSPEC_GOT
2965 || XINT (disp, 1) == UNSPEC_GOTNTPOFF)
2966 && flag_pic == 1)
2967 {
2968 ;
2969 }
2970
2971 /* Accept pool label offsets. */
2972 else if (GET_CODE (disp) == UNSPEC
2973 && XINT (disp, 1) == UNSPEC_POOL_OFFSET)
2974 ;
2975
2976 /* Accept literal pool references. */
2977 else if (GET_CODE (disp) == UNSPEC
2978 && XINT (disp, 1) == UNSPEC_LTREL_OFFSET)
2979 {
2980 /* In case CSE pulled a non literal pool reference out of
2981 the pool we have to reject the address. This is
2982 especially important when loading the GOT pointer on non
2983 zarch CPUs. In this case the literal pool contains an lt
2984 relative offset to the _GLOBAL_OFFSET_TABLE_ label which
2985 will most likely exceed the displacement. */
2986 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
2987 || !CONSTANT_POOL_ADDRESS_P (XVECEXP (disp, 0, 0)))
2988 return false;
2989
2990 orig_disp = gen_rtx_CONST (Pmode, disp);
2991 if (offset)
2992 {
2993 /* If we have an offset, make sure it does not
2994 exceed the size of the constant pool entry. */
2995 rtx sym = XVECEXP (disp, 0, 0);
2996 if (offset >= GET_MODE_SIZE (get_pool_mode (sym)))
2997 return false;
2998
2999 orig_disp = plus_constant (Pmode, orig_disp, offset);
3000 }
3001 }
3002
3003 else
3004 return false;
3005 }
3006
3007 if (!base && !indx)
3008 pointer = true;
3009
3010 if (out)
3011 {
3012 out->base = base;
3013 out->indx = indx;
3014 out->disp = orig_disp;
3015 out->pointer = pointer;
3016 out->literal_pool = literal_pool;
3017 }
3018
3019 return true;
3020 }
3021
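/* For example, the address (plus (plus (reg %r2) (reg %r3))
   (const_int 40)) is decomposed into indx == %r2, base == %r3 and an
   integer displacement of 40, which is then checked against
   DISP_IN_RANGE.  */
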
3022 /* Decompose a RTL expression OP for an address style operand into its
3023 components, and return the base register in BASE and the offset in
3024 OFFSET. While OP looks like an address it is never supposed to be
3025 used as such.
3026
3027 Return true if OP is a valid address operand, false if not. */
3028
3029 bool
3030 s390_decompose_addrstyle_without_index (rtx op, rtx *base,
3031 HOST_WIDE_INT *offset)
3032 {
3033 rtx off = NULL_RTX;
3034
3035 /* We can have an integer constant, an address register,
3036 or a sum of the two. */
3037 if (CONST_SCALAR_INT_P (op))
3038 {
3039 off = op;
3040 op = NULL_RTX;
3041 }
3042 if (op && GET_CODE (op) == PLUS && CONST_SCALAR_INT_P (XEXP (op, 1)))
3043 {
3044 off = XEXP (op, 1);
3045 op = XEXP (op, 0);
3046 }
3047 while (op && GET_CODE (op) == SUBREG)
3048 op = SUBREG_REG (op);
3049
3050 if (op && GET_CODE (op) != REG)
3051 return false;
3052
3053 if (offset)
3054 {
3055 if (off == NULL_RTX)
3056 *offset = 0;
3057 else if (CONST_INT_P (off))
3058 *offset = INTVAL (off);
3059 else if (CONST_WIDE_INT_P (off))
3060 /* The offset will anyway be cut down to 12 bits so take just
3061 the lowest order chunk of the wide int. */
3062 *offset = CONST_WIDE_INT_ELT (off, 0);
3063 else
3064 gcc_unreachable ();
3065 }
3066 if (base)
3067 *base = op;
3068
3069 return true;
3070 }
3071
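/* For example, (plus (reg %r1) (const_int 7)) yields *BASE == %r1 and
   *OFFSET == 7, while a plain (const_int 7) yields *BASE == NULL_RTX
   and *OFFSET == 7.  */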
3072
3073 /* Return true if OP is a valid address without index. */
3074
3075 bool
3076 s390_legitimate_address_without_index_p (rtx op)
3077 {
3078 struct s390_address addr;
3079
3080 if (!s390_decompose_address (XEXP (op, 0), &addr))
3081 return false;
3082 if (addr.indx)
3083 return false;
3084
3085 return true;
3086 }
3087
3088
3089 /* Return TRUE if ADDR is an operand valid for a load/store relative
3090 instruction. Be aware that the alignment of the operand needs to
3091 be checked separately.
3092 Valid addresses are single references or a sum of a reference and a
3093 constant integer. Return these parts in SYMREF and ADDEND. You can
3094 pass NULL in SYMREF and/or ADDEND if you are not interested in these
3095 values. Literal pool references are *not* considered symbol
3096 references. */
3097
3098 static bool
3099 s390_loadrelative_operand_p (rtx addr, rtx *symref, HOST_WIDE_INT *addend)
3100 {
3101 HOST_WIDE_INT tmpaddend = 0;
3102
3103 if (GET_CODE (addr) == CONST)
3104 addr = XEXP (addr, 0);
3105
3106 if (GET_CODE (addr) == PLUS)
3107 {
3108 if (!CONST_INT_P (XEXP (addr, 1)))
3109 return false;
3110
3111 tmpaddend = INTVAL (XEXP (addr, 1));
3112 addr = XEXP (addr, 0);
3113 }
3114
3115 if ((GET_CODE (addr) == SYMBOL_REF && !CONSTANT_POOL_ADDRESS_P (addr))
3116 || (GET_CODE (addr) == UNSPEC
3117 && (XINT (addr, 1) == UNSPEC_GOTENT
3118 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
3119 {
3120 if (symref)
3121 *symref = addr;
3122 if (addend)
3123 *addend = tmpaddend;
3124
3125 return true;
3126 }
3127 return false;
3128 }
3129
3130 /* Return true if the address in OP is valid for constraint letter C
3131 if wrapped in a MEM rtx. Set LIT_POOL_OK to true if literal
3132 pool MEMs should be accepted. Only the Q, R, S, T constraint
3133 letters are allowed for C. */
3134
3135 static int
3136 s390_check_qrst_address (char c, rtx op, bool lit_pool_ok)
3137 {
3138 struct s390_address addr;
3139 bool decomposed = false;
3140
3141 if (!address_operand (op, GET_MODE (op)))
3142 return 0;
3143
3144 /* This check makes sure that no symbolic address (except literal
3145 pool references) is accepted by the R or T constraints. */
3146 if (s390_loadrelative_operand_p (op, NULL, NULL))
3147 return 0;
3148
3149 /* Ensure literal pool references are only accepted if LIT_POOL_OK. */
3150 if (!lit_pool_ok)
3151 {
3152 if (!s390_decompose_address (op, &addr))
3153 return 0;
3154 if (addr.literal_pool)
3155 return 0;
3156 decomposed = true;
3157 }
3158
3159 /* With reload, we sometimes get intermediate address forms that are
3160 actually invalid as-is, but we need to accept them in the most
3161 generic cases below ('R' or 'T'), since reload will in fact fix
3162 them up. LRA behaves differently here; we never see such forms,
3163 but on the other hand, we need to strictly reject every invalid
3164 address form. Perform this check right up front. */
3165 if (lra_in_progress)
3166 {
3167 if (!decomposed && !s390_decompose_address (op, &addr))
3168 return 0;
3169 decomposed = true;
3170 }
3171
3172 switch (c)
3173 {
3174 case 'Q': /* no index short displacement */
3175 if (!decomposed && !s390_decompose_address (op, &addr))
3176 return 0;
3177 if (addr.indx)
3178 return 0;
3179 if (!s390_short_displacement (addr.disp))
3180 return 0;
3181 break;
3182
3183 case 'R': /* with index short displacement */
3184 if (TARGET_LONG_DISPLACEMENT)
3185 {
3186 if (!decomposed && !s390_decompose_address (op, &addr))
3187 return 0;
3188 if (!s390_short_displacement (addr.disp))
3189 return 0;
3190 }
3191 /* Any invalid address here will be fixed up by reload,
3192 so accept it for the most generic constraint. */
3193 break;
3194
3195 case 'S': /* no index long displacement */
3196 if (!decomposed && !s390_decompose_address (op, &addr))
3197 return 0;
3198 if (addr.indx)
3199 return 0;
3200 break;
3201
3202 case 'T': /* with index long displacement */
3203 /* Any invalid address here will be fixed up by reload,
3204 so accept it for the most generic constraint. */
3205 break;
3206
3207 default:
3208 return 0;
3209 }
3210 return 1;
3211 }
3212
3213
3214 /* Evaluates constraint strings described by the regular expression
3215 ([A|B|Z](Q|R|S|T))|Y and returns 1 if OP is a valid operand for
3216 the constraint given in STR, and 0 otherwise. */
3217
3218 int
3219 s390_mem_constraint (const char *str, rtx op)
3220 {
3221 char c = str[0];
3222
3223 switch (c)
3224 {
3225 case 'A':
3226 /* Check for offsettable variants of memory constraints. */
3227 if (!MEM_P (op) || MEM_VOLATILE_P (op))
3228 return 0;
3229 if ((reload_completed || reload_in_progress)
3230 ? !offsettable_memref_p (op) : !offsettable_nonstrict_memref_p (op))
3231 return 0;
3232 return s390_check_qrst_address (str[1], XEXP (op, 0), true);
3233 case 'B':
3234 /* Check for non-literal-pool variants of memory constraints. */
3235 if (!MEM_P (op))
3236 return 0;
3237 return s390_check_qrst_address (str[1], XEXP (op, 0), false);
3238 case 'Q':
3239 case 'R':
3240 case 'S':
3241 case 'T':
3242 if (GET_CODE (op) != MEM)
3243 return 0;
3244 return s390_check_qrst_address (c, XEXP (op, 0), true);
3245 case 'Y':
3246 /* Simply check for the basic form of a shift count. Reload will
3247 take care of making sure we have a proper base register. */
3248 if (!s390_decompose_addrstyle_without_index (op, NULL, NULL))
3249 return 0;
3250 break;
3251 case 'Z':
3252 return s390_check_qrst_address (str[1], op, true);
3253 default:
3254 return 0;
3255 }
3256 return 1;
3257 }
3258
3259
3260 /* Evaluates constraint strings starting with letter O. Input
3261 parameter C is the letter following the "O" in the constraint
3262 string. Returns 1 if VALUE meets the respective constraint and 0
3263 otherwise. */
3264
3265 int
3266 s390_O_constraint_str (const char c, HOST_WIDE_INT value)
3267 {
3268 if (!TARGET_EXTIMM)
3269 return 0;
3270
3271 switch (c)
3272 {
3273 case 's':
3274 return trunc_int_for_mode (value, SImode) == value;
3275
3276 case 'p':
3277 return value == 0
3278 || s390_single_part (GEN_INT (value), DImode, SImode, 0) == 1;
3279
3280 case 'n':
3281 return s390_single_part (GEN_INT (value - 1), DImode, SImode, -1) == 1;
3282
3283 default:
3284 gcc_unreachable ();
3285 }
3286 }
3287
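/* For example, "Op" accepts a DImode value such as 0x12345678, whose
   only SImode part differing from 0 is the low-order word
   (s390_single_part returns 1 for it), but rejects
   0x1234567800000000, where the differing part is the high-order
   word.  */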
3288
3289 /* Evaluates constraint strings starting with letter N. Parameter STR
3290 contains the letters following letter "N" in the constraint string.
3291 Returns true if VALUE matches the constraint. */
3292
3293 int
3294 s390_N_constraint_str (const char *str, HOST_WIDE_INT value)
3295 {
3296 machine_mode mode, part_mode;
3297 int def;
3298 int part, part_goal;
3299
3300
3301 if (str[0] == 'x')
3302 part_goal = -1;
3303 else
3304 part_goal = str[0] - '0';
3305
3306 switch (str[1])
3307 {
3308 case 'Q':
3309 part_mode = QImode;
3310 break;
3311 case 'H':
3312 part_mode = HImode;
3313 break;
3314 case 'S':
3315 part_mode = SImode;
3316 break;
3317 default:
3318 return 0;
3319 }
3320
3321 switch (str[2])
3322 {
3323 case 'H':
3324 mode = HImode;
3325 break;
3326 case 'S':
3327 mode = SImode;
3328 break;
3329 case 'D':
3330 mode = DImode;
3331 break;
3332 default:
3333 return 0;
3334 }
3335
3336 switch (str[3])
3337 {
3338 case '0':
3339 def = 0;
3340 break;
3341 case 'F':
3342 def = -1;
3343 break;
3344 default:
3345 return 0;
3346 }
3347
3348 if (GET_MODE_SIZE (mode) <= GET_MODE_SIZE (part_mode))
3349 return 0;
3350
3351 part = s390_single_part (GEN_INT (value), mode, part_mode, def);
3352 if (part < 0)
3353 return 0;
3354 if (part_goal != -1 && part_goal != part)
3355 return 0;
3356
3357 return 1;
3358 }
3359
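/* For example, with STR == "0HD0" the constraint accepts a DImode
   value such as 0x4000000000000000: exactly one HImode part differs
   from 0, and it is part 0, the most significant halfword.  */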
3360
3361 /* Returns true if the input parameter VALUE is a float zero. */
3362
3363 int
3364 s390_float_const_zero_p (rtx value)
3365 {
3366 return (GET_MODE_CLASS (GET_MODE (value)) == MODE_FLOAT
3367 && value == CONST0_RTX (GET_MODE (value)));
3368 }
3369
3370 /* Implement TARGET_REGISTER_MOVE_COST. */
3371
3372 static int
3373 s390_register_move_cost (machine_mode mode,
3374 reg_class_t from, reg_class_t to)
3375 {
3376 /* On s390, copy between fprs and gprs is expensive. */
3377
3378 /* It becomes somewhat faster when ldgr/lgdr are available. */
3379 if (TARGET_Z10 && GET_MODE_SIZE (mode) == 8)
3380 {
3381 /* ldgr is single cycle. */
3382 if (reg_classes_intersect_p (from, GENERAL_REGS)
3383 && reg_classes_intersect_p (to, FP_REGS))
3384 return 1;
3385 /* lgdr needs 3 cycles. */
3386 if (reg_classes_intersect_p (to, GENERAL_REGS)
3387 && reg_classes_intersect_p (from, FP_REGS))
3388 return 3;
3389 }
3390
3391 /* Otherwise copying is done via memory. */
3392 if ((reg_classes_intersect_p (from, GENERAL_REGS)
3393 && reg_classes_intersect_p (to, FP_REGS))
3394 || (reg_classes_intersect_p (from, FP_REGS)
3395 && reg_classes_intersect_p (to, GENERAL_REGS)))
3396 return 10;
3397
3398 return 1;
3399 }
3400
3401 /* Implement TARGET_MEMORY_MOVE_COST. */
3402
3403 static int
3404 s390_memory_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
3405 reg_class_t rclass ATTRIBUTE_UNUSED,
3406 bool in ATTRIBUTE_UNUSED)
3407 {
3408 return 2;
3409 }
3410
3411 /* Compute a (partial) cost for rtx X. Return true if the complete
3412 cost has been computed, and false if subexpressions should be
3413 scanned. In either case, *TOTAL contains the cost result. The
3414 initial value of *TOTAL is the default value computed by
3415 rtx_cost. It may be left unmodified. OUTER_CODE contains the
3416 code of the superexpression of x. */
3417
3418 static bool
3419 s390_rtx_costs (rtx x, machine_mode mode, int outer_code,
3420 int opno ATTRIBUTE_UNUSED,
3421 int *total, bool speed ATTRIBUTE_UNUSED)
3422 {
3423 int code = GET_CODE (x);
3424 switch (code)
3425 {
3426 case CONST:
3427 case CONST_INT:
3428 case LABEL_REF:
3429 case SYMBOL_REF:
3430 case CONST_DOUBLE:
3431 case CONST_WIDE_INT:
3432 case MEM:
3433 *total = 0;
3434 return true;
3435
3436 case SET:
3437 {
3438 /* Without this a conditional move instruction would be
3439 accounted as 3 * COSTS_N_INSNS (set, if_then_else,
3440 comparison operator). That's a bit pessimistic. */
3441
3442 if (!TARGET_Z196 || GET_CODE (SET_SRC (x)) != IF_THEN_ELSE)
3443 return false;
3444
3445 rtx cond = XEXP (SET_SRC (x), 0);
3446
3447 if (!CC_REG_P (XEXP (cond, 0)) || !CONST_INT_P (XEXP (cond, 1)))
3448 return false;
3449
3450 /* It is going to be a load/store on condition. Make it
3451 slightly more expensive than a normal load. */
3452 *total = COSTS_N_INSNS (1) + 1;
3453
3454 rtx dst = SET_DEST (x);
3455 rtx then = XEXP (SET_SRC (x), 1);
3456 rtx els = XEXP (SET_SRC (x), 2);
3457
3458 /* It is a real IF-THEN-ELSE. An additional move will be
3459 needed to implement that. */
3460 if (reload_completed
3461 && !rtx_equal_p (dst, then)
3462 && !rtx_equal_p (dst, els))
3463 *total += COSTS_N_INSNS (1) / 2;
3464
3465 /* A minor penalty for constants we cannot directly handle. */
3466 if ((CONST_INT_P (then) || CONST_INT_P (els))
3467 && (!TARGET_Z13 || MEM_P (dst)
3468 || (CONST_INT_P (then) && !satisfies_constraint_K (then))
3469 || (CONST_INT_P (els) && !satisfies_constraint_K (els))))
3470 *total += COSTS_N_INSNS (1) / 2;
3471
3472 /* A store on condition can only handle register src operands. */
3473 if (MEM_P (dst) && (!REG_P (then) || !REG_P (els)))
3474 *total += COSTS_N_INSNS (1) / 2;
3475
3476 return true;
3477 }
3478 case IOR:
3479 /* risbg */
3480 if (GET_CODE (XEXP (x, 0)) == AND
3481 && GET_CODE (XEXP (x, 1)) == ASHIFT
3482 && REG_P (XEXP (XEXP (x, 0), 0))
3483 && REG_P (XEXP (XEXP (x, 1), 0))
3484 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3485 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
3486 && (UINTVAL (XEXP (XEXP (x, 0), 1)) ==
3487 (HOST_WIDE_INT_1U << UINTVAL (XEXP (XEXP (x, 1), 1))) - 1))
3488 {
3489 *total = COSTS_N_INSNS (2);
3490 return true;
3491 }
3492
3493 /* ~AND on a 128 bit mode. This can be done using a vector
3494 instruction. */
3495 if (TARGET_VXE
3496 && GET_CODE (XEXP (x, 0)) == NOT
3497 && GET_CODE (XEXP (x, 1)) == NOT
3498 && REG_P (XEXP (XEXP (x, 0), 0))
3499 && REG_P (XEXP (XEXP (x, 1), 0))
3500 && GET_MODE_SIZE (GET_MODE (XEXP (XEXP (x, 0), 0))) == 16
3501 && s390_hard_regno_mode_ok (VR0_REGNUM,
3502 GET_MODE (XEXP (XEXP (x, 0), 0))))
3503 {
3504 *total = COSTS_N_INSNS (1);
3505 return true;
3506 }
3507 /* fallthrough */
3508 case ASHIFT:
3509 case ASHIFTRT:
3510 case LSHIFTRT:
3511 case ROTATE:
3512 case ROTATERT:
3513 case AND:
3514 case XOR:
3515 case NEG:
3516 case NOT:
3517 *total = COSTS_N_INSNS (1);
3518 return false;
3519
3520 case PLUS:
3521 case MINUS:
3522 *total = COSTS_N_INSNS (1);
3523 return false;
3524
3525 case MULT:
3526 switch (mode)
3527 {
3528 case E_SImode:
3529 {
3530 rtx left = XEXP (x, 0);
3531 rtx right = XEXP (x, 1);
3532 if (GET_CODE (right) == CONST_INT
3533 && CONST_OK_FOR_K (INTVAL (right)))
3534 *total = s390_cost->mhi;
3535 else if (GET_CODE (left) == SIGN_EXTEND)
3536 *total = s390_cost->mh;
3537 else
3538 *total = s390_cost->ms; /* msr, ms, msy */
3539 break;
3540 }
3541 case E_DImode:
3542 {
3543 rtx left = XEXP (x, 0);
3544 rtx right = XEXP (x, 1);
3545 if (TARGET_ZARCH)
3546 {
3547 if (GET_CODE (right) == CONST_INT
3548 && CONST_OK_FOR_K (INTVAL (right)))
3549 *total = s390_cost->mghi;
3550 else if (GET_CODE (left) == SIGN_EXTEND)
3551 *total = s390_cost->msgf;
3552 else
3553 *total = s390_cost->msg; /* msgr, msg */
3554 }
3555 else /* TARGET_31BIT */
3556 {
3557 if (GET_CODE (left) == SIGN_EXTEND
3558 && GET_CODE (right) == SIGN_EXTEND)
3559 /* mulsidi case: mr, m */
3560 *total = s390_cost->m;
3561 else if (GET_CODE (left) == ZERO_EXTEND
3562 && GET_CODE (right) == ZERO_EXTEND
3563 && TARGET_CPU_ZARCH)
3564 /* umulsidi case: ml, mlr */
3565 *total = s390_cost->ml;
3566 else
3567 /* Complex calculation is required. */
3568 *total = COSTS_N_INSNS (40);
3569 }
3570 break;
3571 }
3572 case E_SFmode:
3573 case E_DFmode:
3574 *total = s390_cost->mult_df;
3575 break;
3576 case E_TFmode:
3577 *total = s390_cost->mxbr;
3578 break;
3579 default:
3580 return false;
3581 }
3582 return false;
3583
3584 case FMA:
3585 switch (mode)
3586 {
3587 case E_DFmode:
3588 *total = s390_cost->madbr;
3589 break;
3590 case E_SFmode:
3591 *total = s390_cost->maebr;
3592 break;
3593 default:
3594 return false;
3595 }
3596 /* Negate in the third argument is free: FMSUB. */
3597 if (GET_CODE (XEXP (x, 2)) == NEG)
3598 {
3599 *total += (rtx_cost (XEXP (x, 0), mode, FMA, 0, speed)
3600 + rtx_cost (XEXP (x, 1), mode, FMA, 1, speed)
3601 + rtx_cost (XEXP (XEXP (x, 2), 0), mode, FMA, 2, speed));
3602 return true;
3603 }
3604 return false;
3605
3606 case UDIV:
3607 case UMOD:
3608 if (mode == TImode) /* 128 bit division */
3609 *total = s390_cost->dlgr;
3610 else if (mode == DImode)
3611 {
3612 rtx right = XEXP (x, 1);
3613 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3614 *total = s390_cost->dlr;
3615 else /* 64 by 64 bit division */
3616 *total = s390_cost->dlgr;
3617 }
3618 else if (mode == SImode) /* 32 bit division */
3619 *total = s390_cost->dlr;
3620 return false;
3621
3622 case DIV:
3623 case MOD:
3624 if (mode == DImode)
3625 {
3626 rtx right = XEXP (x, 1);
3627 if (GET_CODE (right) == ZERO_EXTEND) /* 64 by 32 bit division */
3628 if (TARGET_ZARCH)
3629 *total = s390_cost->dsgfr;
3630 else
3631 *total = s390_cost->dr;
3632 else /* 64 by 64 bit division */
3633 *total = s390_cost->dsgr;
3634 }
3635 else if (mode == SImode) /* 32 bit division */
3636 *total = s390_cost->dlr;
3637 else if (mode == SFmode)
3638 {
3639 *total = s390_cost->debr;
3640 }
3641 else if (mode == DFmode)
3642 {
3643 *total = s390_cost->ddbr;
3644 }
3645 else if (mode == TFmode)
3646 {
3647 *total = s390_cost->dxbr;
3648 }
3649 return false;
3650
3651 case SQRT:
3652 if (mode == SFmode)
3653 *total = s390_cost->sqebr;
3654 else if (mode == DFmode)
3655 *total = s390_cost->sqdbr;
3656 else /* TFmode */
3657 *total = s390_cost->sqxbr;
3658 return false;
3659
3660 case SIGN_EXTEND:
3661 case ZERO_EXTEND:
3662 if (outer_code == MULT || outer_code == DIV || outer_code == MOD
3663 || outer_code == PLUS || outer_code == MINUS
3664 || outer_code == COMPARE)
3665 *total = 0;
3666 return false;
3667
3668 case COMPARE:
3669 *total = COSTS_N_INSNS (1);
3670 if (GET_CODE (XEXP (x, 0)) == AND
3671 && GET_CODE (XEXP (x, 1)) == CONST_INT
3672 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT)
3673 {
3674 rtx op0 = XEXP (XEXP (x, 0), 0);
3675 rtx op1 = XEXP (XEXP (x, 0), 1);
3676 rtx op2 = XEXP (x, 1);
3677
3678 if (memory_operand (op0, GET_MODE (op0))
3679 && s390_tm_ccmode (op1, op2, 0) != VOIDmode)
3680 return true;
3681 if (register_operand (op0, GET_MODE (op0))
3682 && s390_tm_ccmode (op1, op2, 1) != VOIDmode)
3683 return true;
3684 }
3685 return false;
3686
3687 default:
3688 return false;
3689 }
3690 }
3691
3692 /* Return the cost of an address rtx ADDR. */
3693
3694 static int
3695 s390_address_cost (rtx addr, machine_mode mode ATTRIBUTE_UNUSED,
3696 addr_space_t as ATTRIBUTE_UNUSED,
3697 bool speed ATTRIBUTE_UNUSED)
3698 {
3699 struct s390_address ad;
3700 if (!s390_decompose_address (addr, &ad))
3701 return 1000;
3702
3703 return ad.indx? COSTS_N_INSNS (1) + 1 : COSTS_N_INSNS (1);
3704 }
3705
3706 /* Implement targetm.vectorize.builtin_vectorization_cost. */
3707 static int
3708 s390_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
3709 tree vectype,
3710 int misalign ATTRIBUTE_UNUSED)
3711 {
3712 switch (type_of_cost)
3713 {
3714 case scalar_stmt:
3715 case scalar_load:
3716 case scalar_store:
3717 case vector_stmt:
3718 case vector_load:
3719 case vector_store:
3720 case vec_to_scalar:
3721 case scalar_to_vec:
3722 case cond_branch_not_taken:
3723 case vec_perm:
3724 case vec_promote_demote:
3725 case unaligned_load:
3726 case unaligned_store:
3727 return 1;
3728
3729 case cond_branch_taken:
3730 return 3;
3731
3732 case vec_construct:
3733 return TYPE_VECTOR_SUBPARTS (vectype) - 1;
3734
3735 default:
3736 gcc_unreachable ();
3737 }
3738 }
3739
3740 /* If OP is a SYMBOL_REF of a thread-local symbol, return its TLS mode,
3741 otherwise return 0. */
3742
3743 int
3744 tls_symbolic_operand (rtx op)
3745 {
3746 if (GET_CODE (op) != SYMBOL_REF)
3747 return 0;
3748 return SYMBOL_REF_TLS_MODEL (op);
3749 }
3750 \f
3751 /* Split DImode access register reference REG (on 64-bit) into its constituent
3752 low and high parts, and store them into LO and HI. Note that gen_lowpart/
3753 gen_highpart cannot be used as they assume all registers are word-sized,
3754 while our access registers have only half that size. */
3755
3756 void
3757 s390_split_access_reg (rtx reg, rtx *lo, rtx *hi)
3758 {
3759 gcc_assert (TARGET_64BIT);
3760 gcc_assert (ACCESS_REG_P (reg));
3761 gcc_assert (GET_MODE (reg) == DImode);
3762 gcc_assert (!(REGNO (reg) & 1));
3763
3764 *lo = gen_rtx_REG (SImode, REGNO (reg) + 1);
3765 *hi = gen_rtx_REG (SImode, REGNO (reg));
3766 }
3767
3768 /* Return true if OP contains a symbol reference. */
3769
3770 bool
3771 symbolic_reference_mentioned_p (rtx op)
3772 {
3773 const char *fmt;
3774 int i;
3775
3776 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
3777 return 1;
3778
3779 fmt = GET_RTX_FORMAT (GET_CODE (op));
3780 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3781 {
3782 if (fmt[i] == 'E')
3783 {
3784 int j;
3785
3786 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3787 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3788 return 1;
3789 }
3790
3791 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
3792 return 1;
3793 }
3794
3795 return 0;
3796 }
3797
3798 /* Return true if OP contains a reference to a thread-local symbol. */
3799
3800 bool
3801 tls_symbolic_reference_mentioned_p (rtx op)
3802 {
3803 const char *fmt;
3804 int i;
3805
3806 if (GET_CODE (op) == SYMBOL_REF)
3807 return tls_symbolic_operand (op);
3808
3809 fmt = GET_RTX_FORMAT (GET_CODE (op));
3810 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
3811 {
3812 if (fmt[i] == 'E')
3813 {
3814 int j;
3815
3816 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
3817 if (tls_symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
3818 return true;
3819 }
3820
3821 else if (fmt[i] == 'e' && tls_symbolic_reference_mentioned_p (XEXP (op, i)))
3822 return true;
3823 }
3824
3825 return false;
3826 }
3827
3828
3829 /* Return true if OP is a legitimate general operand when
3830 generating PIC code. It is given that flag_pic is on
3831 and that OP satisfies CONSTANT_P. */
3832
3833 int
3834 legitimate_pic_operand_p (rtx op)
3835 {
3836 /* Accept all non-symbolic constants. */
3837 if (!SYMBOLIC_CONST (op))
3838 return 1;
3839
3840 /* Reject everything else; must be handled
3841 via emit_symbolic_move. */
3842 return 0;
3843 }
3844
3845 /* Returns true if the constant value OP is a legitimate general operand.
3846 It is given that OP satisfies CONSTANT_P. */
3847
3848 static bool
3849 s390_legitimate_constant_p (machine_mode mode, rtx op)
3850 {
3851 if (TARGET_VX && VECTOR_MODE_P (mode) && GET_CODE (op) == CONST_VECTOR)
3852 {
3853 if (GET_MODE_SIZE (mode) != 16)
3854 return 0;
3855
3856 if (!satisfies_constraint_j00 (op)
3857 && !satisfies_constraint_jm1 (op)
3858 && !satisfies_constraint_jKK (op)
3859 && !satisfies_constraint_jxx (op)
3860 && !satisfies_constraint_jyy (op))
3861 return 0;
3862 }
3863
3864 /* Accept all non-symbolic constants. */
3865 if (!SYMBOLIC_CONST (op))
3866 return 1;
3867
3868 /* Accept immediate LARL operands. */
3869 if (TARGET_CPU_ZARCH && larl_operand (op, mode))
3870 return 1;
3871
3872 /* Thread-local symbols are never legal constants. This is
3873 so that emit_call knows that computing such addresses
3874 might require a function call. */
3875 if (TLS_SYMBOLIC_CONST (op))
3876 return 0;
3877
3878 /* In the PIC case, symbolic constants must *not* be
3879 forced into the literal pool. We accept them here,
3880 so that they will be handled by emit_symbolic_move. */
3881 if (flag_pic)
3882 return 1;
3883
3884 /* All remaining non-PIC symbolic constants are
3885 forced into the literal pool. */
3886 return 0;
3887 }
3888
3889 /* Determine if it's legal to put X into the constant pool. This
3890 is not possible if X contains the address of a symbol that is
3891 not constant (TLS) or not known at final link time (PIC). */
3892
3893 static bool
3894 s390_cannot_force_const_mem (machine_mode mode, rtx x)
3895 {
3896 switch (GET_CODE (x))
3897 {
3898 case CONST_INT:
3899 case CONST_DOUBLE:
3900 case CONST_WIDE_INT:
3901 case CONST_VECTOR:
3902 /* Accept all non-symbolic constants. */
3903 return false;
3904
3905 case LABEL_REF:
3906 /* Labels are OK iff we are non-PIC. */
3907 return flag_pic != 0;
3908
3909 case SYMBOL_REF:
3910 /* 'Naked' TLS symbol references are never OK,
3911 non-TLS symbols are OK iff we are non-PIC. */
3912 if (tls_symbolic_operand (x))
3913 return true;
3914 else
3915 return flag_pic != 0;
3916
3917 case CONST:
3918 return s390_cannot_force_const_mem (mode, XEXP (x, 0));
3919 case PLUS:
3920 case MINUS:
3921 return s390_cannot_force_const_mem (mode, XEXP (x, 0))
3922 || s390_cannot_force_const_mem (mode, XEXP (x, 1));
3923
3924 case UNSPEC:
3925 switch (XINT (x, 1))
3926 {
3927 /* Only lt-relative or GOT-relative UNSPECs are OK. */
3928 case UNSPEC_LTREL_OFFSET:
3929 case UNSPEC_GOT:
3930 case UNSPEC_GOTOFF:
3931 case UNSPEC_PLTOFF:
3932 case UNSPEC_TLSGD:
3933 case UNSPEC_TLSLDM:
3934 case UNSPEC_NTPOFF:
3935 case UNSPEC_DTPOFF:
3936 case UNSPEC_GOTNTPOFF:
3937 case UNSPEC_INDNTPOFF:
3938 return false;
3939
3940 /* If the literal pool shares the code section, execute template
3941 placeholders may be put into the pool as well. */
3942 case UNSPEC_INSN:
3943 return TARGET_CPU_ZARCH;
3944
3945 default:
3946 return true;
3947 }
3948 break;
3949
3950 default:
3951 gcc_unreachable ();
3952 }
3953 }
3954
3955 /* Returns true if the constant value OP is a legitimate general
3956 operand during and after reload. The difference to
3957 legitimate_constant_p is that this function will not accept
3958 a constant that would need to be forced to the literal pool
3959 before it can be used as operand.
3960 This function accepts all constants which can be loaded directly
3961 into a GPR. */
3962
3963 bool
3964 legitimate_reload_constant_p (rtx op)
3965 {
3966 /* Accept la(y) operands. */
3967 if (GET_CODE (op) == CONST_INT
3968 && DISP_IN_RANGE (INTVAL (op)))
3969 return true;
3970
3971 /* Accept l(g)hi/l(g)fi operands. */
3972 if (GET_CODE (op) == CONST_INT
3973 && (CONST_OK_FOR_K (INTVAL (op)) || CONST_OK_FOR_Os (INTVAL (op))))
3974 return true;
3975
3976 /* Accept lliXX operands. */
3977 if (TARGET_ZARCH
3978 && GET_CODE (op) == CONST_INT
3979 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3980 && s390_single_part (op, word_mode, HImode, 0) >= 0)
3981 return true;
3982
3983 if (TARGET_EXTIMM
3984 && GET_CODE (op) == CONST_INT
3985 && trunc_int_for_mode (INTVAL (op), word_mode) == INTVAL (op)
3986 && s390_single_part (op, word_mode, SImode, 0) >= 0)
3987 return true;
3988
3989 /* Accept larl operands. */
3990 if (TARGET_CPU_ZARCH
3991 && larl_operand (op, VOIDmode))
3992 return true;
3993
3994 /* Accept floating-point zero operands that fit into a single GPR. */
3995 if (GET_CODE (op) == CONST_DOUBLE
3996 && s390_float_const_zero_p (op)
3997 && GET_MODE_SIZE (GET_MODE (op)) <= UNITS_PER_WORD)
3998 return true;
3999
4000 /* Accept double-word operands that can be split. */
4001 if (GET_CODE (op) == CONST_WIDE_INT
4002 || (GET_CODE (op) == CONST_INT
4003 && trunc_int_for_mode (INTVAL (op), word_mode) != INTVAL (op)))
4004 {
4005 machine_mode dword_mode = word_mode == SImode ? DImode : TImode;
4006 rtx hi = operand_subword (op, 0, 0, dword_mode);
4007 rtx lo = operand_subword (op, 1, 0, dword_mode);
4008 return legitimate_reload_constant_p (hi)
4009 && legitimate_reload_constant_p (lo);
4010 }
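/* Illustrative example with assumed values: on a 31-bit target word_mode
   is SImode and dword_mode is DImode, so a DImode constant such as
   0x0000000100000002 is split into the subwords 0x00000001 and
   0x00000002; both are accepted by the earlier immediate checks
   (e.g. l(g)hi), so the whole constant is accepted.  */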
4011
4012 /* Everything else cannot be handled without reload. */
4013 return false;
4014 }
4015
4016 /* Returns true if the constant value OP is a legitimate fp operand
4017 during and after reload.
4018 This function accepts all constants which can be loaded directly
4019 into an FPR. */
4020
4021 static bool
4022 legitimate_reload_fp_constant_p (rtx op)
4023 {
4024 /* Accept floating-point zero operands if the load zero instruction
4025 can be used. Prior to z196 the load fp zero instruction caused a
4026 performance penalty if the result is used as BFP number. */
4027 if (TARGET_Z196
4028 && GET_CODE (op) == CONST_DOUBLE
4029 && s390_float_const_zero_p (op))
4030 return true;
4031
4032 return false;
4033 }
4034
4035 /* Returns true if the constant value OP is a legitimate vector operand
4036 during and after reload.
4037 This function accepts all constants which can be loaded directly
4038 into a VR. */
4039
4040 static bool
4041 legitimate_reload_vector_constant_p (rtx op)
4042 {
4043 if (TARGET_VX && GET_MODE_SIZE (GET_MODE (op)) == 16
4044 && (satisfies_constraint_j00 (op)
4045 || satisfies_constraint_jm1 (op)
4046 || satisfies_constraint_jKK (op)
4047 || satisfies_constraint_jxx (op)
4048 || satisfies_constraint_jyy (op)))
4049 return true;
4050
4051 return false;
4052 }
4053
4054 /* Given an rtx OP being reloaded into a reg required to be in class RCLASS,
4055 return the class of reg to actually use. */
4056
4057 static reg_class_t
4058 s390_preferred_reload_class (rtx op, reg_class_t rclass)
4059 {
4060 switch (GET_CODE (op))
4061 {
4062 /* Constants we cannot reload into general registers
4063 must be forced into the literal pool. */
4064 case CONST_VECTOR:
4065 case CONST_DOUBLE:
4066 case CONST_INT:
4067 case CONST_WIDE_INT:
4068 if (reg_class_subset_p (GENERAL_REGS, rclass)
4069 && legitimate_reload_constant_p (op))
4070 return GENERAL_REGS;
4071 else if (reg_class_subset_p (ADDR_REGS, rclass)
4072 && legitimate_reload_constant_p (op))
4073 return ADDR_REGS;
4074 else if (reg_class_subset_p (FP_REGS, rclass)
4075 && legitimate_reload_fp_constant_p (op))
4076 return FP_REGS;
4077 else if (reg_class_subset_p (VEC_REGS, rclass)
4078 && legitimate_reload_vector_constant_p (op))
4079 return VEC_REGS;
4080
4081 return NO_REGS;
4082
4083 /* If a symbolic constant or a PLUS is reloaded,
4084 it is most likely being used as an address, so
4085 prefer ADDR_REGS. If 'class' is not a superset
4086 of ADDR_REGS, e.g. FP_REGS, reject this reload. */
4087 case CONST:
4088 /* Symrefs cannot be pushed into the literal pool with -fPIC
4089 so we *MUST NOT* return NO_REGS for these cases
4090 (s390_cannot_force_const_mem will return true).
4091
4092 On the other hand we MUST return NO_REGS for symrefs with
4093 invalid addend which might have been pushed to the literal
4094 pool (no -fPIC). Usually we would expect them to be
4095 handled via secondary reload but this does not happen if
4096 they are used as literal pool slot replacement in reload
4097 inheritance (see emit_input_reload_insns). */
4098 if (TARGET_CPU_ZARCH
4099 && GET_CODE (XEXP (op, 0)) == PLUS
4100 && GET_CODE (XEXP (XEXP(op, 0), 0)) == SYMBOL_REF
4101 && GET_CODE (XEXP (XEXP(op, 0), 1)) == CONST_INT)
4102 {
4103 if (flag_pic && reg_class_subset_p (ADDR_REGS, rclass))
4104 return ADDR_REGS;
4105 else
4106 return NO_REGS;
4107 }
4108 /* fallthrough */
4109 case LABEL_REF:
4110 case SYMBOL_REF:
4111 if (!legitimate_reload_constant_p (op))
4112 return NO_REGS;
4113 /* fallthrough */
4114 case PLUS:
4115 /* load address will be used. */
4116 if (reg_class_subset_p (ADDR_REGS, rclass))
4117 return ADDR_REGS;
4118 else
4119 return NO_REGS;
4120
4121 default:
4122 break;
4123 }
4124
4125 return rclass;
4126 }
4127
4128 /* Return true if ADDR is SYMBOL_REF + addend with addend being a
4129 multiple of ALIGNMENT and the SYMBOL_REF being naturally
4130 aligned. */
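/* Illustrative example with assumed operands: for ADDR = sym + 6 and
   ALIGNMENT = 4 the addend check below fails since 6 & 3 == 2, while
   sym + 8 passes and is then accepted only if the symbol is not marked
   SYMBOL_FLAG_NOTALIGN4_P.  */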
4131
4132 bool
4133 s390_check_symref_alignment (rtx addr, HOST_WIDE_INT alignment)
4134 {
4135 HOST_WIDE_INT addend;
4136 rtx symref;
4137
4138 /* The "required alignment" might be 0 (e.g. for certain structs
4139 accessed via BLKmode). Early abort in this case, as well as when
4140 an alignment > 8 is required. */
4141 if (alignment < 2 || alignment > 8)
4142 return false;
4143
4144 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4145 return false;
4146
4147 if (addend & (alignment - 1))
4148 return false;
4149
4150 if (GET_CODE (symref) == SYMBOL_REF)
4151 {
4152 /* We have load-relative instructions for 2-byte, 4-byte, and
4153 8-byte alignment so allow only these. */
4154 switch (alignment)
4155 {
4156 case 8: return !SYMBOL_FLAG_NOTALIGN8_P (symref);
4157 case 4: return !SYMBOL_FLAG_NOTALIGN4_P (symref);
4158 case 2: return !SYMBOL_FLAG_NOTALIGN2_P (symref);
4159 default: return false;
4160 }
4161 }
4162
4163 if (GET_CODE (symref) == UNSPEC
4164 && alignment <= UNITS_PER_LONG)
4165 return true;
4166
4167 return false;
4168 }
4169
4170 /* ADDR is moved into REG using larl. If ADDR isn't a valid larl
4171 operand, SCRATCH is used to load the even part of the address,
4172 and the remaining 1 is then added. */
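/* Illustrative example with an assumed addend: for ADDR = sym + 5 larl
   cannot encode the odd addend, so sym + 4 is loaded into SCRATCH via
   larl and the remaining 1 is added with la, e.g. la REG,1(SCRATCH).  */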
4173
4174 void
4175 s390_reload_larl_operand (rtx reg, rtx addr, rtx scratch)
4176 {
4177 HOST_WIDE_INT addend;
4178 rtx symref;
4179
4180 if (!s390_loadrelative_operand_p (addr, &symref, &addend))
4181 gcc_unreachable ();
4182
4183 if (!(addend & 1))
4184 /* Easy case. The addend is even so larl will do fine. */
4185 emit_move_insn (reg, addr);
4186 else
4187 {
4188 /* We can leave the scratch register untouched if the target
4189 register is a valid base register. */
4190 if (REGNO (reg) < FIRST_PSEUDO_REGISTER
4191 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS)
4192 scratch = reg;
4193
4194 gcc_assert (REGNO (scratch) < FIRST_PSEUDO_REGISTER);
4195 gcc_assert (REGNO_REG_CLASS (REGNO (scratch)) == ADDR_REGS);
4196
4197 if (addend != 1)
4198 emit_move_insn (scratch,
4199 gen_rtx_CONST (Pmode,
4200 gen_rtx_PLUS (Pmode, symref,
4201 GEN_INT (addend - 1))));
4202 else
4203 emit_move_insn (scratch, symref);
4204
4205 /* Increment the address using la in order to avoid clobbering cc. */
4206 s390_load_address (reg, gen_rtx_PLUS (Pmode, scratch, const1_rtx));
4207 }
4208 }
4209
4210 /* Generate what is necessary to move between REG and MEM using
4211 SCRATCH. The direction is given by TOMEM. */
4212
4213 void
4214 s390_reload_symref_address (rtx reg, rtx mem, rtx scratch, bool tomem)
4215 {
4216 /* Reload might have pulled a constant out of the literal pool.
4217 Force it back in. */
4218 if (CONST_INT_P (mem) || GET_CODE (mem) == CONST_DOUBLE
4219 || GET_CODE (mem) == CONST_WIDE_INT
4220 || GET_CODE (mem) == CONST_VECTOR
4221 || GET_CODE (mem) == CONST)
4222 mem = force_const_mem (GET_MODE (reg), mem);
4223
4224 gcc_assert (MEM_P (mem));
4225
4226 /* For a load from memory we can leave the scratch register
4227 untouched if the target register is a valid base register. */
4228 if (!tomem
4229 && REGNO (reg) < FIRST_PSEUDO_REGISTER
4230 && REGNO_REG_CLASS (REGNO (reg)) == ADDR_REGS
4231 && GET_MODE (reg) == GET_MODE (scratch))
4232 scratch = reg;
4233
4234 /* Load address into scratch register. Since we can't have a
4235 secondary reload for a secondary reload we have to cover the case
4236 where larl would need a secondary reload here as well. */
4237 s390_reload_larl_operand (scratch, XEXP (mem, 0), scratch);
4238
4239 /* Now we can use a standard load/store to do the move. */
4240 if (tomem)
4241 emit_move_insn (replace_equiv_address (mem, scratch), reg);
4242 else
4243 emit_move_insn (reg, replace_equiv_address (mem, scratch));
4244 }
4245
4246 /* Inform reload about cases where moving X with a mode MODE to a register in
4247 RCLASS requires an extra scratch or immediate register. Return the class
4248 needed for the immediate register. */
4249
4250 static reg_class_t
4251 s390_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
4252 machine_mode mode, secondary_reload_info *sri)
4253 {
4254 enum reg_class rclass = (enum reg_class) rclass_i;
4255
4256 /* Intermediate register needed. */
4257 if (reg_classes_intersect_p (CC_REGS, rclass))
4258 return GENERAL_REGS;
4259
4260 if (TARGET_VX)
4261 {
4262 /* The vst/vl vector move instructions allow only for short
4263 displacements. */
4264 if (MEM_P (x)
4265 && GET_CODE (XEXP (x, 0)) == PLUS
4266 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4267 && !SHORT_DISP_IN_RANGE(INTVAL (XEXP (XEXP (x, 0), 1)))
4268 && reg_class_subset_p (rclass, VEC_REGS)
4269 && (!reg_class_subset_p (rclass, FP_REGS)
4270 || (GET_MODE_SIZE (mode) > 8
4271 && s390_class_max_nregs (FP_REGS, mode) == 1)))
4272 {
4273 if (in_p)
4274 sri->icode = (TARGET_64BIT ?
4275 CODE_FOR_reloaddi_la_in :
4276 CODE_FOR_reloadsi_la_in);
4277 else
4278 sri->icode = (TARGET_64BIT ?
4279 CODE_FOR_reloaddi_la_out :
4280 CODE_FOR_reloadsi_la_out);
4281 }
4282 }
4283
4284 if (TARGET_Z10)
4285 {
4286 HOST_WIDE_INT offset;
4287 rtx symref;
4288
4289 /* On z10 several optimizer steps may generate larl operands with
4290 an odd addend. */
4291 if (in_p
4292 && s390_loadrelative_operand_p (x, &symref, &offset)
4293 && mode == Pmode
4294 && !SYMBOL_FLAG_NOTALIGN2_P (symref)
4295 && (offset & 1) == 1)
4296 sri->icode = ((mode == DImode) ? CODE_FOR_reloaddi_larl_odd_addend_z10
4297 : CODE_FOR_reloadsi_larl_odd_addend_z10);
4298
4299 /* Handle all the (mem (symref)) accesses we cannot use the z10
4300 instructions for. */
4301 if (MEM_P (x)
4302 && s390_loadrelative_operand_p (XEXP (x, 0), NULL, NULL)
4303 && (mode == QImode
4304 || !reg_class_subset_p (rclass, GENERAL_REGS)
4305 || GET_MODE_SIZE (mode) > UNITS_PER_WORD
4306 || !s390_check_symref_alignment (XEXP (x, 0),
4307 GET_MODE_SIZE (mode))))
4308 {
4309 #define __SECONDARY_RELOAD_CASE(M,m) \
4310 case E_##M##mode: \
4311 if (TARGET_64BIT) \
4312 sri->icode = in_p ? CODE_FOR_reload##m##di_toreg_z10 : \
4313 CODE_FOR_reload##m##di_tomem_z10; \
4314 else \
4315 sri->icode = in_p ? CODE_FOR_reload##m##si_toreg_z10 : \
4316 CODE_FOR_reload##m##si_tomem_z10; \
4317 break;
4318
4319 switch (GET_MODE (x))
4320 {
4321 __SECONDARY_RELOAD_CASE (QI, qi);
4322 __SECONDARY_RELOAD_CASE (HI, hi);
4323 __SECONDARY_RELOAD_CASE (SI, si);
4324 __SECONDARY_RELOAD_CASE (DI, di);
4325 __SECONDARY_RELOAD_CASE (TI, ti);
4326 __SECONDARY_RELOAD_CASE (SF, sf);
4327 __SECONDARY_RELOAD_CASE (DF, df);
4328 __SECONDARY_RELOAD_CASE (TF, tf);
4329 __SECONDARY_RELOAD_CASE (SD, sd);
4330 __SECONDARY_RELOAD_CASE (DD, dd);
4331 __SECONDARY_RELOAD_CASE (TD, td);
4332 __SECONDARY_RELOAD_CASE (V1QI, v1qi);
4333 __SECONDARY_RELOAD_CASE (V2QI, v2qi);
4334 __SECONDARY_RELOAD_CASE (V4QI, v4qi);
4335 __SECONDARY_RELOAD_CASE (V8QI, v8qi);
4336 __SECONDARY_RELOAD_CASE (V16QI, v16qi);
4337 __SECONDARY_RELOAD_CASE (V1HI, v1hi);
4338 __SECONDARY_RELOAD_CASE (V2HI, v2hi);
4339 __SECONDARY_RELOAD_CASE (V4HI, v4hi);
4340 __SECONDARY_RELOAD_CASE (V8HI, v8hi);
4341 __SECONDARY_RELOAD_CASE (V1SI, v1si);
4342 __SECONDARY_RELOAD_CASE (V2SI, v2si);
4343 __SECONDARY_RELOAD_CASE (V4SI, v4si);
4344 __SECONDARY_RELOAD_CASE (V1DI, v1di);
4345 __SECONDARY_RELOAD_CASE (V2DI, v2di);
4346 __SECONDARY_RELOAD_CASE (V1TI, v1ti);
4347 __SECONDARY_RELOAD_CASE (V1SF, v1sf);
4348 __SECONDARY_RELOAD_CASE (V2SF, v2sf);
4349 __SECONDARY_RELOAD_CASE (V4SF, v4sf);
4350 __SECONDARY_RELOAD_CASE (V1DF, v1df);
4351 __SECONDARY_RELOAD_CASE (V2DF, v2df);
4352 __SECONDARY_RELOAD_CASE (V1TF, v1tf);
4353 default:
4354 gcc_unreachable ();
4355 }
4356 #undef __SECONDARY_RELOAD_CASE
4357 }
4358 }
4359
4360 /* We need a scratch register when loading a PLUS expression which
4361 is not a legitimate operand of the LOAD ADDRESS instruction. */
4362 /* LRA can deal with transformation of plus op very well -- so we
4363 don't need to prompt LRA in this case. */
4364 if (! lra_in_progress && in_p && s390_plus_operand (x, mode))
4365 sri->icode = (TARGET_64BIT ?
4366 CODE_FOR_reloaddi_plus : CODE_FOR_reloadsi_plus);
4367
4368 /* When performing a multiword move from or to memory we have to make sure
4369 the second chunk in memory is addressable without causing a displacement
4370 overflow. If it is not, we calculate the address in
4371 a scratch register. */
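/* Illustrative example with assumed values: without long displacements
   an 8-byte value at base+4092 has its second word at base+4096, which
   no longer fits into the 0..4095 displacement field, so the la reload
   below computes the address in a scratch register first.  */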
4372 if (MEM_P (x)
4373 && GET_CODE (XEXP (x, 0)) == PLUS
4374 && GET_CODE (XEXP (XEXP (x, 0), 1)) == CONST_INT
4375 && !DISP_IN_RANGE (INTVAL (XEXP (XEXP (x, 0), 1))
4376 + GET_MODE_SIZE (mode) - 1))
4377 {
4378 /* For GENERAL_REGS a displacement overflow is no problem if it occurs
4379 in an s_operand address, since we may fall back to lm/stm. So we only
4380 have to care about overflows in the b+i+d case. */
4381 if ((reg_classes_intersect_p (GENERAL_REGS, rclass)
4382 && s390_class_max_nregs (GENERAL_REGS, mode) > 1
4383 && GET_CODE (XEXP (XEXP (x, 0), 0)) == PLUS)
4384 /* For FP_REGS no lm/stm is available so this check is triggered
4385 for displacement overflows in b+i+d and b+d like addresses. */
4386 || (reg_classes_intersect_p (FP_REGS, rclass)
4387 && s390_class_max_nregs (FP_REGS, mode) > 1))
4388 {
4389 if (in_p)
4390 sri->icode = (TARGET_64BIT ?
4391 CODE_FOR_reloaddi_la_in :
4392 CODE_FOR_reloadsi_la_in);
4393 else
4394 sri->icode = (TARGET_64BIT ?
4395 CODE_FOR_reloaddi_la_out :
4396 CODE_FOR_reloadsi_la_out);
4397 }
4398 }
4399
4400 /* A scratch address register is needed when a symbolic constant is
4401 copied to r0 compiling with -fPIC. In other cases the target
4402 register might be used as temporary (see legitimize_pic_address). */
4403 if (in_p && SYMBOLIC_CONST (x) && flag_pic == 2 && rclass != ADDR_REGS)
4404 sri->icode = (TARGET_64BIT ?
4405 CODE_FOR_reloaddi_PIC_addr :
4406 CODE_FOR_reloadsi_PIC_addr);
4407
4408 /* Either scratch or no register needed. */
4409 return NO_REGS;
4410 }
4411
4412 /* Generate code to load SRC, which is a PLUS that is not a
4413 legitimate operand for the LA instruction, into TARGET.
4414 SCRATCH may be used as scratch register. */
4415
4416 void
4417 s390_expand_plus_operand (rtx target, rtx src,
4418 rtx scratch)
4419 {
4420 rtx sum1, sum2;
4421 struct s390_address ad;
4422
4423 /* src must be a PLUS; get its two operands. */
4424 gcc_assert (GET_CODE (src) == PLUS);
4425 gcc_assert (GET_MODE (src) == Pmode);
4426
4427 /* Check if either of the two operands is already scheduled
4428 for replacement by reload. This can happen e.g. when
4429 float registers occur in an address. */
4430 sum1 = find_replacement (&XEXP (src, 0));
4431 sum2 = find_replacement (&XEXP (src, 1));
4432 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4433
4434 /* If the address is already strictly valid, there's nothing to do. */
4435 if (!s390_decompose_address (src, &ad)
4436 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4437 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
4438 {
4439 /* Otherwise, one of the operands cannot be an address register;
4440 we reload its value into the scratch register. */
4441 if (true_regnum (sum1) < 1 || true_regnum (sum1) > 15)
4442 {
4443 emit_move_insn (scratch, sum1);
4444 sum1 = scratch;
4445 }
4446 if (true_regnum (sum2) < 1 || true_regnum (sum2) > 15)
4447 {
4448 emit_move_insn (scratch, sum2);
4449 sum2 = scratch;
4450 }
4451
4452 /* According to the way these invalid addresses are generated
4453 in reload.c, it should never happen (at least on s390) that
4454 *neither* of the PLUS components, after find_replacements
4455 was applied, is an address register. */
4456 if (sum1 == scratch && sum2 == scratch)
4457 {
4458 debug_rtx (src);
4459 gcc_unreachable ();
4460 }
4461
4462 src = gen_rtx_PLUS (Pmode, sum1, sum2);
4463 }
4464
4465 /* Emit the LOAD ADDRESS pattern. Note that reload of PLUS
4466 is only ever performed on addresses, so we can mark the
4467 sum as legitimate for LA in any case. */
4468 s390_load_address (target, src);
4469 }
4470
4471
4472 /* Return true if ADDR is a valid memory address.
4473 STRICT specifies whether strict register checking applies. */
4474
4475 static bool
4476 s390_legitimate_address_p (machine_mode mode, rtx addr, bool strict)
4477 {
4478 struct s390_address ad;
4479
4480 if (TARGET_Z10
4481 && larl_operand (addr, VOIDmode)
4482 && (mode == VOIDmode
4483 || s390_check_symref_alignment (addr, GET_MODE_SIZE (mode))))
4484 return true;
4485
4486 if (!s390_decompose_address (addr, &ad))
4487 return false;
4488
4489 if (strict)
4490 {
4491 if (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
4492 return false;
4493
4494 if (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx)))
4495 return false;
4496 }
4497 else
4498 {
4499 if (ad.base
4500 && !(REGNO (ad.base) >= FIRST_PSEUDO_REGISTER
4501 || REGNO_REG_CLASS (REGNO (ad.base)) == ADDR_REGS))
4502 return false;
4503
4504 if (ad.indx
4505 && !(REGNO (ad.indx) >= FIRST_PSEUDO_REGISTER
4506 || REGNO_REG_CLASS (REGNO (ad.indx)) == ADDR_REGS))
4507 return false;
4508 }
4509 return true;
4510 }
4511
4512 /* Return true if OP is a valid operand for the LA instruction.
4513 In 31-bit, we need to prove that the result is used as an
4514 address, as LA performs only a 31-bit addition. */
4515
4516 bool
4517 legitimate_la_operand_p (rtx op)
4518 {
4519 struct s390_address addr;
4520 if (!s390_decompose_address (op, &addr))
4521 return false;
4522
4523 return (TARGET_64BIT || addr.pointer);
4524 }
4525
4526 /* Return true if it is valid *and* preferable to use LA to
4527 compute the sum of OP1 and OP2. */
4528
4529 bool
4530 preferred_la_operand_p (rtx op1, rtx op2)
4531 {
4532 struct s390_address addr;
4533
4534 if (op2 != const0_rtx)
4535 op1 = gen_rtx_PLUS (Pmode, op1, op2);
4536
4537 if (!s390_decompose_address (op1, &addr))
4538 return false;
4539 if (addr.base && !REGNO_OK_FOR_BASE_P (REGNO (addr.base)))
4540 return false;
4541 if (addr.indx && !REGNO_OK_FOR_INDEX_P (REGNO (addr.indx)))
4542 return false;
4543
4544 /* Avoid LA instructions with index register on z196; it is
4545 preferable to use regular add instructions when possible.
4546 Starting with zEC12 the la with index register is "uncracked"
4547 again. */
4548 if (addr.indx && s390_tune == PROCESSOR_2817_Z196)
4549 return false;
4550
4551 if (!TARGET_64BIT && !addr.pointer)
4552 return false;
4553
4554 if (addr.pointer)
4555 return true;
4556
4557 if ((addr.base && REG_P (addr.base) && REG_POINTER (addr.base))
4558 || (addr.indx && REG_P (addr.indx) && REG_POINTER (addr.indx)))
4559 return true;
4560
4561 return false;
4562 }
4563
4564 /* Emit a forced load-address operation to load SRC into DST.
4565 This will use the LOAD ADDRESS instruction even in situations
4566 where legitimate_la_operand_p (SRC) returns false. */
4567
4568 void
4569 s390_load_address (rtx dst, rtx src)
4570 {
4571 if (TARGET_64BIT)
4572 emit_move_insn (dst, src);
4573 else
4574 emit_insn (gen_force_la_31 (dst, src));
4575 }
4576
4577 /* Return true if it is OK to use SYMBOL_REF in a relative address. */
4578
4579 bool
4580 s390_rel_address_ok_p (rtx symbol_ref)
4581 {
4582 tree decl;
4583
4584 if (symbol_ref == s390_got_symbol () || CONSTANT_POOL_ADDRESS_P (symbol_ref))
4585 return true;
4586
4587 decl = SYMBOL_REF_DECL (symbol_ref);
4588
4589 if (!flag_pic || SYMBOL_REF_LOCAL_P (symbol_ref))
4590 return (s390_pic_data_is_text_relative
4591 || (decl
4592 && TREE_CODE (decl) == FUNCTION_DECL));
4593
4594 return false;
4595 }
4596
4597 /* Return a legitimate reference for ORIG (an address) using the
4598 register REG. If REG is 0, a new pseudo is generated.
4599
4600 There are two types of references that must be handled:
4601
4602 1. Global data references must load the address from the GOT, via
4603 the PIC reg. An insn is emitted to do this load, and the reg is
4604 returned.
4605
4606 2. Static data references, constant pool addresses, and code labels
4607 compute the address as an offset from the GOT, whose base is in
4608 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
4609 differentiate them from global data objects. The returned
4610 address is the PIC reg + an unspec constant.
4611
4612 TARGET_LEGITIMIZE_ADDRESS_P rejects symbolic references unless the PIC
4613 reg also appears in the address. */
4614
4615 rtx
4616 legitimize_pic_address (rtx orig, rtx reg)
4617 {
4618 rtx addr = orig;
4619 rtx addend = const0_rtx;
4620 rtx new_rtx = orig;
4621
4622 gcc_assert (!TLS_SYMBOLIC_CONST (addr));
4623
4624 if (GET_CODE (addr) == CONST)
4625 addr = XEXP (addr, 0);
4626
4627 if (GET_CODE (addr) == PLUS)
4628 {
4629 addend = XEXP (addr, 1);
4630 addr = XEXP (addr, 0);
4631 }
4632
4633 if ((GET_CODE (addr) == LABEL_REF
4634 || (SYMBOL_REF_P (addr) && s390_rel_address_ok_p (addr))
4635 || (GET_CODE (addr) == UNSPEC &&
4636 (XINT (addr, 1) == UNSPEC_GOTENT
4637 || (TARGET_CPU_ZARCH && XINT (addr, 1) == UNSPEC_PLT))))
4638 && GET_CODE (addend) == CONST_INT)
4639 {
4640 /* This can be locally addressed. */
4641
4642 /* larl_operand requires UNSPECs to be wrapped in a const rtx. */
4643 rtx const_addr = (GET_CODE (addr) == UNSPEC ?
4644 gen_rtx_CONST (Pmode, addr) : addr);
4645
4646 if (TARGET_CPU_ZARCH
4647 && larl_operand (const_addr, VOIDmode)
4648 && INTVAL (addend) < HOST_WIDE_INT_1 << 31
4649 && INTVAL (addend) >= -(HOST_WIDE_INT_1 << 31))
4650 {
4651 if (INTVAL (addend) & 1)
4652 {
4653 /* LARL can't handle odd offsets, so emit a pair of LARL
4654 and LA. */
4655 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4656
4657 if (!DISP_IN_RANGE (INTVAL (addend)))
4658 {
4659 HOST_WIDE_INT even = INTVAL (addend) - 1;
4660 addr = gen_rtx_PLUS (Pmode, addr, GEN_INT (even));
4661 addr = gen_rtx_CONST (Pmode, addr);
4662 addend = const1_rtx;
4663 }
4664
4665 emit_move_insn (temp, addr);
4666 new_rtx = gen_rtx_PLUS (Pmode, temp, addend);
4667
4668 if (reg != 0)
4669 {
4670 s390_load_address (reg, new_rtx);
4671 new_rtx = reg;
4672 }
4673 }
4674 else
4675 {
4676 /* If the offset is even, we can just use LARL. This
4677 will happen automatically. */
4678 }
4679 }
4680 else
4681 {
4682 /* No larl - Access local symbols relative to the GOT. */
4683
4684 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4685
4686 if (reload_in_progress || reload_completed)
4687 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4688
4689 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
4690 if (addend != const0_rtx)
4691 addr = gen_rtx_PLUS (Pmode, addr, addend);
4692 addr = gen_rtx_CONST (Pmode, addr);
4693 addr = force_const_mem (Pmode, addr);
4694 emit_move_insn (temp, addr);
4695
4696 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4697 if (reg != 0)
4698 {
4699 s390_load_address (reg, new_rtx);
4700 new_rtx = reg;
4701 }
4702 }
4703 }
4704 else if (GET_CODE (addr) == SYMBOL_REF && addend == const0_rtx)
4705 {
4706 /* A non-local symbol reference without addend.
4707
4708 The symbol ref is wrapped into an UNSPEC to make sure the
4709 proper operand modifier (@GOT or @GOTENT) will be emitted.
4710 This will tell the linker to put the symbol into the GOT.
4711
4712 Additionally the code dereferencing the GOT slot is emitted here.
4713
4714 An addend to the symref needs to be added afterwards.
4715 legitimize_pic_address calls itself recursively to handle
4716 that case. So no need to do it here. */
4717
4718 if (reg == 0)
4719 reg = gen_reg_rtx (Pmode);
4720
4721 if (TARGET_Z10)
4722 {
4723 /* Use load relative if possible.
4724 lgrl <target>, sym@GOTENT */
4725 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4726 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4727 new_rtx = gen_const_mem (GET_MODE (reg), new_rtx);
4728
4729 emit_move_insn (reg, new_rtx);
4730 new_rtx = reg;
4731 }
4732 else if (flag_pic == 1)
4733 {
4734 /* Assume GOT offset is a valid displacement operand (< 4k
4735 or < 512k with z990). This is handled the same way in
4736 both 31- and 64-bit code (@GOT).
4737 lg <target>, sym@GOT(r12) */
4738
4739 if (reload_in_progress || reload_completed)
4740 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4741
4742 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4743 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4744 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
4745 new_rtx = gen_const_mem (Pmode, new_rtx);
4746 emit_move_insn (reg, new_rtx);
4747 new_rtx = reg;
4748 }
4749 else if (TARGET_CPU_ZARCH)
4750 {
4751 /* If the GOT offset might be >= 4k, we determine the position
4752 of the GOT entry via a PC-relative LARL (@GOTENT).
4753 larl temp, sym@GOTENT
4754 lg <target>, 0(temp) */
4755
4756 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4757
4758 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4759 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4760
4761 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTENT);
4762 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4763 emit_move_insn (temp, new_rtx);
4764
4765 new_rtx = gen_const_mem (Pmode, temp);
4766 emit_move_insn (reg, new_rtx);
4767
4768 new_rtx = reg;
4769 }
4770 else
4771 {
4772 /* If the GOT offset might be >= 4k, we have to load it
4773 from the literal pool (@GOT).
4774
4775 lg temp, lit-litbase(r13)
4776 lg <target>, 0(temp)
4777 lit: .long sym@GOT */
4778
4779 rtx temp = reg ? reg : gen_reg_rtx (Pmode);
4780
4781 gcc_assert (REGNO (temp) >= FIRST_PSEUDO_REGISTER
4782 || REGNO_REG_CLASS (REGNO (temp)) == ADDR_REGS);
4783
4784 if (reload_in_progress || reload_completed)
4785 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4786
4787 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
4788 addr = gen_rtx_CONST (Pmode, addr);
4789 addr = force_const_mem (Pmode, addr);
4790 emit_move_insn (temp, addr);
4791
4792 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4793 new_rtx = gen_const_mem (Pmode, new_rtx);
4794 emit_move_insn (reg, new_rtx);
4795 new_rtx = reg;
4796 }
4797 }
4798 else if (GET_CODE (addr) == UNSPEC && GET_CODE (addend) == CONST_INT)
4799 {
4800 gcc_assert (XVECLEN (addr, 0) == 1);
4801 switch (XINT (addr, 1))
4802 {
4803 /* These UNSPECs address symbols (or PLT slots) relative to the GOT
4804 (not GOT slots!). In general this will exceed the
4805 displacement range, so these values belong in the literal
4806 pool. */
4807 case UNSPEC_GOTOFF:
4808 case UNSPEC_PLTOFF:
4809 new_rtx = force_const_mem (Pmode, orig);
4810 break;
4811
4812 /* For -fPIC the GOT size might exceed the displacement
4813 range so make sure the value is in the literal pool. */
4814 case UNSPEC_GOT:
4815 if (flag_pic == 2)
4816 new_rtx = force_const_mem (Pmode, orig);
4817 break;
4818
4819 /* For @GOTENT larl is used. This is handled like local
4820 symbol refs. */
4821 case UNSPEC_GOTENT:
4822 gcc_unreachable ();
4823 break;
4824
4825 /* @PLT is OK as is on 64-bit, must be converted to
4826 GOT-relative @PLTOFF on 31-bit. */
4827 case UNSPEC_PLT:
4828 if (!TARGET_CPU_ZARCH)
4829 {
4830 rtx temp = reg? reg : gen_reg_rtx (Pmode);
4831
4832 if (reload_in_progress || reload_completed)
4833 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
4834
4835 addr = XVECEXP (addr, 0, 0);
4836 addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr),
4837 UNSPEC_PLTOFF);
4838 if (addend != const0_rtx)
4839 addr = gen_rtx_PLUS (Pmode, addr, addend);
4840 addr = gen_rtx_CONST (Pmode, addr);
4841 addr = force_const_mem (Pmode, addr);
4842 emit_move_insn (temp, addr);
4843
4844 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
4845 if (reg != 0)
4846 {
4847 s390_load_address (reg, new_rtx);
4848 new_rtx = reg;
4849 }
4850 }
4851 else
4852 /* On 64 bit larl can be used. This case is handled like
4853 local symbol refs. */
4854 gcc_unreachable ();
4855 break;
4856
4857 /* Everything else cannot happen. */
4858 default:
4859 gcc_unreachable ();
4860 }
4861 }
4862 else if (addend != const0_rtx)
4863 {
4864 /* Otherwise, compute the sum. */
4865
4866 rtx base = legitimize_pic_address (addr, reg);
4867 new_rtx = legitimize_pic_address (addend,
4868 base == reg ? NULL_RTX : reg);
4869 if (GET_CODE (new_rtx) == CONST_INT)
4870 new_rtx = plus_constant (Pmode, base, INTVAL (new_rtx));
4871 else
4872 {
4873 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
4874 {
4875 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
4876 new_rtx = XEXP (new_rtx, 1);
4877 }
4878 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
4879 }
4880
4881 if (GET_CODE (new_rtx) == CONST)
4882 new_rtx = XEXP (new_rtx, 0);
4883 new_rtx = force_operand (new_rtx, 0);
4884 }
4885
4886 return new_rtx;
4887 }
4888
4889 /* Load the thread pointer into a register. */
4890
4891 rtx
4892 s390_get_thread_pointer (void)
4893 {
4894 rtx tp = gen_reg_rtx (Pmode);
4895
4896 emit_move_insn (tp, gen_rtx_REG (Pmode, TP_REGNUM));
4897 mark_reg_pointer (tp, BITS_PER_WORD);
4898
4899 return tp;
4900 }
4901
4902 /* Emit a TLS call insn. The call target is the SYMBOL_REF stored
4903 in s390_tls_symbol, which always refers to __tls_get_offset.
4904 The returned offset is written to RESULT_REG and a USE rtx is
4905 generated for TLS_CALL. */
4906
4907 static GTY(()) rtx s390_tls_symbol;
4908
4909 static void
4910 s390_emit_tls_call_insn (rtx result_reg, rtx tls_call)
4911 {
4912 rtx insn;
4913
4914 if (!flag_pic)
4915 emit_insn (s390_load_got ());
4916
4917 if (!s390_tls_symbol)
4918 s390_tls_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tls_get_offset");
4919
4920 insn = s390_emit_call (s390_tls_symbol, tls_call, result_reg,
4921 gen_rtx_REG (Pmode, RETURN_REGNUM));
4922
4923 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), result_reg);
4924 RTL_CONST_CALL_P (insn) = 1;
4925 }
4926
4927 /* ADDR contains a thread-local SYMBOL_REF. Generate code to compute
4928 this (thread-local) address. REG may be used as temporary. */
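/* Rough sketch of the global-dynamic case below (illustrative, not
   literal assembly): the @TLSGD constant for ADDR is loaded from the
   literal pool into r2, __tls_get_offset is called, and the returned
   offset is added to the thread pointer to form the final address.  */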
4929
4930 static rtx
4931 legitimize_tls_address (rtx addr, rtx reg)
4932 {
4933 rtx new_rtx, tls_call, temp, base, r2;
4934 rtx_insn *insn;
4935
4936 if (GET_CODE (addr) == SYMBOL_REF)
4937 switch (tls_symbolic_operand (addr))
4938 {
4939 case TLS_MODEL_GLOBAL_DYNAMIC:
4940 start_sequence ();
4941 r2 = gen_rtx_REG (Pmode, 2);
4942 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_TLSGD);
4943 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4944 new_rtx = force_const_mem (Pmode, new_rtx);
4945 emit_move_insn (r2, new_rtx);
4946 s390_emit_tls_call_insn (r2, tls_call);
4947 insn = get_insns ();
4948 end_sequence ();
4949
4950 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
4951 temp = gen_reg_rtx (Pmode);
4952 emit_libcall_block (insn, temp, r2, new_rtx);
4953
4954 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4955 if (reg != 0)
4956 {
4957 s390_load_address (reg, new_rtx);
4958 new_rtx = reg;
4959 }
4960 break;
4961
4962 case TLS_MODEL_LOCAL_DYNAMIC:
4963 start_sequence ();
4964 r2 = gen_rtx_REG (Pmode, 2);
4965 tls_call = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM);
4966 new_rtx = gen_rtx_CONST (Pmode, tls_call);
4967 new_rtx = force_const_mem (Pmode, new_rtx);
4968 emit_move_insn (r2, new_rtx);
4969 s390_emit_tls_call_insn (r2, tls_call);
4970 insn = get_insns ();
4971 end_sequence ();
4972
4973 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TLSLDM_NTPOFF);
4974 temp = gen_reg_rtx (Pmode);
4975 emit_libcall_block (insn, temp, r2, new_rtx);
4976
4977 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
4978 base = gen_reg_rtx (Pmode);
4979 s390_load_address (base, new_rtx);
4980
4981 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_DTPOFF);
4982 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
4983 new_rtx = force_const_mem (Pmode, new_rtx);
4984 temp = gen_reg_rtx (Pmode);
4985 emit_move_insn (temp, new_rtx);
4986
4987 new_rtx = gen_rtx_PLUS (Pmode, base, temp);
4988 if (reg != 0)
4989 {
4990 s390_load_address (reg, new_rtx);
4991 new_rtx = reg;
4992 }
4993 break;
4994
4995 case TLS_MODEL_INITIAL_EXEC:
4996 if (flag_pic == 1)
4997 {
4998 /* Assume GOT offset < 4k. This is handled the same way
4999 in both 31- and 64-bit code. */
5000
5001 if (reload_in_progress || reload_completed)
5002 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5003
5004 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5005 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5006 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
5007 new_rtx = gen_const_mem (Pmode, new_rtx);
5008 temp = gen_reg_rtx (Pmode);
5009 emit_move_insn (temp, new_rtx);
5010 }
5011 else if (TARGET_CPU_ZARCH)
5012 {
5013 /* If the GOT offset might be >= 4k, we determine the position
5014 of the GOT entry via a PC-relative LARL. */
5015
5016 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5017 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5018 temp = gen_reg_rtx (Pmode);
5019 emit_move_insn (temp, new_rtx);
5020
5021 new_rtx = gen_const_mem (Pmode, temp);
5022 temp = gen_reg_rtx (Pmode);
5023 emit_move_insn (temp, new_rtx);
5024 }
5025 else if (flag_pic)
5026 {
5027 /* If the GOT offset might be >= 4k, we have to load it
5028 from the literal pool. */
5029
5030 if (reload_in_progress || reload_completed)
5031 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
5032
5033 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTNTPOFF);
5034 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5035 new_rtx = force_const_mem (Pmode, new_rtx);
5036 temp = gen_reg_rtx (Pmode);
5037 emit_move_insn (temp, new_rtx);
5038
5039 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, temp);
5040 new_rtx = gen_const_mem (Pmode, new_rtx);
5041
5042 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5043 temp = gen_reg_rtx (Pmode);
5044 emit_insn (gen_rtx_SET (temp, new_rtx));
5045 }
5046 else
5047 {
5048 /* In position-dependent code, load the absolute address of
5049 the GOT entry from the literal pool. */
5050
5051 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_INDNTPOFF);
5052 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5053 new_rtx = force_const_mem (Pmode, new_rtx);
5054 temp = gen_reg_rtx (Pmode);
5055 emit_move_insn (temp, new_rtx);
5056
5057 new_rtx = temp;
5058 new_rtx = gen_const_mem (Pmode, new_rtx);
5059 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, new_rtx, addr), UNSPEC_TLS_LOAD);
5060 temp = gen_reg_rtx (Pmode);
5061 emit_insn (gen_rtx_SET (temp, new_rtx));
5062 }
5063
5064 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5065 if (reg != 0)
5066 {
5067 s390_load_address (reg, new_rtx);
5068 new_rtx = reg;
5069 }
5070 break;
5071
5072 case TLS_MODEL_LOCAL_EXEC:
5073 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_NTPOFF);
5074 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5075 new_rtx = force_const_mem (Pmode, new_rtx);
5076 temp = gen_reg_rtx (Pmode);
5077 emit_move_insn (temp, new_rtx);
5078
5079 new_rtx = gen_rtx_PLUS (Pmode, s390_get_thread_pointer (), temp);
5080 if (reg != 0)
5081 {
5082 s390_load_address (reg, new_rtx);
5083 new_rtx = reg;
5084 }
5085 break;
5086
5087 default:
5088 gcc_unreachable ();
5089 }
5090
5091 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == UNSPEC)
5092 {
5093 switch (XINT (XEXP (addr, 0), 1))
5094 {
5095 case UNSPEC_INDNTPOFF:
5096 gcc_assert (TARGET_CPU_ZARCH);
5097 new_rtx = addr;
5098 break;
5099
5100 default:
5101 gcc_unreachable ();
5102 }
5103 }
5104
5105 else if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS
5106 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
5107 {
5108 new_rtx = XEXP (XEXP (addr, 0), 0);
5109 if (GET_CODE (new_rtx) != SYMBOL_REF)
5110 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
5111
5112 new_rtx = legitimize_tls_address (new_rtx, reg);
5113 new_rtx = plus_constant (Pmode, new_rtx,
5114 INTVAL (XEXP (XEXP (addr, 0), 1)));
5115 new_rtx = force_operand (new_rtx, 0);
5116 }
5117
5118 else
5119 gcc_unreachable (); /* for now ... */
5120
5121 return new_rtx;
5122 }
5123
5124 /* Emit insns making the address in operands[1] valid for a standard
5125 move to operands[0]. operands[1] is replaced by an address which
5126 should be used instead of the former RTX to emit the move
5127 pattern. */
5128
5129 void
5130 emit_symbolic_move (rtx *operands)
5131 {
5132 rtx temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
5133
5134 if (GET_CODE (operands[0]) == MEM)
5135 operands[1] = force_reg (Pmode, operands[1]);
5136 else if (TLS_SYMBOLIC_CONST (operands[1]))
5137 operands[1] = legitimize_tls_address (operands[1], temp);
5138 else if (flag_pic)
5139 operands[1] = legitimize_pic_address (operands[1], temp);
5140 }
5141
5142 /* Try machine-dependent ways of modifying an illegitimate address X
5143 to be legitimate. If we find one, return the new, valid address.
5144
5145 OLDX is the address as it was before break_out_memory_refs was called.
5146 In some cases it is useful to look at this to decide what needs to be done.
5147
5148 MODE is the mode of the operand pointed to by X.
5149
5150 When -fpic is used, special handling is needed for symbolic references.
5151 See comments by legitimize_pic_address for details. */
5152
5153 static rtx
5154 s390_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
5155 machine_mode mode ATTRIBUTE_UNUSED)
5156 {
5157 rtx constant_term = const0_rtx;
5158
5159 if (TLS_SYMBOLIC_CONST (x))
5160 {
5161 x = legitimize_tls_address (x, 0);
5162
5163 if (s390_legitimate_address_p (mode, x, FALSE))
5164 return x;
5165 }
5166 else if (GET_CODE (x) == PLUS
5167 && (TLS_SYMBOLIC_CONST (XEXP (x, 0))
5168 || TLS_SYMBOLIC_CONST (XEXP (x, 1))))
5169 {
5170 return x;
5171 }
5172 else if (flag_pic)
5173 {
5174 if (SYMBOLIC_CONST (x)
5175 || (GET_CODE (x) == PLUS
5176 && (SYMBOLIC_CONST (XEXP (x, 0))
5177 || SYMBOLIC_CONST (XEXP (x, 1)))))
5178 x = legitimize_pic_address (x, 0);
5179
5180 if (s390_legitimate_address_p (mode, x, FALSE))
5181 return x;
5182 }
5183
5184 x = eliminate_constant_term (x, &constant_term);
5185
5186 /* Optimize loading of large displacements by splitting them
5187 into the multiple of 4K and the rest; this allows the
5188 former to be CSE'd if possible.
5189
5190 Don't do this if the displacement is added to a register
5191 pointing into the stack frame, as the offsets will
5192 change later anyway. */
5193
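/* Illustrative example with an assumed displacement: a constant term of
   0x12345 is split below into upper = 0x12000 and lower = 0x345; the
   upper part is loaded into a pseudo (and may be CSE'd), while the
   lower part remains as an in-range displacement.  */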
5194 if (GET_CODE (constant_term) == CONST_INT
5195 && !TARGET_LONG_DISPLACEMENT
5196 && !DISP_IN_RANGE (INTVAL (constant_term))
5197 && !(REG_P (x) && REGNO_PTR_FRAME_P (REGNO (x))))
5198 {
5199 HOST_WIDE_INT lower = INTVAL (constant_term) & 0xfff;
5200 HOST_WIDE_INT upper = INTVAL (constant_term) ^ lower;
5201
5202 rtx temp = gen_reg_rtx (Pmode);
5203 rtx val = force_operand (GEN_INT (upper), temp);
5204 if (val != temp)
5205 emit_move_insn (temp, val);
5206
5207 x = gen_rtx_PLUS (Pmode, x, temp);
5208 constant_term = GEN_INT (lower);
5209 }
5210
5211 if (GET_CODE (x) == PLUS)
5212 {
5213 if (GET_CODE (XEXP (x, 0)) == REG)
5214 {
5215 rtx temp = gen_reg_rtx (Pmode);
5216 rtx val = force_operand (XEXP (x, 1), temp);
5217 if (val != temp)
5218 emit_move_insn (temp, val);
5219
5220 x = gen_rtx_PLUS (Pmode, XEXP (x, 0), temp);
5221 }
5222
5223 else if (GET_CODE (XEXP (x, 1)) == REG)
5224 {
5225 rtx temp = gen_reg_rtx (Pmode);
5226 rtx val = force_operand (XEXP (x, 0), temp);
5227 if (val != temp)
5228 emit_move_insn (temp, val);
5229
5230 x = gen_rtx_PLUS (Pmode, temp, XEXP (x, 1));
5231 }
5232 }
5233
5234 if (constant_term != const0_rtx)
5235 x = gen_rtx_PLUS (Pmode, x, constant_term);
5236
5237 return x;
5238 }
5239
5240 /* Try a machine-dependent way of reloading an illegitimate address AD
5241 operand. If we find one, push the reload and return the new address.
5242
5243 MODE is the mode of the enclosing MEM. OPNUM is the operand number
5244 and TYPE is the reload type of the current reload. */
5245
5246 rtx
5247 legitimize_reload_address (rtx ad, machine_mode mode ATTRIBUTE_UNUSED,
5248 int opnum, int type)
5249 {
5250 if (!optimize || TARGET_LONG_DISPLACEMENT)
5251 return NULL_RTX;
5252
5253 if (GET_CODE (ad) == PLUS)
5254 {
5255 rtx tem = simplify_binary_operation (PLUS, Pmode,
5256 XEXP (ad, 0), XEXP (ad, 1));
5257 if (tem)
5258 ad = tem;
5259 }
5260
5261 if (GET_CODE (ad) == PLUS
5262 && GET_CODE (XEXP (ad, 0)) == REG
5263 && GET_CODE (XEXP (ad, 1)) == CONST_INT
5264 && !DISP_IN_RANGE (INTVAL (XEXP (ad, 1))))
5265 {
5266 HOST_WIDE_INT lower = INTVAL (XEXP (ad, 1)) & 0xfff;
5267 HOST_WIDE_INT upper = INTVAL (XEXP (ad, 1)) ^ lower;
5268 rtx cst, tem, new_rtx;
5269
5270 cst = GEN_INT (upper);
5271 if (!legitimate_reload_constant_p (cst))
5272 cst = force_const_mem (Pmode, cst);
5273
5274 tem = gen_rtx_PLUS (Pmode, XEXP (ad, 0), cst);
5275 new_rtx = gen_rtx_PLUS (Pmode, tem, GEN_INT (lower));
5276
5277 push_reload (XEXP (tem, 1), 0, &XEXP (tem, 1), 0,
5278 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0,
5279 opnum, (enum reload_type) type);
5280 return new_rtx;
5281 }
5282
5283 return NULL_RTX;
5284 }
5285
5286 /* Emit code to move LEN bytes from SRC to DST. Return false if we punt to a library call. */
5287
5288 bool
5289 s390_expand_movmem (rtx dst, rtx src, rtx len)
5290 {
5291 /* When tuning for z10 or higher we rely on the Glibc functions to
5292 do the right thing. Only for constant lengths below 64k do we
5293 generate inline code. */
5294 if (s390_tune >= PROCESSOR_2097_Z10
5295 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5296 return false;
5297
5298 /* Expand memcpy for constant length operands without a loop if it
5299 is shorter that way.
5300
5301 With a constant length argument a
5302 memcpy loop (without pfd) is 36 bytes -> 6 * mvc */
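/* Illustrative example with an assumed length: a constant len of 700,
   with mvcle not enabled, expands into three mvc blocks below - two
   full 256-byte mvcs followed by one 188-byte mvc.  */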
5303 if (GET_CODE (len) == CONST_INT
5304 && INTVAL (len) >= 0
5305 && INTVAL (len) <= 256 * 6
5306 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5307 {
5308 HOST_WIDE_INT o, l;
5309
5310 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5311 {
5312 rtx newdst = adjust_address (dst, BLKmode, o);
5313 rtx newsrc = adjust_address (src, BLKmode, o);
5314 emit_insn (gen_movmem_short (newdst, newsrc,
5315 GEN_INT (l > 256 ? 255 : l - 1)));
5316 }
5317 }
5318
5319 else if (TARGET_MVCLE)
5320 {
5321 emit_insn (gen_movmem_long (dst, src, convert_to_mode (Pmode, len, 1)));
5322 }
5323
5324 else
5325 {
5326 rtx dst_addr, src_addr, count, blocks, temp;
5327 rtx_code_label *loop_start_label = gen_label_rtx ();
5328 rtx_code_label *loop_end_label = gen_label_rtx ();
5329 rtx_code_label *end_label = gen_label_rtx ();
5330 machine_mode mode;
5331
5332 mode = GET_MODE (len);
5333 if (mode == VOIDmode)
5334 mode = Pmode;
5335
5336 dst_addr = gen_reg_rtx (Pmode);
5337 src_addr = gen_reg_rtx (Pmode);
5338 count = gen_reg_rtx (mode);
5339 blocks = gen_reg_rtx (mode);
5340
5341 convert_move (count, len, 1);
5342 emit_cmp_and_jump_insns (count, const0_rtx,
5343 EQ, NULL_RTX, mode, 1, end_label);
5344
5345 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5346 emit_move_insn (src_addr, force_operand (XEXP (src, 0), NULL_RTX));
5347 dst = change_address (dst, VOIDmode, dst_addr);
5348 src = change_address (src, VOIDmode, src_addr);
5349
5350 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5351 OPTAB_DIRECT);
5352 if (temp != count)
5353 emit_move_insn (count, temp);
5354
5355 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5356 OPTAB_DIRECT);
5357 if (temp != blocks)
5358 emit_move_insn (blocks, temp);
5359
5360 emit_cmp_and_jump_insns (blocks, const0_rtx,
5361 EQ, NULL_RTX, mode, 1, loop_end_label);
5362
5363 emit_label (loop_start_label);
5364
5365 if (TARGET_Z10
5366 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 768))
5367 {
5368 rtx prefetch;
5369
5370 /* Issue a read prefetch for the +3 cache line. */
5371 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, src_addr, GEN_INT (768)),
5372 const0_rtx, const0_rtx);
5373 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5374 emit_insn (prefetch);
5375
5376 /* Issue a write prefetch for the +3 cache line. */
5377 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (768)),
5378 const1_rtx, const0_rtx);
5379 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5380 emit_insn (prefetch);
5381 }
5382
5383 emit_insn (gen_movmem_short (dst, src, GEN_INT (255)));
5384 s390_load_address (dst_addr,
5385 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5386 s390_load_address (src_addr,
5387 gen_rtx_PLUS (Pmode, src_addr, GEN_INT (256)));
5388
5389 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5390 OPTAB_DIRECT);
5391 if (temp != blocks)
5392 emit_move_insn (blocks, temp);
5393
5394 emit_cmp_and_jump_insns (blocks, const0_rtx,
5395 EQ, NULL_RTX, mode, 1, loop_end_label);
5396
5397 emit_jump (loop_start_label);
5398 emit_label (loop_end_label);
5399
5400 emit_insn (gen_movmem_short (dst, src,
5401 convert_to_mode (Pmode, count, 1)));
5402 emit_label (end_label);
5403 }
5404 return true;
5405 }
5406
5407 /* Emit code to set LEN bytes at DST to VAL.
5408 Make use of clrmem if VAL is zero. */
5409
5410 void
5411 s390_expand_setmem (rtx dst, rtx len, rtx val)
5412 {
5413 if (GET_CODE (len) == CONST_INT && INTVAL (len) <= 0)
5414 return;
5415
5416 gcc_assert (GET_CODE (val) == CONST_INT || GET_MODE (val) == QImode);
5417
5418 /* Expand setmem/clrmem for a constant length operand without a
5419 loop if it will be shorter that way.
5420 With a constant length and without pfd argument a
5421 clrmem loop is 32 bytes -> 5.3 * xc
5422 setmem loop is 36 bytes -> 3.6 * (mvi/stc + mvc) */
5423 if (GET_CODE (len) == CONST_INT
5424 && ((INTVAL (len) <= 256 * 5 && val == const0_rtx)
5425 || INTVAL (len) <= 257 * 3)
5426 && (!TARGET_MVCLE || INTVAL (len) <= 256))
5427 {
5428 HOST_WIDE_INT o, l;
5429
5430 if (val == const0_rtx)
5431 /* clrmem: emit 256 byte blockwise XCs. */
5432 for (l = INTVAL (len), o = 0; l > 0; l -= 256, o += 256)
5433 {
5434 rtx newdst = adjust_address (dst, BLKmode, o);
5435 emit_insn (gen_clrmem_short (newdst,
5436 GEN_INT (l > 256 ? 255 : l - 1)));
5437 }
5438 else
5439 /* setmem: emit 1(mvi) + 256(mvc) byte blockwise memsets by
5440 setting first byte to val and using a 256 byte mvc with one
5441 byte overlap to propagate the byte. */
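/* Illustration of the overlap trick (not literal output): the mvi stores
   VAL at dst[0]; an mvc from dst to dst+1 then copies byte by byte, so
   each destination byte receives the value written one byte earlier and
   VAL is replicated across the whole block.  */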
5442 for (l = INTVAL (len), o = 0; l > 0; l -= 257, o += 257)
5443 {
5444 rtx newdst = adjust_address (dst, BLKmode, o);
5445 emit_move_insn (adjust_address (dst, QImode, o), val);
5446 if (l > 1)
5447 {
5448 rtx newdstp1 = adjust_address (dst, BLKmode, o + 1);
5449 emit_insn (gen_movmem_short (newdstp1, newdst,
5450 GEN_INT (l > 257 ? 255 : l - 2)));
5451 }
5452 }
5453 }
5454
5455 else if (TARGET_MVCLE)
5456 {
5457 val = force_not_mem (convert_modes (Pmode, QImode, val, 1));
5458 if (TARGET_64BIT)
5459 emit_insn (gen_setmem_long_di (dst, convert_to_mode (Pmode, len, 1),
5460 val));
5461 else
5462 emit_insn (gen_setmem_long_si (dst, convert_to_mode (Pmode, len, 1),
5463 val));
5464 }
5465
5466 else
5467 {
5468 rtx dst_addr, count, blocks, temp, dstp1 = NULL_RTX;
5469 rtx_code_label *loop_start_label = gen_label_rtx ();
5470 rtx_code_label *onebyte_end_label = gen_label_rtx ();
5471 rtx_code_label *zerobyte_end_label = gen_label_rtx ();
5472 rtx_code_label *restbyte_end_label = gen_label_rtx ();
5473 machine_mode mode;
5474
5475 mode = GET_MODE (len);
5476 if (mode == VOIDmode)
5477 mode = Pmode;
5478
5479 dst_addr = gen_reg_rtx (Pmode);
5480 count = gen_reg_rtx (mode);
5481 blocks = gen_reg_rtx (mode);
5482
5483 convert_move (count, len, 1);
5484 emit_cmp_and_jump_insns (count, const0_rtx,
5485 EQ, NULL_RTX, mode, 1, zerobyte_end_label,
5486 profile_probability::very_unlikely ());
5487
5488 /* We need to make a copy of the target address since memset is
5489 supposed to return it unmodified. We have to make it here
5490 already since the new reg is used at onebyte_end_label. */
5491 emit_move_insn (dst_addr, force_operand (XEXP (dst, 0), NULL_RTX));
5492 dst = change_address (dst, VOIDmode, dst_addr);
5493
5494 if (val != const0_rtx)
5495 {
5496 /* When using the overlapping mvc the original target
5497 address is only accessed as a single-byte entity (even by
5498 the mvc reading this value). */
5499 set_mem_size (dst, 1);
5500 dstp1 = adjust_address (dst, VOIDmode, 1);
5501 emit_cmp_and_jump_insns (count,
5502 const1_rtx, EQ, NULL_RTX, mode, 1,
5503 onebyte_end_label,
5504 profile_probability::very_unlikely ());
5505 }
5506
5507 /* There is one unconditional (mvi+mvc)/xc after the loop
5508 dealing with the rest of the bytes; subtracting two (mvi+mvc)
5509 or one (xc) here leaves that many bytes to be handled by
5510 it. */
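/* Worked example with an assumed length: for len = 260 and VAL != 0,
   count becomes 258, blocks becomes 1, and the loop sets 256 bytes; the
   trailing mvi+mvc then sets 1 + 3 bytes (mvc length field 258 & 0xff
   == 2), giving 260 bytes in total.  */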
5511 temp = expand_binop (mode, add_optab, count,
5512 val == const0_rtx ? constm1_rtx : GEN_INT (-2),
5513 count, 1, OPTAB_DIRECT);
5514 if (temp != count)
5515 emit_move_insn (count, temp);
5516
5517 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5518 OPTAB_DIRECT);
5519 if (temp != blocks)
5520 emit_move_insn (blocks, temp);
5521
5522 emit_cmp_and_jump_insns (blocks, const0_rtx,
5523 EQ, NULL_RTX, mode, 1, restbyte_end_label);
5524
5525 emit_jump (loop_start_label);
5526
5527 if (val != const0_rtx)
5528 {
5529 /* The 1 byte != 0 special case. Not handled efficiently
5530 since we require two jumps for that. However, this
5531 should be very rare. */
5532 emit_label (onebyte_end_label);
5533 emit_move_insn (adjust_address (dst, QImode, 0), val);
5534 emit_jump (zerobyte_end_label);
5535 }
5536
5537 emit_label (loop_start_label);
5538
5539 if (TARGET_Z10
5540 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 1024))
5541 {
5542 /* Issue a write prefetch for the +4 cache line. */
5543 rtx prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, dst_addr,
5544 GEN_INT (1024)),
5545 const1_rtx, const0_rtx);
5546 emit_insn (prefetch);
5547 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5548 }
5549
5550 if (val == const0_rtx)
5551 emit_insn (gen_clrmem_short (dst, GEN_INT (255)));
5552 else
5553 {
5554 /* Set the first byte in the block to the value and use an
5555 overlapping mvc for the block. */
5556 emit_move_insn (adjust_address (dst, QImode, 0), val);
5557 emit_insn (gen_movmem_short (dstp1, dst, GEN_INT (254)));
5558 }
5559 s390_load_address (dst_addr,
5560 gen_rtx_PLUS (Pmode, dst_addr, GEN_INT (256)));
5561
5562 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5563 OPTAB_DIRECT);
5564 if (temp != blocks)
5565 emit_move_insn (blocks, temp);
5566
5567 emit_cmp_and_jump_insns (blocks, const0_rtx,
5568 NE, NULL_RTX, mode, 1, loop_start_label);
5569
5570 emit_label (restbyte_end_label);
5571
5572 if (val == const0_rtx)
5573 emit_insn (gen_clrmem_short (dst, convert_to_mode (Pmode, count, 1)));
5574 else
5575 {
5576 /* Set the first byte in the block to the value and use an
5577 overlapping mvc for the block. */
5578 emit_move_insn (adjust_address (dst, QImode, 0), val);
5579 /* execute only uses the lowest 8 bits of count, which is
5580 exactly what we need here. */
5581 emit_insn (gen_movmem_short (dstp1, dst,
5582 convert_to_mode (Pmode, count, 1)));
5583 }
5584
5585 emit_label (zerobyte_end_label);
5586 }
5587 }
5588
5589 /* Emit code to compare LEN bytes at OP0 with those at OP1,
5590 and return the result in TARGET. */
5591
5592 bool
5593 s390_expand_cmpmem (rtx target, rtx op0, rtx op1, rtx len)
5594 {
5595 rtx ccreg = gen_rtx_REG (CCUmode, CC_REGNUM);
5596 rtx tmp;
5597
5598 /* When tuning for z10 or higher we rely on the Glibc functions to
5599 do the right thing. Only for constant lengths below 64k do we
5600 generate inline code. */
5601 if (s390_tune >= PROCESSOR_2097_Z10
5602 && (GET_CODE (len) != CONST_INT || INTVAL (len) > (1<<16)))
5603 return false;
5604
5605 /* As the result of CMPINT is inverted compared to what we need,
5606 we have to swap the operands. */
5607 tmp = op0; op0 = op1; op1 = tmp;
5608
5609 if (GET_CODE (len) == CONST_INT && INTVAL (len) >= 0 && INTVAL (len) <= 256)
5610 {
5611 if (INTVAL (len) > 0)
5612 {
5613 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (INTVAL (len) - 1)));
5614 emit_insn (gen_cmpint (target, ccreg));
5615 }
5616 else
5617 emit_move_insn (target, const0_rtx);
5618 }
5619 else if (TARGET_MVCLE)
5620 {
5621 emit_insn (gen_cmpmem_long (op0, op1, convert_to_mode (Pmode, len, 1)));
5622 emit_insn (gen_cmpint (target, ccreg));
5623 }
5624 else
5625 {
5626 rtx addr0, addr1, count, blocks, temp;
5627 rtx_code_label *loop_start_label = gen_label_rtx ();
5628 rtx_code_label *loop_end_label = gen_label_rtx ();
5629 rtx_code_label *end_label = gen_label_rtx ();
5630 machine_mode mode;
5631
5632 mode = GET_MODE (len);
5633 if (mode == VOIDmode)
5634 mode = Pmode;
5635
5636 addr0 = gen_reg_rtx (Pmode);
5637 addr1 = gen_reg_rtx (Pmode);
5638 count = gen_reg_rtx (mode);
5639 blocks = gen_reg_rtx (mode);
5640
5641 convert_move (count, len, 1);
5642 emit_cmp_and_jump_insns (count, const0_rtx,
5643 EQ, NULL_RTX, mode, 1, end_label);
5644
5645 emit_move_insn (addr0, force_operand (XEXP (op0, 0), NULL_RTX));
5646 emit_move_insn (addr1, force_operand (XEXP (op1, 0), NULL_RTX));
5647 op0 = change_address (op0, VOIDmode, addr0);
5648 op1 = change_address (op1, VOIDmode, addr1);
5649
5650 temp = expand_binop (mode, add_optab, count, constm1_rtx, count, 1,
5651 OPTAB_DIRECT);
5652 if (temp != count)
5653 emit_move_insn (count, temp);
5654
5655 temp = expand_binop (mode, lshr_optab, count, GEN_INT (8), blocks, 1,
5656 OPTAB_DIRECT);
5657 if (temp != blocks)
5658 emit_move_insn (blocks, temp);
5659
5660 emit_cmp_and_jump_insns (blocks, const0_rtx,
5661 EQ, NULL_RTX, mode, 1, loop_end_label);
5662
5663 emit_label (loop_start_label);
5664
5665 if (TARGET_Z10
5666 && (GET_CODE (len) != CONST_INT || INTVAL (len) > 512))
5667 {
5668 rtx prefetch;
5669
5670 /* Issue a read prefetch for the +2 cache line of operand 1. */
5671 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr0, GEN_INT (512)),
5672 const0_rtx, const0_rtx);
5673 emit_insn (prefetch);
5674 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5675
5676 /* Issue a read prefetch for the +2 cache line of operand 2. */
5677 prefetch = gen_prefetch (gen_rtx_PLUS (Pmode, addr1, GEN_INT (512)),
5678 const0_rtx, const0_rtx);
5679 emit_insn (prefetch);
5680 PREFETCH_SCHEDULE_BARRIER_P (prefetch) = true;
5681 }
5682
5683 emit_insn (gen_cmpmem_short (op0, op1, GEN_INT (255)));
5684 temp = gen_rtx_NE (VOIDmode, ccreg, const0_rtx);
5685 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5686 gen_rtx_LABEL_REF (VOIDmode, end_label), pc_rtx);
5687 temp = gen_rtx_SET (pc_rtx, temp);
5688 emit_jump_insn (temp);
5689
5690 s390_load_address (addr0,
5691 gen_rtx_PLUS (Pmode, addr0, GEN_INT (256)));
5692 s390_load_address (addr1,
5693 gen_rtx_PLUS (Pmode, addr1, GEN_INT (256)));
5694
5695 temp = expand_binop (mode, add_optab, blocks, constm1_rtx, blocks, 1,
5696 OPTAB_DIRECT);
5697 if (temp != blocks)
5698 emit_move_insn (blocks, temp);
5699
5700 emit_cmp_and_jump_insns (blocks, const0_rtx,
5701 EQ, NULL_RTX, mode, 1, loop_end_label);
5702
5703 emit_jump (loop_start_label);
5704 emit_label (loop_end_label);
5705
5706 emit_insn (gen_cmpmem_short (op0, op1,
5707 convert_to_mode (Pmode, count, 1)));
5708 emit_label (end_label);
5709
5710 emit_insn (gen_cmpint (target, ccreg));
5711 }
5712 return true;
5713 }
5714
5715 /* Emit a conditional jump to LABEL for condition code mask MASK using
5716 comparison operator COMPARISON. Return the emitted jump insn. */
5717
5718 static rtx_insn *
5719 s390_emit_ccraw_jump (HOST_WIDE_INT mask, enum rtx_code comparison, rtx label)
5720 {
5721 rtx temp;
5722
5723 gcc_assert (comparison == EQ || comparison == NE);
5724 gcc_assert (mask > 0 && mask < 15);
5725
5726 temp = gen_rtx_fmt_ee (comparison, VOIDmode,
5727 gen_rtx_REG (CCRAWmode, CC_REGNUM), GEN_INT (mask));
5728 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
5729 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
5730 temp = gen_rtx_SET (pc_rtx, temp);
5731 return emit_jump_insn (temp);
5732 }
5733
5734 /* Emit the instructions to implement strlen of STRING and store the
5735 result in TARGET. The string has the known ALIGNMENT. This
5736 version uses vector instructions and is therefore not appropriate
5737 for targets prior to z13. */
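/* Illustrative note (added annotation, not part of the original sources):
   a typical caller is the strlen expander, e.g.

     unsigned long
     my_strlen (const char *s)
     {
       return __builtin_strlen (s);
     }

   The loop below scans 16 bytes per iteration with a vector load and
   vfene; a vll covers the potentially unaligned first chunk.  */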
5738
5739 void
5740 s390_expand_vec_strlen (rtx target, rtx string, rtx alignment)
5741 {
5742 rtx highest_index_to_load_reg = gen_reg_rtx (Pmode);
5743 rtx str_reg = gen_reg_rtx (V16QImode);
5744 rtx str_addr_base_reg = gen_reg_rtx (Pmode);
5745 rtx str_idx_reg = gen_reg_rtx (Pmode);
5746 rtx result_reg = gen_reg_rtx (V16QImode);
5747 rtx is_aligned_label = gen_label_rtx ();
5748 rtx into_loop_label = NULL_RTX;
5749 rtx loop_start_label = gen_label_rtx ();
5750 rtx temp;
5751 rtx len = gen_reg_rtx (QImode);
5752 rtx cond;
5753
5754 s390_load_address (str_addr_base_reg, XEXP (string, 0));
5755 emit_move_insn (str_idx_reg, const0_rtx);
5756
5757 if (INTVAL (alignment) < 16)
5758 {
5759 /* Check whether the address happens to be aligned properly and, if
5760 so, jump directly to the aligned loop. */
5761 emit_cmp_and_jump_insns (gen_rtx_AND (Pmode,
5762 str_addr_base_reg, GEN_INT (15)),
5763 const0_rtx, EQ, NULL_RTX,
5764 Pmode, 1, is_aligned_label);
5765
5766 temp = gen_reg_rtx (Pmode);
5767 temp = expand_binop (Pmode, and_optab, str_addr_base_reg,
5768 GEN_INT (15), temp, 1, OPTAB_DIRECT);
5769 gcc_assert (REG_P (temp));
5770 highest_index_to_load_reg =
5771 expand_binop (Pmode, sub_optab, GEN_INT (15), temp,
5772 highest_index_to_load_reg, 1, OPTAB_DIRECT);
5773 gcc_assert (REG_P (highest_index_to_load_reg));
5774 emit_insn (gen_vllv16qi (str_reg,
5775 convert_to_mode (SImode, highest_index_to_load_reg, 1),
5776 gen_rtx_MEM (BLKmode, str_addr_base_reg)));
5777
5778 into_loop_label = gen_label_rtx ();
5779 s390_emit_jump (into_loop_label, NULL_RTX);
5780 emit_barrier ();
5781 }
5782
5783 emit_label (is_aligned_label);
5784 LABEL_NUSES (is_aligned_label) = INTVAL (alignment) < 16 ? 2 : 1;
5785
5786 /* Reaching this point we only perform 16 byte aligned
5787 loads. */
5788 emit_move_insn (highest_index_to_load_reg, GEN_INT (15));
5789
5790 emit_label (loop_start_label);
5791 LABEL_NUSES (loop_start_label) = 1;
5792
5793 /* Load 16 bytes of the string into VR. */
5794 emit_move_insn (str_reg,
5795 gen_rtx_MEM (V16QImode,
5796 gen_rtx_PLUS (Pmode, str_idx_reg,
5797 str_addr_base_reg)));
5798 if (into_loop_label != NULL_RTX)
5799 {
5800 emit_label (into_loop_label);
5801 LABEL_NUSES (into_loop_label) = 1;
5802 }
5803
5804 /* Increment string index by 16 bytes. */
5805 expand_binop (Pmode, add_optab, str_idx_reg, GEN_INT (16),
5806 str_idx_reg, 1, OPTAB_DIRECT);
5807
5808 emit_insn (gen_vec_vfenesv16qi (result_reg, str_reg, str_reg,
5809 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5810
5811 add_int_reg_note (s390_emit_ccraw_jump (8, NE, loop_start_label),
5812 REG_BR_PROB,
5813 profile_probability::very_likely ().to_reg_br_prob_note ());
5814 emit_insn (gen_vec_extractv16qiqi (len, result_reg, GEN_INT (7)));
5815
5816 /* If the string pointer wasn't aligned we have loaded less than 16
5817 bytes and the remaining bytes got filled with zeros (by vll).
5818 Now we have to check whether the resulting index lies within the
5819 bytes actually part of the string. */
5820
5821 cond = s390_emit_compare (GT, convert_to_mode (Pmode, len, 1),
5822 highest_index_to_load_reg);
5823 s390_load_address (highest_index_to_load_reg,
5824 gen_rtx_PLUS (Pmode, highest_index_to_load_reg,
5825 const1_rtx));
5826 if (TARGET_64BIT)
5827 emit_insn (gen_movdicc (str_idx_reg, cond,
5828 highest_index_to_load_reg, str_idx_reg));
5829 else
5830 emit_insn (gen_movsicc (str_idx_reg, cond,
5831 highest_index_to_load_reg, str_idx_reg));
5832
5833 add_reg_br_prob_note (s390_emit_jump (is_aligned_label, cond),
5834 profile_probability::very_unlikely ());
5835
5836 expand_binop (Pmode, add_optab, str_idx_reg,
5837 GEN_INT (-16), str_idx_reg, 1, OPTAB_DIRECT);
5838 /* FIXME: len is already zero extended - so avoid the llgcr emitted
5839 here. */
5840 temp = expand_binop (Pmode, add_optab, str_idx_reg,
5841 convert_to_mode (Pmode, len, 1),
5842 target, 1, OPTAB_DIRECT);
5843 if (temp != target)
5844 emit_move_insn (target, temp);
5845 }
5846
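/* Emit a vector instruction based copy of the string at SRC (including
   its terminating zero byte) to DST and set RESULT to the address of
   that zero byte within DST.

   Illustrative note (added annotation, not part of the original
   sources): this is presumably reached via the movstr standard
   pattern, e.g. when expanding

     char *
     my_stpcpy (char *d, const char *s)
     {
       return __builtin_stpcpy (d, s);
     }  */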
5847 void
5848 s390_expand_vec_movstr (rtx result, rtx dst, rtx src)
5849 {
5850 rtx temp = gen_reg_rtx (Pmode);
5851 rtx src_addr = XEXP (src, 0);
5852 rtx dst_addr = XEXP (dst, 0);
5853 rtx src_addr_reg = gen_reg_rtx (Pmode);
5854 rtx dst_addr_reg = gen_reg_rtx (Pmode);
5855 rtx offset = gen_reg_rtx (Pmode);
5856 rtx vsrc = gen_reg_rtx (V16QImode);
5857 rtx vpos = gen_reg_rtx (V16QImode);
5858 rtx loadlen = gen_reg_rtx (SImode);
5859 rtx gpos_qi = gen_reg_rtx(QImode);
5860 rtx gpos = gen_reg_rtx (SImode);
5861 rtx done_label = gen_label_rtx ();
5862 rtx loop_label = gen_label_rtx ();
5863 rtx exit_label = gen_label_rtx ();
5864 rtx full_label = gen_label_rtx ();
5865
5866 /* Perform a quick check for a string end within the first (up to) 16
5867 bytes and exit early if successful. */
5868
5869 emit_insn (gen_vlbb (vsrc, src, GEN_INT (6)));
5870 emit_insn (gen_lcbb (loadlen, src_addr, GEN_INT (6)));
5871 emit_insn (gen_vfenezv16qi (vpos, vsrc, vsrc));
5872 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5873 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5874 /* gpos is the byte index if a zero was found and 16 otherwise.
5875 So if it is lower than the loaded bytes we have a hit. */
5876 emit_cmp_and_jump_insns (gpos, loadlen, GE, NULL_RTX, SImode, 1,
5877 full_label);
5878 emit_insn (gen_vstlv16qi (vsrc, gpos, dst));
5879
5880 force_expand_binop (Pmode, add_optab, dst_addr, gpos, result,
5881 1, OPTAB_DIRECT);
5882 emit_jump (exit_label);
5883 emit_barrier ();
5884
5885 emit_label (full_label);
5886 LABEL_NUSES (full_label) = 1;
5887
5888 /* Calculate `offset' so that src + offset points to the last byte
5889 before 16 byte alignment. */
5890
5891 /* temp = src_addr & 0xf */
5892 force_expand_binop (Pmode, and_optab, src_addr, GEN_INT (15), temp,
5893 1, OPTAB_DIRECT);
5894
5895 /* offset = 0xf - temp */
5896 emit_move_insn (offset, GEN_INT (15));
5897 force_expand_binop (Pmode, sub_optab, offset, temp, offset,
5898 1, OPTAB_DIRECT);
5899
5900 /* Store `offset' bytes in the destination string. The quick check
5901 has loaded at least `offset' bytes into vsrc. */
5902
5903 emit_insn (gen_vstlv16qi (vsrc, gen_lowpart (SImode, offset), dst));
5904
5905 /* Advance to the next byte to be loaded. */
5906 force_expand_binop (Pmode, add_optab, offset, const1_rtx, offset,
5907 1, OPTAB_DIRECT);
5908
5909 /* Make sure the addresses are single regs which can be used as a
5910 base. */
5911 emit_move_insn (src_addr_reg, src_addr);
5912 emit_move_insn (dst_addr_reg, dst_addr);
5913
5914 /* MAIN LOOP */
5915
5916 emit_label (loop_label);
5917 LABEL_NUSES (loop_label) = 1;
5918
5919 emit_move_insn (vsrc,
5920 gen_rtx_MEM (V16QImode,
5921 gen_rtx_PLUS (Pmode, src_addr_reg, offset)));
5922
5923 emit_insn (gen_vec_vfenesv16qi (vpos, vsrc, vsrc,
5924 GEN_INT (VSTRING_FLAG_ZS | VSTRING_FLAG_CS)));
5925 add_int_reg_note (s390_emit_ccraw_jump (8, EQ, done_label),
5926 REG_BR_PROB, profile_probability::very_unlikely ()
5927 .to_reg_br_prob_note ());
5928
5929 emit_move_insn (gen_rtx_MEM (V16QImode,
5930 gen_rtx_PLUS (Pmode, dst_addr_reg, offset)),
5931 vsrc);
5932 /* offset += 16 */
5933 force_expand_binop (Pmode, add_optab, offset, GEN_INT (16),
5934 offset, 1, OPTAB_DIRECT);
5935
5936 emit_jump (loop_label);
5937 emit_barrier ();
5938
5939 /* REGULAR EXIT */
5940
5941 /* We are done. Add the offset of the zero character to the dst_addr
5942 pointer to get the result. */
5943
5944 emit_label (done_label);
5945 LABEL_NUSES (done_label) = 1;
5946
5947 force_expand_binop (Pmode, add_optab, dst_addr_reg, offset, dst_addr_reg,
5948 1, OPTAB_DIRECT);
5949
5950 emit_insn (gen_vec_extractv16qiqi (gpos_qi, vpos, GEN_INT (7)));
5951 emit_move_insn (gpos, gen_rtx_SUBREG (SImode, gpos_qi, 0));
5952
5953 emit_insn (gen_vstlv16qi (vsrc, gpos, gen_rtx_MEM (BLKmode, dst_addr_reg)));
5954
5955 force_expand_binop (Pmode, add_optab, dst_addr_reg, gpos, result,
5956 1, OPTAB_DIRECT);
5957
5958 /* EARLY EXIT */
5959
5960 emit_label (exit_label);
5961 LABEL_NUSES (exit_label) = 1;
5962 }
5963
5964
5965 /* Expand conditional increment or decrement using alc/slb instructions.
5966 Should generate code setting DST to either SRC or SRC + INCREMENT,
5967 depending on the result of the comparison CMP_OP0 CMP_CODE CMP_OP1.
5968 Returns true if successful, false otherwise.
5969
5970 That makes it possible to implement some if-constructs without jumps e.g.:
5971 (borrow = CC0 | CC1 and carry = CC2 | CC3)
5972 unsigned int a, b, c;
5973 if (a < b) c++; -> CCU b > a -> CC2; c += carry;
5974 if (a < b) c--; -> CCL3 a - b -> borrow; c -= borrow;
5975 if (a <= b) c++; -> CCL3 b - a -> borrow; c += carry;
5976 if (a <= b) c--; -> CCU a <= b -> borrow; c -= borrow;
5977
5978 Checks for EQ and NE with a nonzero value need an additional xor e.g.:
5979 if (a == b) c++; -> CCL3 a ^= b; 0 - a -> borrow; c += carry;
5980 if (a == b) c--; -> CCU a ^= b; a <= 0 -> CC0 | CC1; c -= borrow;
5981 if (a != b) c++; -> CCU a ^= b; a > 0 -> CC2; c += carry;
5982 if (a != b) c--; -> CCL3 a ^= b; 0 - a -> borrow; c -= borrow; */
5983
5984 bool
5985 s390_expand_addcc (enum rtx_code cmp_code, rtx cmp_op0, rtx cmp_op1,
5986 rtx dst, rtx src, rtx increment)
5987 {
5988 machine_mode cmp_mode;
5989 machine_mode cc_mode;
5990 rtx op_res;
5991 rtx insn;
5992 rtvec p;
5993 int ret;
5994
5995 if ((GET_MODE (cmp_op0) == SImode || GET_MODE (cmp_op0) == VOIDmode)
5996 && (GET_MODE (cmp_op1) == SImode || GET_MODE (cmp_op1) == VOIDmode))
5997 cmp_mode = SImode;
5998 else if ((GET_MODE (cmp_op0) == DImode || GET_MODE (cmp_op0) == VOIDmode)
5999 && (GET_MODE (cmp_op1) == DImode || GET_MODE (cmp_op1) == VOIDmode))
6000 cmp_mode = DImode;
6001 else
6002 return false;
6003
6004 /* Try ADD LOGICAL WITH CARRY. */
6005 if (increment == const1_rtx)
6006 {
6007 /* Determine CC mode to use. */
6008 if (cmp_code == EQ || cmp_code == NE)
6009 {
6010 if (cmp_op1 != const0_rtx)
6011 {
6012 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6013 NULL_RTX, 0, OPTAB_WIDEN);
6014 cmp_op1 = const0_rtx;
6015 }
6016
6017 cmp_code = cmp_code == EQ ? LEU : GTU;
6018 }
6019
6020 if (cmp_code == LTU || cmp_code == LEU)
6021 {
6022 rtx tem = cmp_op0;
6023 cmp_op0 = cmp_op1;
6024 cmp_op1 = tem;
6025 cmp_code = swap_condition (cmp_code);
6026 }
6027
6028 switch (cmp_code)
6029 {
6030 case GTU:
6031 cc_mode = CCUmode;
6032 break;
6033
6034 case GEU:
6035 cc_mode = CCL3mode;
6036 break;
6037
6038 default:
6039 return false;
6040 }
6041
6042 /* Emit comparison instruction pattern. */
6043 if (!register_operand (cmp_op0, cmp_mode))
6044 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6045
6046 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6047 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6048 /* We use insn_invalid_p here to add clobbers if required. */
6049 ret = insn_invalid_p (emit_insn (insn), false);
6050 gcc_assert (!ret);
6051
6052 /* Emit ALC instruction pattern. */
6053 op_res = gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6054 gen_rtx_REG (cc_mode, CC_REGNUM),
6055 const0_rtx);
6056
6057 if (src != const0_rtx)
6058 {
6059 if (!register_operand (src, GET_MODE (dst)))
6060 src = force_reg (GET_MODE (dst), src);
6061
6062 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, src);
6063 op_res = gen_rtx_PLUS (GET_MODE (dst), op_res, const0_rtx);
6064 }
6065
6066 p = rtvec_alloc (2);
6067 RTVEC_ELT (p, 0) =
6068 gen_rtx_SET (dst, op_res);
6069 RTVEC_ELT (p, 1) =
6070 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6071 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6072
6073 return true;
6074 }
6075
6076 /* Try SUBTRACT LOGICAL WITH BORROW. */
6077 if (increment == constm1_rtx)
6078 {
6079 /* Determine CC mode to use. */
6080 if (cmp_code == EQ || cmp_code == NE)
6081 {
6082 if (cmp_op1 != const0_rtx)
6083 {
6084 cmp_op0 = expand_simple_binop (cmp_mode, XOR, cmp_op0, cmp_op1,
6085 NULL_RTX, 0, OPTAB_WIDEN);
6086 cmp_op1 = const0_rtx;
6087 }
6088
6089 cmp_code = cmp_code == EQ ? LEU : GTU;
6090 }
6091
6092 if (cmp_code == GTU || cmp_code == GEU)
6093 {
6094 rtx tem = cmp_op0;
6095 cmp_op0 = cmp_op1;
6096 cmp_op1 = tem;
6097 cmp_code = swap_condition (cmp_code);
6098 }
6099
6100 switch (cmp_code)
6101 {
6102 case LEU:
6103 cc_mode = CCUmode;
6104 break;
6105
6106 case LTU:
6107 cc_mode = CCL3mode;
6108 break;
6109
6110 default:
6111 return false;
6112 }
6113
6114 /* Emit comparison instruction pattern. */
6115 if (!register_operand (cmp_op0, cmp_mode))
6116 cmp_op0 = force_reg (cmp_mode, cmp_op0);
6117
6118 insn = gen_rtx_SET (gen_rtx_REG (cc_mode, CC_REGNUM),
6119 gen_rtx_COMPARE (cc_mode, cmp_op0, cmp_op1));
6120 /* We use insn_invalid_p here to add clobbers if required. */
6121 ret = insn_invalid_p (emit_insn (insn), false);
6122 gcc_assert (!ret);
6123
6124 /* Emit SLB instruction pattern. */
6125 if (!register_operand (src, GET_MODE (dst)))
6126 src = force_reg (GET_MODE (dst), src);
6127
6128 op_res = gen_rtx_MINUS (GET_MODE (dst),
6129 gen_rtx_MINUS (GET_MODE (dst), src, const0_rtx),
6130 gen_rtx_fmt_ee (cmp_code, GET_MODE (dst),
6131 gen_rtx_REG (cc_mode, CC_REGNUM),
6132 const0_rtx));
6133 p = rtvec_alloc (2);
6134 RTVEC_ELT (p, 0) =
6135 gen_rtx_SET (dst, op_res);
6136 RTVEC_ELT (p, 1) =
6137 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6138 emit_insn (gen_rtx_PARALLEL (VOIDmode, p));
6139
6140 return true;
6141 }
6142
6143 return false;
6144 }
6145
6146 /* Expand code for the insv template. Return true if successful. */
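/* Illustrative note (added annotation, not part of the original sources):
   the insv expander inserts a bit-field into a register or memory word,
   as needed e.g. for

     struct flags { unsigned int pad : 12; unsigned int val : 20; };

     void
     set_val (struct flags *f, unsigned int v)
     {
       f->val = v;
     }

   Depending on the operands the code below maps this to INSERT
   IMMEDIATE (IILL et al), STCM/ICM style patterns, or RISBG on z10
   and later.  */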
6147
6148 bool
6149 s390_expand_insv (rtx dest, rtx op1, rtx op2, rtx src)
6150 {
6151 int bitsize = INTVAL (op1);
6152 int bitpos = INTVAL (op2);
6153 machine_mode mode = GET_MODE (dest);
6154 machine_mode smode;
6155 int smode_bsize, mode_bsize;
6156 rtx op, clobber;
6157
6158 if (bitsize + bitpos > GET_MODE_BITSIZE (mode))
6159 return false;
6160
6161 /* Generate INSERT IMMEDIATE (IILL et al). */
6162 /* (set (ze (reg)) (const_int)). */
6163 if (TARGET_ZARCH
6164 && register_operand (dest, word_mode)
6165 && (bitpos % 16) == 0
6166 && (bitsize % 16) == 0
6167 && const_int_operand (src, VOIDmode))
6168 {
6169 HOST_WIDE_INT val = INTVAL (src);
6170 int regpos = bitpos + bitsize;
6171
6172 while (regpos > bitpos)
6173 {
6174 machine_mode putmode;
6175 int putsize;
6176
6177 if (TARGET_EXTIMM && (regpos % 32 == 0) && (regpos >= bitpos + 32))
6178 putmode = SImode;
6179 else
6180 putmode = HImode;
6181
6182 putsize = GET_MODE_BITSIZE (putmode);
6183 regpos -= putsize;
6184 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6185 GEN_INT (putsize),
6186 GEN_INT (regpos)),
6187 gen_int_mode (val, putmode));
6188 val >>= putsize;
6189 }
6190 gcc_assert (regpos == bitpos);
6191 return true;
6192 }
6193
6194 smode = smallest_int_mode_for_size (bitsize);
6195 smode_bsize = GET_MODE_BITSIZE (smode);
6196 mode_bsize = GET_MODE_BITSIZE (mode);
6197
6198 /* Generate STORE CHARACTERS UNDER MASK (STCM et al). */
6199 if (bitpos == 0
6200 && (bitsize % BITS_PER_UNIT) == 0
6201 && MEM_P (dest)
6202 && (register_operand (src, word_mode)
6203 || const_int_operand (src, VOIDmode)))
6204 {
6205 /* Emit standard pattern if possible. */
6206 if (smode_bsize == bitsize)
6207 {
6208 emit_move_insn (adjust_address (dest, smode, 0),
6209 gen_lowpart (smode, src));
6210 return true;
6211 }
6212
6213 /* (set (ze (mem)) (const_int)). */
6214 else if (const_int_operand (src, VOIDmode))
6215 {
6216 int size = bitsize / BITS_PER_UNIT;
6217 rtx src_mem = adjust_address (force_const_mem (word_mode, src),
6218 BLKmode,
6219 UNITS_PER_WORD - size);
6220
6221 dest = adjust_address (dest, BLKmode, 0);
6222 set_mem_size (dest, size);
6223 s390_expand_movmem (dest, src_mem, GEN_INT (size));
6224 return true;
6225 }
6226
6227 /* (set (ze (mem)) (reg)). */
6228 else if (register_operand (src, word_mode))
6229 {
6230 if (bitsize <= 32)
6231 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest, op1,
6232 const0_rtx), src);
6233 else
6234 {
6235 /* Emit st,stcmh sequence. */
6236 int stcmh_width = bitsize - 32;
6237 int size = stcmh_width / BITS_PER_UNIT;
6238
6239 emit_move_insn (adjust_address (dest, SImode, size),
6240 gen_lowpart (SImode, src));
6241 set_mem_size (dest, size);
6242 emit_move_insn (gen_rtx_ZERO_EXTRACT (word_mode, dest,
6243 GEN_INT (stcmh_width),
6244 const0_rtx),
6245 gen_rtx_LSHIFTRT (word_mode, src, GEN_INT (32)));
6246 }
6247 return true;
6248 }
6249 }
6250
6251 /* Generate INSERT CHARACTERS UNDER MASK (IC, ICM et al). */
6252 if ((bitpos % BITS_PER_UNIT) == 0
6253 && (bitsize % BITS_PER_UNIT) == 0
6254 && (bitpos & 32) == ((bitpos + bitsize - 1) & 32)
6255 && MEM_P (src)
6256 && (mode == DImode || mode == SImode)
6257 && register_operand (dest, mode))
6258 {
6259 /* Emit a strict_low_part pattern if possible. */
6260 if (smode_bsize == bitsize && bitpos == mode_bsize - smode_bsize)
6261 {
6262 op = gen_rtx_STRICT_LOW_PART (VOIDmode, gen_lowpart (smode, dest));
6263 op = gen_rtx_SET (op, gen_lowpart (smode, src));
6264 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6265 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber)));
6266 return true;
6267 }
6268
6269 /* ??? There are more powerful versions of ICM that are not
6270 completely represented in the md file. */
6271 }
6272
6273 /* For z10, generate ROTATE THEN INSERT SELECTED BITS (RISBG et al). */
6274 if (TARGET_Z10 && (mode == DImode || mode == SImode))
6275 {
6276 machine_mode mode_s = GET_MODE (src);
6277
6278 if (CONSTANT_P (src))
6279 {
6280 /* For constant zero values the representation with AND
6281 appears to be folded in more situations than the (set
6282 (zero_extract) ...).
6283 We only do this when the start and end of the bitfield
6284 remain in the same SImode chunk. That way nihf or nilf
6285 can be used.
6286 The AND patterns might still generate a risbg for this. */
6287 if (src == const0_rtx && bitpos / 32 == (bitpos + bitsize - 1) / 32)
6288 return false;
6289 else
6290 src = force_reg (mode, src);
6291 }
6292 else if (mode_s != mode)
6293 {
6294 gcc_assert (GET_MODE_BITSIZE (mode_s) >= bitsize);
6295 src = force_reg (mode_s, src);
6296 src = gen_lowpart (mode, src);
6297 }
6298
6299 op = gen_rtx_ZERO_EXTRACT (mode, dest, op1, op2);
6300 op = gen_rtx_SET (op, src);
6301
6302 if (!TARGET_ZEC12)
6303 {
6304 clobber = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, CC_REGNUM));
6305 op = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clobber));
6306 }
6307 emit_insn (op);
6308
6309 return true;
6310 }
6311
6312 return false;
6313 }
6314
6315 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic which returns a
6316 register that holds VAL of mode MODE shifted by COUNT bits. */
6317
6318 static inline rtx
6319 s390_expand_mask_and_shift (rtx val, machine_mode mode, rtx count)
6320 {
6321 val = expand_simple_binop (SImode, AND, val, GEN_INT (GET_MODE_MASK (mode)),
6322 NULL_RTX, 1, OPTAB_DIRECT);
6323 return expand_simple_binop (SImode, ASHIFT, val, count,
6324 NULL_RTX, 1, OPTAB_DIRECT);
6325 }
6326
6327 /* Generate a vector comparison COND of CMP_OP1 and CMP_OP2 and store
6328 the result in TARGET. */
6329
6330 void
6331 s390_expand_vec_compare (rtx target, enum rtx_code cond,
6332 rtx cmp_op1, rtx cmp_op2)
6333 {
6334 machine_mode mode = GET_MODE (target);
6335 bool neg_p = false, swap_p = false;
6336 rtx tmp;
6337
6338 if (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_VECTOR_FLOAT)
6339 {
6340 switch (cond)
6341 {
6342 /* NE a != b -> !(a == b) */
6343 case NE: cond = EQ; neg_p = true; break;
6344 /* UNGT a u> b -> !(b >= a) */
6345 case UNGT: cond = GE; neg_p = true; swap_p = true; break;
6346 /* UNGE a u>= b -> !(b > a) */
6347 case UNGE: cond = GT; neg_p = true; swap_p = true; break;
6348 /* LE: a <= b -> b >= a */
6349 case LE: cond = GE; swap_p = true; break;
6350 /* UNLE: a u<= b -> !(a > b) */
6351 case UNLE: cond = GT; neg_p = true; break;
6352 /* LT: a < b -> b > a */
6353 case LT: cond = GT; swap_p = true; break;
6354 /* UNLT: a u< b -> !(a >= b) */
6355 case UNLT: cond = GE; neg_p = true; break;
6356 case UNEQ:
6357 emit_insn (gen_vec_cmpuneqv2df (target, cmp_op1, cmp_op2));
6358 return;
6359 case LTGT:
6360 emit_insn (gen_vec_cmpltgtv2df (target, cmp_op1, cmp_op2));
6361 return;
6362 case ORDERED:
6363 emit_insn (gen_vec_orderedv2df (target, cmp_op1, cmp_op2));
6364 return;
6365 case UNORDERED:
6366 emit_insn (gen_vec_unorderedv2df (target, cmp_op1, cmp_op2));
6367 return;
6368 default: break;
6369 }
6370 }
6371 else
6372 {
6373 switch (cond)
6374 {
6375 /* NE: a != b -> !(a == b) */
6376 case NE: cond = EQ; neg_p = true; break;
6377 /* GE: a >= b -> !(b > a) */
6378 case GE: cond = GT; neg_p = true; swap_p = true; break;
6379 /* GEU: a >= b -> !(b > a) */
6380 case GEU: cond = GTU; neg_p = true; swap_p = true; break;
6381 /* LE: a <= b -> !(a > b) */
6382 case LE: cond = GT; neg_p = true; break;
6383 /* LEU: a <= b -> !(a > b) */
6384 case LEU: cond = GTU; neg_p = true; break;
6385 /* LT: a < b -> b > a */
6386 case LT: cond = GT; swap_p = true; break;
6387 /* LTU: a < b -> b > a */
6388 case LTU: cond = GTU; swap_p = true; break;
6389 default: break;
6390 }
6391 }
6392
6393 if (swap_p)
6394 {
6395 tmp = cmp_op1; cmp_op1 = cmp_op2; cmp_op2 = tmp;
6396 }
6397
6398 emit_insn (gen_rtx_SET (target, gen_rtx_fmt_ee (cond,
6399 mode,
6400 cmp_op1, cmp_op2)));
6401 if (neg_p)
6402 emit_insn (gen_rtx_SET (target, gen_rtx_NOT (mode, target)));
6403 }
6404
6405 /* Expand the comparison CODE of CMP1 and CMP2 and copy 1 or 0 into
6406 TARGET if either all (ALL_P is true) or any (ALL_P is false) of the
6407 elements in CMP1 and CMP2 fulfill the comparison.
6408 This function is only used to emit patterns for the vx builtins and
6409 therefore only handles comparison codes required by the
6410 builtins. */
6411 void
6412 s390_expand_vec_compare_cc (rtx target, enum rtx_code code,
6413 rtx cmp1, rtx cmp2, bool all_p)
6414 {
6415 machine_mode cc_producer_mode, cc_consumer_mode, scratch_mode;
6416 rtx tmp_reg = gen_reg_rtx (SImode);
6417 bool swap_p = false;
6418
6419 if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_INT)
6420 {
6421 switch (code)
6422 {
6423 case EQ:
6424 case NE:
6425 cc_producer_mode = CCVEQmode;
6426 break;
6427 case GE:
6428 case LT:
6429 code = swap_condition (code);
6430 swap_p = true;
6431 /* fallthrough */
6432 case GT:
6433 case LE:
6434 cc_producer_mode = CCVIHmode;
6435 break;
6436 case GEU:
6437 case LTU:
6438 code = swap_condition (code);
6439 swap_p = true;
6440 /* fallthrough */
6441 case GTU:
6442 case LEU:
6443 cc_producer_mode = CCVIHUmode;
6444 break;
6445 default:
6446 gcc_unreachable ();
6447 }
6448
6449 scratch_mode = GET_MODE (cmp1);
6450 /* These codes represent inverted CC interpretations. Inverting
6451 an ALL CC mode results in an ANY CC mode and the other way
6452 around. Invert the all_p flag here to compensate for
6453 that. */
6454 if (code == NE || code == LE || code == LEU)
6455 all_p = !all_p;
6456
6457 cc_consumer_mode = all_p ? CCVIALLmode : CCVIANYmode;
6458 }
6459 else if (GET_MODE_CLASS (GET_MODE (cmp1)) == MODE_VECTOR_FLOAT)
6460 {
6461 bool inv_p = false;
6462
6463 switch (code)
6464 {
6465 case EQ: cc_producer_mode = CCVEQmode; break;
6466 case NE: cc_producer_mode = CCVEQmode; inv_p = true; break;
6467 case GT: cc_producer_mode = CCVFHmode; break;
6468 case GE: cc_producer_mode = CCVFHEmode; break;
6469 case UNLE: cc_producer_mode = CCVFHmode; inv_p = true; break;
6470 case UNLT: cc_producer_mode = CCVFHEmode; inv_p = true; break;
6471 case LT: cc_producer_mode = CCVFHmode; code = GT; swap_p = true; break;
6472 case LE: cc_producer_mode = CCVFHEmode; code = GE; swap_p = true; break;
6473 default: gcc_unreachable ();
6474 }
6475 scratch_mode = mode_for_vector
6476 (int_mode_for_mode (GET_MODE_INNER (GET_MODE (cmp1))).require (),
6477 GET_MODE_NUNITS (GET_MODE (cmp1)));
6478 gcc_assert (scratch_mode != BLKmode);
6479
6480 if (inv_p)
6481 all_p = !all_p;
6482
6483 cc_consumer_mode = all_p ? CCVFALLmode : CCVFANYmode;
6484 }
6485 else
6486 gcc_unreachable ();
6487
6488 if (swap_p)
6489 {
6490 rtx tmp = cmp2;
6491 cmp2 = cmp1;
6492 cmp1 = tmp;
6493 }
6494
6495 emit_insn (gen_rtx_PARALLEL (VOIDmode,
6496 gen_rtvec (2, gen_rtx_SET (
6497 gen_rtx_REG (cc_producer_mode, CC_REGNUM),
6498 gen_rtx_COMPARE (cc_producer_mode, cmp1, cmp2)),
6499 gen_rtx_CLOBBER (VOIDmode,
6500 gen_rtx_SCRATCH (scratch_mode)))));
6501 emit_move_insn (target, const0_rtx);
6502 emit_move_insn (tmp_reg, const1_rtx);
6503
6504 emit_move_insn (target,
6505 gen_rtx_IF_THEN_ELSE (SImode,
6506 gen_rtx_fmt_ee (code, VOIDmode,
6507 gen_rtx_REG (cc_consumer_mode, CC_REGNUM),
6508 const0_rtx),
6509 tmp_reg, target));
6510 }
6511
6512 /* Invert the comparison CODE applied to a CC mode. This is only safe
6513 if we know whether the result was created by a floating point
6514 compare or not. For the CCV modes this is encoded as part of the
6515 mode. */
6516 enum rtx_code
6517 s390_reverse_condition (machine_mode mode, enum rtx_code code)
6518 {
6519 /* Reversal of FP compares needs care -- an ordered compare
6520 becomes an unordered compare and vice versa. */
6521 if (mode == CCVFALLmode || mode == CCVFANYmode)
6522 return reverse_condition_maybe_unordered (code);
6523 else if (mode == CCVIALLmode || mode == CCVIANYmode)
6524 return reverse_condition (code);
6525 else
6526 gcc_unreachable ();
6527 }
6528
6529 /* Generate a vector comparison expression loading either elements of
6530 THEN or ELS into TARGET depending on the comparison COND of CMP_OP1
6531 and CMP_OP2. */
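/* Illustrative note (added annotation, not part of the original sources):
   the vectorizer uses vcond for conditional selects such as

     void
     negmask (int *restrict r, const int *restrict a, int n)
     {
       for (int i = 0; i < n; i++)
         r[i] = a[i] < 0 ? -1 : 0;
     }

   The x < 0 ? -1 : 0 and x < 0 ? 1 : 0 forms are special-cased below
   into a vector arithmetic/logical shift right by 31 (15 and 7 for
   short and char elements).  */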
6532
6533 void
6534 s390_expand_vcond (rtx target, rtx then, rtx els,
6535 enum rtx_code cond, rtx cmp_op1, rtx cmp_op2)
6536 {
6537 rtx tmp;
6538 machine_mode result_mode;
6539 rtx result_target;
6540
6541 machine_mode target_mode = GET_MODE (target);
6542 machine_mode cmp_mode = GET_MODE (cmp_op1);
6543 rtx op = (cond == LT) ? els : then;
6544
6545 /* Try to optimize x < 0 ? -1 : 0 into (signed) x >> 31
6546 and x < 0 ? 1 : 0 into (unsigned) x >> 31. Likewise
6547 for short and byte (x >> 15 and x >> 7 respectively). */
6548 if ((cond == LT || cond == GE)
6549 && target_mode == cmp_mode
6550 && cmp_op2 == CONST0_RTX (cmp_mode)
6551 && op == CONST0_RTX (target_mode)
6552 && s390_vector_mode_supported_p (target_mode)
6553 && GET_MODE_CLASS (target_mode) == MODE_VECTOR_INT)
6554 {
6555 rtx negop = (cond == LT) ? then : els;
6556
6557 int shift = GET_MODE_BITSIZE (GET_MODE_INNER (target_mode)) - 1;
6558
6559 /* if x < 0 ? 1 : 0 or if x >= 0 ? 0 : 1 */
6560 if (negop == CONST1_RTX (target_mode))
6561 {
6562 rtx res = expand_simple_binop (cmp_mode, LSHIFTRT, cmp_op1,
6563 GEN_INT (shift), target,
6564 1, OPTAB_DIRECT);
6565 if (res != target)
6566 emit_move_insn (target, res);
6567 return;
6568 }
6569
6570 /* if x < 0 ? -1 : 0 or if x >= 0 ? 0 : -1 */
6571 else if (all_ones_operand (negop, target_mode))
6572 {
6573 rtx res = expand_simple_binop (cmp_mode, ASHIFTRT, cmp_op1,
6574 GEN_INT (shift), target,
6575 0, OPTAB_DIRECT);
6576 if (res != target)
6577 emit_move_insn (target, res);
6578 return;
6579 }
6580 }
6581
6582 /* We always use an integral type vector to hold the comparison
6583 result. */
6584 result_mode = mode_for_vector
6585 (int_mode_for_mode (GET_MODE_INNER (cmp_mode)).require (),
6586 GET_MODE_NUNITS (cmp_mode));
6587 result_target = gen_reg_rtx (result_mode);
6588
6589 /* We allow vector immediates as comparison operands that
6590 can be handled by the optimization above but not by the
6591 following code. Hence, force them into registers here. */
6592 if (!REG_P (cmp_op1))
6593 cmp_op1 = force_reg (GET_MODE (cmp_op1), cmp_op1);
6594
6595 if (!REG_P (cmp_op2))
6596 cmp_op2 = force_reg (GET_MODE (cmp_op2), cmp_op2);
6597
6598 s390_expand_vec_compare (result_target, cond,
6599 cmp_op1, cmp_op2);
6600
6601 /* If the results are supposed to be either -1 or 0 we are done
6602 since this is what our compare instructions generate anyway. */
6603 if (all_ones_operand (then, GET_MODE (then))
6604 && const0_operand (els, GET_MODE (els)))
6605 {
6606 emit_move_insn (target, gen_rtx_SUBREG (target_mode,
6607 result_target, 0));
6608 return;
6609 }
6610
6611 /* Otherwise we will do a vsel afterwards. */
6612 /* This gets triggered e.g.
6613 with gcc.c-torture/compile/pr53410-1.c */
6614 if (!REG_P (then))
6615 then = force_reg (target_mode, then);
6616
6617 if (!REG_P (els))
6618 els = force_reg (target_mode, els);
6619
6620 tmp = gen_rtx_fmt_ee (EQ, VOIDmode,
6621 result_target,
6622 CONST0_RTX (result_mode));
6623
6624 /* We compared the result against zero above so we have to swap then
6625 and els here. */
6626 tmp = gen_rtx_IF_THEN_ELSE (target_mode, tmp, els, then);
6627
6628 gcc_assert (target_mode == GET_MODE (then));
6629 emit_insn (gen_rtx_SET (target, tmp));
6630 }
6631
6632 /* Emit the RTX necessary to initialize the vector TARGET with values
6633 in VALS. */
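/* Illustrative note (added annotation, not part of the original sources):
   with the GNU vector extension the cases handled below correspond to
   code like

     typedef int v4si __attribute__ ((vector_size (16)));

     v4si splat (int a)   { return (v4si) { a, a, a, a }; }
     v4si build (int a, int b, int c, int d) { return (v4si) { a, b, c, d }; }

   splat hits the all_same VEC_DUPLICATE case; constant masks, 64-bit
   register pairs and a single element loaded from memory get their own
   shortcuts, and everything else falls back to the element-wise
   UNSPEC_VEC_SET loop at the end.  */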
6634 void
6635 s390_expand_vec_init (rtx target, rtx vals)
6636 {
6637 machine_mode mode = GET_MODE (target);
6638 machine_mode inner_mode = GET_MODE_INNER (mode);
6639 int n_elts = GET_MODE_NUNITS (mode);
6640 bool all_same = true, all_regs = true, all_const_int = true;
6641 rtx x;
6642 int i;
6643
6644 for (i = 0; i < n_elts; ++i)
6645 {
6646 x = XVECEXP (vals, 0, i);
6647
6648 if (!CONST_INT_P (x))
6649 all_const_int = false;
6650
6651 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
6652 all_same = false;
6653
6654 if (!REG_P (x))
6655 all_regs = false;
6656 }
6657
6658 /* Use vector gen mask or vector gen byte mask if possible. */
6659 if (all_same && all_const_int
6660 && (XVECEXP (vals, 0, 0) == const0_rtx
6661 || s390_contiguous_bitmask_vector_p (XVECEXP (vals, 0, 0),
6662 NULL, NULL)
6663 || s390_bytemask_vector_p (XVECEXP (vals, 0, 0), NULL)))
6664 {
6665 emit_insn (gen_rtx_SET (target,
6666 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0))));
6667 return;
6668 }
6669
6670 if (all_same)
6671 {
6672 emit_insn (gen_rtx_SET (target,
6673 gen_rtx_VEC_DUPLICATE (mode,
6674 XVECEXP (vals, 0, 0))));
6675 return;
6676 }
6677
6678 if (all_regs
6679 && REG_P (target)
6680 && n_elts == 2
6681 && GET_MODE_SIZE (inner_mode) == 8)
6682 {
6683 /* Use vector load pair. */
6684 emit_insn (gen_rtx_SET (target,
6685 gen_rtx_VEC_CONCAT (mode,
6686 XVECEXP (vals, 0, 0),
6687 XVECEXP (vals, 0, 1))));
6688 return;
6689 }
6690
6691 /* Use vector load logical element and zero. */
6692 if (TARGET_VXE && (mode == V4SImode || mode == V4SFmode))
6693 {
6694 bool found = true;
6695
6696 x = XVECEXP (vals, 0, 0);
6697 if (memory_operand (x, inner_mode))
6698 {
6699 for (i = 1; i < n_elts; ++i)
6700 found = found && XVECEXP (vals, 0, i) == const0_rtx;
6701
6702 if (found)
6703 {
6704 machine_mode half_mode = (inner_mode == SFmode
6705 ? V2SFmode : V2SImode);
6706 emit_insn (gen_rtx_SET (target,
6707 gen_rtx_VEC_CONCAT (mode,
6708 gen_rtx_VEC_CONCAT (half_mode,
6709 x,
6710 const0_rtx),
6711 gen_rtx_VEC_CONCAT (half_mode,
6712 const0_rtx,
6713 const0_rtx))));
6714 return;
6715 }
6716 }
6717 }
6718
6719 /* We are about to set the vector elements one by one. Zero out the
6720 full register first in order to help the data flow framework to
6721 detect it as full VR set. */
6722 emit_insn (gen_rtx_SET (target, CONST0_RTX (mode)));
6723
6724 /* Unfortunately the vec_init expander is not allowed to fail. So
6725 we have to implement the fallback ourselves. */
6726 for (i = 0; i < n_elts; i++)
6727 {
6728 rtx elem = XVECEXP (vals, 0, i);
6729 if (!general_operand (elem, GET_MODE (elem)))
6730 elem = force_reg (inner_mode, elem);
6731
6732 emit_insn (gen_rtx_SET (target,
6733 gen_rtx_UNSPEC (mode,
6734 gen_rtvec (3, elem,
6735 GEN_INT (i), target),
6736 UNSPEC_VEC_SET)));
6737 }
6738 }
6739
6740 /* Structure to hold the initial parameters for a compare_and_swap operation
6741 in HImode and QImode. */
6742
6743 struct alignment_context
6744 {
6745 rtx memsi; /* SI aligned memory location. */
6746 rtx shift; /* Bit offset with regard to lsb. */
6747 rtx modemask; /* Mask of the HQImode shifted by SHIFT bits. */
6748 rtx modemaski; /* ~modemask */
6749 bool aligned; /* True if memory is aligned, false otherwise. */
6750 };
6751
6752 /* A subroutine of s390_expand_cs_hqi and s390_expand_atomic to initialize
6753 structure AC for transparent simplification if the memory alignment is known
6754 to be at least 32 bit. MEM is the memory location for the actual operation
6755 and MODE its mode. */
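/* Worked example (added annotation, not part of the original sources):
   for a QImode access whose address ends in binary ...01, i.e. byte
   offset 1 within its aligned SImode word, the unaligned path below
   computes on this big-endian target

     byteoffset = 1
     shift      = (3 - 1) * 8 = 16 bits
     modemask   = 0xff << 16  = 0x00ff0000
     modemaski  = 0xff00ffff

   so the byte occupies bits 16..23 of the containing aligned word.  */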
6756
6757 static void
6758 init_alignment_context (struct alignment_context *ac, rtx mem,
6759 machine_mode mode)
6760 {
6761 ac->shift = GEN_INT (GET_MODE_SIZE (SImode) - GET_MODE_SIZE (mode));
6762 ac->aligned = (MEM_ALIGN (mem) >= GET_MODE_BITSIZE (SImode));
6763
6764 if (ac->aligned)
6765 ac->memsi = adjust_address (mem, SImode, 0); /* Memory is aligned. */
6766 else
6767 {
6768 /* Alignment is unknown. */
6769 rtx byteoffset, addr, align;
6770
6771 /* Force the address into a register. */
6772 addr = force_reg (Pmode, XEXP (mem, 0));
6773
6774 /* Align it to SImode. */
6775 align = expand_simple_binop (Pmode, AND, addr,
6776 GEN_INT (-GET_MODE_SIZE (SImode)),
6777 NULL_RTX, 1, OPTAB_DIRECT);
6778 /* Generate MEM. */
6779 ac->memsi = gen_rtx_MEM (SImode, align);
6780 MEM_VOLATILE_P (ac->memsi) = MEM_VOLATILE_P (mem);
6781 set_mem_alias_set (ac->memsi, ALIAS_SET_MEMORY_BARRIER);
6782 set_mem_align (ac->memsi, GET_MODE_BITSIZE (SImode));
6783
6784 /* Calculate shiftcount. */
6785 byteoffset = expand_simple_binop (Pmode, AND, addr,
6786 GEN_INT (GET_MODE_SIZE (SImode) - 1),
6787 NULL_RTX, 1, OPTAB_DIRECT);
6788 /* As we already have some offset, evaluate the remaining distance. */
6789 ac->shift = expand_simple_binop (SImode, MINUS, ac->shift, byteoffset,
6790 NULL_RTX, 1, OPTAB_DIRECT);
6791 }
6792
6793 /* Shift is the byte count, but we need the bitcount. */
6794 ac->shift = expand_simple_binop (SImode, ASHIFT, ac->shift, GEN_INT (3),
6795 NULL_RTX, 1, OPTAB_DIRECT);
6796
6797 /* Calculate masks. */
6798 ac->modemask = expand_simple_binop (SImode, ASHIFT,
6799 GEN_INT (GET_MODE_MASK (mode)),
6800 ac->shift, NULL_RTX, 1, OPTAB_DIRECT);
6801 ac->modemaski = expand_simple_unop (SImode, NOT, ac->modemask,
6802 NULL_RTX, 1);
6803 }
6804
6805 /* A subroutine of s390_expand_cs_hqi. Insert INS into VAL. If possible,
6806 use a single insv insn into SEQ2. Otherwise, put prep insns in SEQ1 and
6807 perform the merge in SEQ2. */
6808
6809 static rtx
6810 s390_two_part_insv (struct alignment_context *ac, rtx *seq1, rtx *seq2,
6811 machine_mode mode, rtx val, rtx ins)
6812 {
6813 rtx tmp;
6814
6815 if (ac->aligned)
6816 {
6817 start_sequence ();
6818 tmp = copy_to_mode_reg (SImode, val);
6819 if (s390_expand_insv (tmp, GEN_INT (GET_MODE_BITSIZE (mode)),
6820 const0_rtx, ins))
6821 {
6822 *seq1 = NULL;
6823 *seq2 = get_insns ();
6824 end_sequence ();
6825 return tmp;
6826 }
6827 end_sequence ();
6828 }
6829
6830 /* Failed to use insv. Generate a two part shift and mask. */
6831 start_sequence ();
6832 tmp = s390_expand_mask_and_shift (ins, mode, ac->shift);
6833 *seq1 = get_insns ();
6834 end_sequence ();
6835
6836 start_sequence ();
6837 tmp = expand_simple_binop (SImode, IOR, tmp, val, NULL_RTX, 1, OPTAB_DIRECT);
6838 *seq2 = get_insns ();
6839 end_sequence ();
6840
6841 return tmp;
6842 }
6843
6844 /* Expand an atomic compare and swap operation for HImode and QImode. MEM is
6845 the memory location, CMP the old value to compare MEM with and NEW_RTX the
6846 value to set if CMP == MEM. */
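/* Illustrative note (added annotation, not part of the original sources):
   a sub-word compare-and-swap such as

     _Bool
     cas_short (short *p, short *expected, short desired)
     {
       return __atomic_compare_exchange_n (p, expected, desired, 0,
                                           __ATOMIC_SEQ_CST,
                                           __ATOMIC_SEQ_CST);
     }

   has no direct machine instruction; the code below emulates it with a
   CS loop on the containing aligned 32-bit word.  */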
6847
6848 static void
6849 s390_expand_cs_hqi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6850 rtx cmp, rtx new_rtx, bool is_weak)
6851 {
6852 struct alignment_context ac;
6853 rtx cmpv, newv, val, cc, seq0, seq1, seq2, seq3;
6854 rtx res = gen_reg_rtx (SImode);
6855 rtx_code_label *csloop = NULL, *csend = NULL;
6856
6857 gcc_assert (MEM_P (mem));
6858
6859 init_alignment_context (&ac, mem, mode);
6860
6861 /* Load full word. Subsequent loads are performed by CS. */
6862 val = expand_simple_binop (SImode, AND, ac.memsi, ac.modemaski,
6863 NULL_RTX, 1, OPTAB_DIRECT);
6864
6865 /* Prepare insertions of cmp and new_rtx into the loaded value. When
6866 possible, we try to use insv to make this happen efficiently. If
6867 that fails we'll generate code both inside and outside the loop. */
6868 cmpv = s390_two_part_insv (&ac, &seq0, &seq2, mode, val, cmp);
6869 newv = s390_two_part_insv (&ac, &seq1, &seq3, mode, val, new_rtx);
6870
6871 if (seq0)
6872 emit_insn (seq0);
6873 if (seq1)
6874 emit_insn (seq1);
6875
6876 /* Start CS loop. */
6877 if (!is_weak)
6878 {
6879 /* Begin assuming success. */
6880 emit_move_insn (btarget, const1_rtx);
6881
6882 csloop = gen_label_rtx ();
6883 csend = gen_label_rtx ();
6884 emit_label (csloop);
6885 }
6886
6887 /* val = "<mem>00..0<mem>"
6888 * cmp = "00..0<cmp>00..0"
6889 * new = "00..0<new>00..0"
6890 */
6891
6892 emit_insn (seq2);
6893 emit_insn (seq3);
6894
6895 cc = s390_emit_compare_and_swap (EQ, res, ac.memsi, cmpv, newv, CCZ1mode);
6896 if (is_weak)
6897 emit_insn (gen_cstorecc4 (btarget, cc, XEXP (cc, 0), XEXP (cc, 1)));
6898 else
6899 {
6900 rtx tmp;
6901
6902 /* Jump to end if we're done (likely?). */
6903 s390_emit_jump (csend, cc);
6904
6905 /* Check for changes outside mode, and loop internally if so.
6906 Arrange the moves so that the compare is adjacent to the
6907 branch so that we can generate CRJ. */
6908 tmp = copy_to_reg (val);
6909 force_expand_binop (SImode, and_optab, res, ac.modemaski, val,
6910 1, OPTAB_DIRECT);
6911 cc = s390_emit_compare (NE, val, tmp);
6912 s390_emit_jump (csloop, cc);
6913
6914 /* Failed. */
6915 emit_move_insn (btarget, const0_rtx);
6916 emit_label (csend);
6917 }
6918
6919 /* Return the correct part of the bitfield. */
6920 convert_move (vtarget, expand_simple_binop (SImode, LSHIFTRT, res, ac.shift,
6921 NULL_RTX, 1, OPTAB_DIRECT), 1);
6922 }
6923
6924 /* Variant of s390_expand_cs for SI, DI and TI modes. */
6925 static void
6926 s390_expand_cs_tdsi (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
6927 rtx cmp, rtx new_rtx, bool is_weak)
6928 {
6929 rtx output = vtarget;
6930 rtx_code_label *skip_cs_label = NULL;
6931 bool do_const_opt = false;
6932
6933 if (!register_operand (output, mode))
6934 output = gen_reg_rtx (mode);
6935
6936 /* If IS_WEAK is true and the INPUT value is a constant, compare the memory
6937 with the constant first and skip the compare_and_swap because it is very
6938 expensive and likely to fail anyway.
6939 Note 1: This is done only for IS_WEAK. C11 allows optimizations that may
6940 cause spurious failures in that case.
6941 Note 2: It may be useful to do this also for non-constant INPUT.
6942 Note 3: Currently only targets with "load on condition" are supported
6943 (z196 and newer). */
6944
6945 if (TARGET_Z196
6946 && (mode == SImode || mode == DImode))
6947 do_const_opt = (is_weak && CONST_INT_P (cmp));
6948
6949 if (do_const_opt)
6950 {
6951 rtx cc = gen_rtx_REG (CCZmode, CC_REGNUM);
6952
6953 skip_cs_label = gen_label_rtx ();
6954 emit_move_insn (btarget, const0_rtx);
6955 if (CONST_INT_P (cmp) && INTVAL (cmp) == 0)
6956 {
6957 rtvec lt = rtvec_alloc (2);
6958
6959 /* Load-and-test + conditional jump. */
6960 RTVEC_ELT (lt, 0)
6961 = gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, mem, cmp));
6962 RTVEC_ELT (lt, 1) = gen_rtx_SET (output, mem);
6963 emit_insn (gen_rtx_PARALLEL (VOIDmode, lt));
6964 }
6965 else
6966 {
6967 emit_move_insn (output, mem);
6968 emit_insn (gen_rtx_SET (cc, gen_rtx_COMPARE (CCZmode, output, cmp)));
6969 }
6970 s390_emit_jump (skip_cs_label, gen_rtx_NE (VOIDmode, cc, const0_rtx));
6971 add_reg_br_prob_note (get_last_insn (),
6972 profile_probability::very_unlikely ());
6973 /* If the jump is not taken, OUTPUT is the expected value. */
6974 cmp = output;
6975 /* Reload newval to a register manually, *after* the compare and jump
6976 above. Otherwise Reload might place it before the jump. */
6977 }
6978 else
6979 cmp = force_reg (mode, cmp);
6980 new_rtx = force_reg (mode, new_rtx);
6981 s390_emit_compare_and_swap (EQ, output, mem, cmp, new_rtx,
6982 (do_const_opt) ? CCZmode : CCZ1mode);
6983 if (skip_cs_label != NULL)
6984 emit_label (skip_cs_label);
6985
6986 /* We deliberately accept non-register operands in the predicate
6987 to ensure the write back to the output operand happens *before*
6988 the store-flags code below. This makes it easier for combine
6989 to merge the store-flags code with a potential test-and-branch
6990 pattern following (immediately!) afterwards. */
6991 if (output != vtarget)
6992 emit_move_insn (vtarget, output);
6993
6994 if (do_const_opt)
6995 {
6996 rtx cc, cond, ite;
6997
6998 /* Do not use gen_cstorecc4 here because it writes either 1 or 0, but
6999 btarget has already been initialized with 0 above. */
7000 cc = gen_rtx_REG (CCZmode, CC_REGNUM);
7001 cond = gen_rtx_EQ (VOIDmode, cc, const0_rtx);
7002 ite = gen_rtx_IF_THEN_ELSE (SImode, cond, const1_rtx, btarget);
7003 emit_insn (gen_rtx_SET (btarget, ite));
7004 }
7005 else
7006 {
7007 rtx cc, cond;
7008
7009 cc = gen_rtx_REG (CCZ1mode, CC_REGNUM);
7010 cond = gen_rtx_EQ (SImode, cc, const0_rtx);
7011 emit_insn (gen_cstorecc4 (btarget, cond, cc, const0_rtx));
7012 }
7013 }
7014
7015 /* Expand an atomic compare and swap operation. MEM is the memory location,
7016 CMP the old value to compare MEM with and NEW_RTX the value to set if
7017 CMP == MEM. */
7018
7019 void
7020 s390_expand_cs (machine_mode mode, rtx btarget, rtx vtarget, rtx mem,
7021 rtx cmp, rtx new_rtx, bool is_weak)
7022 {
7023 switch (mode)
7024 {
7025 case E_TImode:
7026 case E_DImode:
7027 case E_SImode:
7028 s390_expand_cs_tdsi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7029 break;
7030 case E_HImode:
7031 case E_QImode:
7032 s390_expand_cs_hqi (mode, btarget, vtarget, mem, cmp, new_rtx, is_weak);
7033 break;
7034 default:
7035 gcc_unreachable ();
7036 }
7037 }
7038
7039 /* Expand an atomic_exchange operation simulated with a compare-and-swap loop.
7040 The memory location MEM is set to INPUT. OUTPUT is set to the previous value
7041 of MEM. */
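/* Illustrative note (added annotation, not part of the original sources):
   this corresponds to e.g.

     long
     swap (long *p, long v)
     {
       return __atomic_exchange_n (p, v, __ATOMIC_SEQ_CST);
     }

   For a constant zero input on z196 and newer the code below uses an
   atomic fetch-and-AND with zero instead of the CS loop.  */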
7042
7043 void
7044 s390_expand_atomic_exchange_tdsi (rtx output, rtx mem, rtx input)
7045 {
7046 machine_mode mode = GET_MODE (mem);
7047 rtx_code_label *csloop;
7048
7049 if (TARGET_Z196
7050 && (mode == DImode || mode == SImode)
7051 && CONST_INT_P (input) && INTVAL (input) == 0)
7052 {
7053 emit_move_insn (output, const0_rtx);
7054 if (mode == DImode)
7055 emit_insn (gen_atomic_fetch_anddi (output, mem, const0_rtx, input));
7056 else
7057 emit_insn (gen_atomic_fetch_andsi (output, mem, const0_rtx, input));
7058 return;
7059 }
7060
7061 input = force_reg (mode, input);
7062 emit_move_insn (output, mem);
7063 csloop = gen_label_rtx ();
7064 emit_label (csloop);
7065 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, output, mem, output,
7066 input, CCZ1mode));
7067 }
7068
7069 /* Expand an atomic operation CODE of mode MODE. MEM is the memory location
7070 and VAL the value to play with. If AFTER is true then store the value
7071 MEM holds after the operation, if AFTER is false then store the value MEM
7072 holds before the operation. If TARGET is zero then discard that value, else
7073 store it to TARGET. */
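/* Illustrative note (added annotation, not part of the original sources):
   sub-word atomic read-modify-write operations like

     short
     add_fetch (short *p)
     {
       return __atomic_add_fetch (p, 1, __ATOMIC_SEQ_CST);
     }

   are emulated here with a CS loop on the containing aligned word:
   load the word, patch the half/byte field, and retry the CS until it
   succeeds.  */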
7074
7075 void
7076 s390_expand_atomic (machine_mode mode, enum rtx_code code,
7077 rtx target, rtx mem, rtx val, bool after)
7078 {
7079 struct alignment_context ac;
7080 rtx cmp;
7081 rtx new_rtx = gen_reg_rtx (SImode);
7082 rtx orig = gen_reg_rtx (SImode);
7083 rtx_code_label *csloop = gen_label_rtx ();
7084
7085 gcc_assert (!target || register_operand (target, VOIDmode));
7086 gcc_assert (MEM_P (mem));
7087
7088 init_alignment_context (&ac, mem, mode);
7089
7090 /* Shift val to the correct bit positions.
7091 Preserve "icm", but prevent "ex icm". */
7092 if (!(ac.aligned && code == SET && MEM_P (val)))
7093 val = s390_expand_mask_and_shift (val, mode, ac.shift);
7094
7095 /* Further preparation insns. */
7096 if (code == PLUS || code == MINUS)
7097 emit_move_insn (orig, val);
7098 else if (code == MULT || code == AND) /* val = "11..1<val>11..1" */
7099 val = expand_simple_binop (SImode, XOR, val, ac.modemaski,
7100 NULL_RTX, 1, OPTAB_DIRECT);
7101
7102 /* Load full word. Subsequent loads are performed by CS. */
7103 cmp = force_reg (SImode, ac.memsi);
7104
7105 /* Start CS loop. */
7106 emit_label (csloop);
7107 emit_move_insn (new_rtx, cmp);
7108
7109 /* Patch new with val at correct position. */
7110 switch (code)
7111 {
7112 case PLUS:
7113 case MINUS:
7114 val = expand_simple_binop (SImode, code, new_rtx, orig,
7115 NULL_RTX, 1, OPTAB_DIRECT);
7116 val = expand_simple_binop (SImode, AND, val, ac.modemask,
7117 NULL_RTX, 1, OPTAB_DIRECT);
7118 /* FALLTHRU */
7119 case SET:
7120 if (ac.aligned && MEM_P (val))
7121 store_bit_field (new_rtx, GET_MODE_BITSIZE (mode), 0,
7122 0, 0, SImode, val, false);
7123 else
7124 {
7125 new_rtx = expand_simple_binop (SImode, AND, new_rtx, ac.modemaski,
7126 NULL_RTX, 1, OPTAB_DIRECT);
7127 new_rtx = expand_simple_binop (SImode, IOR, new_rtx, val,
7128 NULL_RTX, 1, OPTAB_DIRECT);
7129 }
7130 break;
7131 case AND:
7132 case IOR:
7133 case XOR:
7134 new_rtx = expand_simple_binop (SImode, code, new_rtx, val,
7135 NULL_RTX, 1, OPTAB_DIRECT);
7136 break;
7137 case MULT: /* NAND */
7138 new_rtx = expand_simple_binop (SImode, AND, new_rtx, val,
7139 NULL_RTX, 1, OPTAB_DIRECT);
7140 new_rtx = expand_simple_binop (SImode, XOR, new_rtx, ac.modemask,
7141 NULL_RTX, 1, OPTAB_DIRECT);
7142 break;
7143 default:
7144 gcc_unreachable ();
7145 }
7146
7147 s390_emit_jump (csloop, s390_emit_compare_and_swap (NE, cmp,
7148 ac.memsi, cmp, new_rtx,
7149 CCZ1mode));
7150
7151 /* Return the correct part of the bitfield. */
7152 if (target)
7153 convert_move (target, expand_simple_binop (SImode, LSHIFTRT,
7154 after ? new_rtx : cmp, ac.shift,
7155 NULL_RTX, 1, OPTAB_DIRECT), 1);
7156 }
7157
7158 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7159 We need to emit DTP-relative relocations. */
7160
7161 static void s390_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
7162
7163 static void
7164 s390_output_dwarf_dtprel (FILE *file, int size, rtx x)
7165 {
7166 switch (size)
7167 {
7168 case 4:
7169 fputs ("\t.long\t", file);
7170 break;
7171 case 8:
7172 fputs ("\t.quad\t", file);
7173 break;
7174 default:
7175 gcc_unreachable ();
7176 }
7177 output_addr_const (file, x);
7178 fputs ("@DTPOFF", file);
7179 }
7180
7181 /* Return the proper mode for REGNO being represented in the dwarf
7182 unwind table. */
7183 machine_mode
7184 s390_dwarf_frame_reg_mode (int regno)
7185 {
7186 machine_mode save_mode = default_dwarf_frame_reg_mode (regno);
7187
7188 /* Make sure not to return DImode for any GPR with -m31 -mzarch. */
7189 if (GENERAL_REGNO_P (regno))
7190 save_mode = Pmode;
7191
7192 /* The rightmost 64 bits of vector registers are call-clobbered. */
7193 if (GET_MODE_SIZE (save_mode) > 8)
7194 save_mode = DImode;
7195
7196 return save_mode;
7197 }
7198
7199 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
7200 /* Implement TARGET_MANGLE_TYPE. */
7201
7202 static const char *
7203 s390_mangle_type (const_tree type)
7204 {
7205 type = TYPE_MAIN_VARIANT (type);
7206
7207 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
7208 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
7209 return NULL;
7210
7211 if (type == s390_builtin_types[BT_BV16QI]) return "U6__boolc";
7212 if (type == s390_builtin_types[BT_BV8HI]) return "U6__bools";
7213 if (type == s390_builtin_types[BT_BV4SI]) return "U6__booli";
7214 if (type == s390_builtin_types[BT_BV2DI]) return "U6__booll";
7215
7216 if (TYPE_MAIN_VARIANT (type) == long_double_type_node
7217 && TARGET_LONG_DOUBLE_128)
7218 return "g";
7219
7220 /* For all other types, use normal C++ mangling. */
7221 return NULL;
7222 }
7223 #endif
7224
7225 /* In the name of slightly smaller debug output, and to cater to
7226 general assembler lossage, recognize various UNSPEC sequences
7227 and turn them back into a direct symbol reference. */
7228
7229 static rtx
7230 s390_delegitimize_address (rtx orig_x)
7231 {
7232 rtx x, y;
7233
7234 orig_x = delegitimize_mem_from_attrs (orig_x);
7235 x = orig_x;
7236
7237 /* Extract the symbol ref from:
7238 (plus:SI (reg:SI 12 %r12)
7239 (const:SI (unspec:SI [(symbol_ref/f:SI ("*.LC0"))]
7240 UNSPEC_GOTOFF/PLTOFF)))
7241 and
7242 (plus:SI (reg:SI 12 %r12)
7243 (const:SI (plus:SI (unspec:SI [(symbol_ref:SI ("L"))]
7244 UNSPEC_GOTOFF/PLTOFF)
7245 (const_int 4 [0x4])))) */
7246 if (GET_CODE (x) == PLUS
7247 && REG_P (XEXP (x, 0))
7248 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM
7249 && GET_CODE (XEXP (x, 1)) == CONST)
7250 {
7251 HOST_WIDE_INT offset = 0;
7252
7253 /* The const operand. */
7254 y = XEXP (XEXP (x, 1), 0);
7255
7256 if (GET_CODE (y) == PLUS
7257 && GET_CODE (XEXP (y, 1)) == CONST_INT)
7258 {
7259 offset = INTVAL (XEXP (y, 1));
7260 y = XEXP (y, 0);
7261 }
7262
7263 if (GET_CODE (y) == UNSPEC
7264 && (XINT (y, 1) == UNSPEC_GOTOFF
7265 || XINT (y, 1) == UNSPEC_PLTOFF))
7266 return plus_constant (Pmode, XVECEXP (y, 0, 0), offset);
7267 }
7268
7269 if (GET_CODE (x) != MEM)
7270 return orig_x;
7271
7272 x = XEXP (x, 0);
7273 if (GET_CODE (x) == PLUS
7274 && GET_CODE (XEXP (x, 1)) == CONST
7275 && GET_CODE (XEXP (x, 0)) == REG
7276 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7277 {
7278 y = XEXP (XEXP (x, 1), 0);
7279 if (GET_CODE (y) == UNSPEC
7280 && XINT (y, 1) == UNSPEC_GOT)
7281 y = XVECEXP (y, 0, 0);
7282 else
7283 return orig_x;
7284 }
7285 else if (GET_CODE (x) == CONST)
7286 {
7287 /* Extract the symbol ref from:
7288 (mem:QI (const:DI (unspec:DI [(symbol_ref:DI ("foo"))]
7289 UNSPEC_PLT/GOTENT))) */
7290
7291 y = XEXP (x, 0);
7292 if (GET_CODE (y) == UNSPEC
7293 && (XINT (y, 1) == UNSPEC_GOTENT
7294 || XINT (y, 1) == UNSPEC_PLT))
7295 y = XVECEXP (y, 0, 0);
7296 else
7297 return orig_x;
7298 }
7299 else
7300 return orig_x;
7301
7302 if (GET_MODE (orig_x) != Pmode)
7303 {
7304 if (GET_MODE (orig_x) == BLKmode)
7305 return orig_x;
7306 y = lowpart_subreg (GET_MODE (orig_x), y, Pmode);
7307 if (y == NULL_RTX)
7308 return orig_x;
7309 }
7310 return y;
7311 }
7312
7313 /* Output operand OP to stdio stream FILE.
7314 OP is an address (register + offset) which is not used to address data;
7315 instead the rightmost bits are interpreted as the value. */
7316
7317 static void
7318 print_addrstyle_operand (FILE *file, rtx op)
7319 {
7320 HOST_WIDE_INT offset;
7321 rtx base;
7322
7323 /* Extract base register and offset. */
7324 if (!s390_decompose_addrstyle_without_index (op, &base, &offset))
7325 gcc_unreachable ();
7326
7327 /* Sanity check. */
7328 if (base)
7329 {
7330 gcc_assert (GET_CODE (base) == REG);
7331 gcc_assert (REGNO (base) < FIRST_PSEUDO_REGISTER);
7332 gcc_assert (REGNO_REG_CLASS (REGNO (base)) == ADDR_REGS);
7333 }
7334
7335 /* Offsets are restricted to twelve bits. */
7336 fprintf (file, HOST_WIDE_INT_PRINT_DEC, offset & ((1 << 12) - 1));
7337 if (base)
7338 fprintf (file, "(%s)", reg_names[REGNO (base)]);
7339 }
7340
7341 /* Assigns the number of NOP halfwords to be emitted before and after the
7342 function label to *HW_BEFORE and *HW_AFTER. Both pointers must not be NULL.
7343 If hotpatching is disabled for the function, the values are set to zero.
7344 */
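/* Illustrative note (added annotation, not part of the original sources):
   the attribute arguments are the two halfword counts, e.g.

     __attribute__ ((hotpatch (1, 2)))
     void
     patchable (void)
     {
     }

   requests one two-byte NOP before and two halfwords of NOPs after the
   function label, overriding any -mhotpatch= command line setting.  */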
7345
7346 static void
7347 s390_function_num_hotpatch_hw (tree decl,
7348 int *hw_before,
7349 int *hw_after)
7350 {
7351 tree attr;
7352
7353 attr = lookup_attribute ("hotpatch", DECL_ATTRIBUTES (decl));
7354
7355 /* Handle the arguments of the hotpatch attribute. The values
7356 specified via attribute might override the cmdline argument
7357 values. */
7358 if (attr)
7359 {
7360 tree args = TREE_VALUE (attr);
7361
7362 *hw_before = TREE_INT_CST_LOW (TREE_VALUE (args));
7363 *hw_after = TREE_INT_CST_LOW (TREE_VALUE (TREE_CHAIN (args)));
7364 }
7365 else
7366 {
7367 /* Use the values specified by the cmdline arguments. */
7368 *hw_before = s390_hotpatch_hw_before_label;
7369 *hw_after = s390_hotpatch_hw_after_label;
7370 }
7371 }
7372
7373 /* Write the current .machine and .machinemode specification to the assembler
7374 file. */
7375
7376 #ifdef HAVE_AS_MACHINE_MACHINEMODE
7377 static void
7378 s390_asm_output_machine_for_arch (FILE *asm_out_file)
7379 {
7380 fprintf (asm_out_file, "\t.machinemode %s\n",
7381 (TARGET_ZARCH) ? "zarch" : "esa");
7382 fprintf (asm_out_file, "\t.machine \"%s",
7383 processor_table[s390_arch].binutils_name);
7384 if (S390_USE_ARCHITECTURE_MODIFIERS)
7385 {
7386 int cpu_flags;
7387
7388 cpu_flags = processor_flags_table[(int) s390_arch];
7389 if (TARGET_HTM && !(cpu_flags & PF_TX))
7390 fprintf (asm_out_file, "+htm");
7391 else if (!TARGET_HTM && (cpu_flags & PF_TX))
7392 fprintf (asm_out_file, "+nohtm");
7393 if (TARGET_VX && !(cpu_flags & PF_VX))
7394 fprintf (asm_out_file, "+vx");
7395 else if (!TARGET_VX && (cpu_flags & PF_VX))
7396 fprintf (asm_out_file, "+novx");
7397 }
7398 fprintf (asm_out_file, "\"\n");
7399 }
7400
7401 /* Write an extra function header before the very start of the function. */
7402
7403 void
7404 s390_asm_output_function_prefix (FILE *asm_out_file,
7405 const char *fnname ATTRIBUTE_UNUSED)
7406 {
7407 if (DECL_FUNCTION_SPECIFIC_TARGET (current_function_decl) == NULL)
7408 return;
7409 /* Since only the function specific options are saved, but not which options
7410 were explicitly set, it's too much work here to figure out which options
7411 have actually changed. Thus, generate .machine and .machinemode whenever a
7412 function has the target attribute or pragma. */
7413 fprintf (asm_out_file, "\t.machinemode push\n");
7414 fprintf (asm_out_file, "\t.machine push\n");
7415 s390_asm_output_machine_for_arch (asm_out_file);
7416 }
7417
7418 /* Write an extra function footer after the very end of the function. */
7419
7420 void
7421 s390_asm_declare_function_size (FILE *asm_out_file,
7422 const char *fnname, tree decl)
7423 {
7424 if (!flag_inhibit_size_directive)
7425 ASM_OUTPUT_MEASURED_SIZE (asm_out_file, fnname);
7426 if (DECL_FUNCTION_SPECIFIC_TARGET (decl) == NULL)
7427 return;
7428 fprintf (asm_out_file, "\t.machine pop\n");
7429 fprintf (asm_out_file, "\t.machinemode pop\n");
7430 }
7431 #endif
7432
7433 /* Write the extra assembler code needed to declare a function properly. */
7434
7435 void
7436 s390_asm_output_function_label (FILE *asm_out_file, const char *fname,
7437 tree decl)
7438 {
7439 int hw_before, hw_after;
7440
7441 s390_function_num_hotpatch_hw (decl, &hw_before, &hw_after);
7442 if (hw_before > 0)
7443 {
7444 unsigned int function_alignment;
7445 int i;
7446
7447 /* Add a trampoline code area before the function label and initialize it
7448 with two-byte nop instructions. This area can be overwritten with code
7449 that jumps to a patched version of the function. */
7450 asm_fprintf (asm_out_file, "\tnopr\t%%r0"
7451 "\t# pre-label NOPs for hotpatch (%d halfwords)\n",
7452 hw_before);
7453 for (i = 1; i < hw_before; i++)
7454 fputs ("\tnopr\t%r0\n", asm_out_file);
7455
7456 /* Note: The function label must be aligned so that (a) the bytes of the
7457 following nop do not cross a cacheline boundary, and (b) a jump address
7458 (eight bytes for 64 bit targets, four bytes for 32 bit targets) can be
7459 stored directly before the label without crossing a cacheline
7460 boundary. All this is necessary to make sure the trampoline code can
7461 be changed atomically.
7462 This alignment is done automatically using the FUNCTION_BOUNDARY, but
7463 if there are NOPs before the function label, the alignment is placed
7464 before them. So it is necessary to duplicate the alignment after the
7465 NOPs. */
7466 function_alignment = MAX (8, DECL_ALIGN (decl) / BITS_PER_UNIT);
7467 if (! DECL_USER_ALIGN (decl))
7468 function_alignment = MAX (function_alignment,
7469 (unsigned int) align_functions);
7470 fputs ("\t# alignment for hotpatch\n", asm_out_file);
7471 ASM_OUTPUT_ALIGN (asm_out_file, floor_log2 (function_alignment));
7472 }
7473
7474 if (S390_USE_TARGET_ATTRIBUTE && TARGET_DEBUG_ARG)
7475 {
7476 asm_fprintf (asm_out_file, "\t# fn:%s ar%d\n", fname, s390_arch);
7477 asm_fprintf (asm_out_file, "\t# fn:%s tu%d\n", fname, s390_tune);
7478 asm_fprintf (asm_out_file, "\t# fn:%s sg%d\n", fname, s390_stack_guard);
7479 asm_fprintf (asm_out_file, "\t# fn:%s ss%d\n", fname, s390_stack_size);
7480 asm_fprintf (asm_out_file, "\t# fn:%s bc%d\n", fname, s390_branch_cost);
7481 asm_fprintf (asm_out_file, "\t# fn:%s wf%d\n", fname,
7482 s390_warn_framesize);
7483 asm_fprintf (asm_out_file, "\t# fn:%s ba%d\n", fname, TARGET_BACKCHAIN);
7484 asm_fprintf (asm_out_file, "\t# fn:%s hd%d\n", fname, TARGET_HARD_DFP);
7485 asm_fprintf (asm_out_file, "\t# fn:%s hf%d\n", fname, !TARGET_SOFT_FLOAT);
7486 asm_fprintf (asm_out_file, "\t# fn:%s ht%d\n", fname, TARGET_OPT_HTM);
7487 asm_fprintf (asm_out_file, "\t# fn:%s vx%d\n", fname, TARGET_OPT_VX);
7488 asm_fprintf (asm_out_file, "\t# fn:%s ps%d\n", fname,
7489 TARGET_PACKED_STACK);
7490 asm_fprintf (asm_out_file, "\t# fn:%s se%d\n", fname, TARGET_SMALL_EXEC);
7491 asm_fprintf (asm_out_file, "\t# fn:%s mv%d\n", fname, TARGET_MVCLE);
7492 asm_fprintf (asm_out_file, "\t# fn:%s zv%d\n", fname, TARGET_ZVECTOR);
7493 asm_fprintf (asm_out_file, "\t# fn:%s wd%d\n", fname,
7494 s390_warn_dynamicstack_p);
7495 }
7496 ASM_OUTPUT_LABEL (asm_out_file, fname);
7497 if (hw_after > 0)
7498 asm_fprintf (asm_out_file,
7499 "\t# post-label NOPs for hotpatch (%d halfwords)\n",
7500 hw_after);
7501 }
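/* Illustrative sketch: with hotpatching enabled (hypothetical values
   hw_before = 2, hw_after = 4) the routine above prints something along
   the lines of

	nopr	%r0	# pre-label NOPs for hotpatch (2 halfwords)
	nopr	%r0
	# alignment for hotpatch
	.align	8
   foo:
	# post-label NOPs for hotpatch (4 halfwords)

   (the exact alignment directive depends on the target's ASM_OUTPUT_ALIGN).
   Note that only a comment is printed for the post-label area here; the
   NOPs themselves are emitted separately.  */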
7502
7503 /* Output machine-dependent UNSPECs occurring in address constant X
7504 in assembler syntax to stdio stream FILE. Returns true if the
7505 constant X could be recognized, false otherwise. */
7506
7507 static bool
7508 s390_output_addr_const_extra (FILE *file, rtx x)
7509 {
7510 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 1)
7511 switch (XINT (x, 1))
7512 {
7513 case UNSPEC_GOTENT:
7514 output_addr_const (file, XVECEXP (x, 0, 0));
7515 fprintf (file, "@GOTENT");
7516 return true;
7517 case UNSPEC_GOT:
7518 output_addr_const (file, XVECEXP (x, 0, 0));
7519 fprintf (file, "@GOT");
7520 return true;
7521 case UNSPEC_GOTOFF:
7522 output_addr_const (file, XVECEXP (x, 0, 0));
7523 fprintf (file, "@GOTOFF");
7524 return true;
7525 case UNSPEC_PLT:
7526 output_addr_const (file, XVECEXP (x, 0, 0));
7527 fprintf (file, "@PLT");
7528 return true;
7529 case UNSPEC_PLTOFF:
7530 output_addr_const (file, XVECEXP (x, 0, 0));
7531 fprintf (file, "@PLTOFF");
7532 return true;
7533 case UNSPEC_TLSGD:
7534 output_addr_const (file, XVECEXP (x, 0, 0));
7535 fprintf (file, "@TLSGD");
7536 return true;
7537 case UNSPEC_TLSLDM:
7538 assemble_name (file, get_some_local_dynamic_name ());
7539 fprintf (file, "@TLSLDM");
7540 return true;
7541 case UNSPEC_DTPOFF:
7542 output_addr_const (file, XVECEXP (x, 0, 0));
7543 fprintf (file, "@DTPOFF");
7544 return true;
7545 case UNSPEC_NTPOFF:
7546 output_addr_const (file, XVECEXP (x, 0, 0));
7547 fprintf (file, "@NTPOFF");
7548 return true;
7549 case UNSPEC_GOTNTPOFF:
7550 output_addr_const (file, XVECEXP (x, 0, 0));
7551 fprintf (file, "@GOTNTPOFF");
7552 return true;
7553 case UNSPEC_INDNTPOFF:
7554 output_addr_const (file, XVECEXP (x, 0, 0));
7555 fprintf (file, "@INDNTPOFF");
7556 return true;
7557 }
7558
7559 if (GET_CODE (x) == UNSPEC && XVECLEN (x, 0) == 2)
7560 switch (XINT (x, 1))
7561 {
7562 case UNSPEC_POOL_OFFSET:
7563 x = gen_rtx_MINUS (GET_MODE (x), XVECEXP (x, 0, 0), XVECEXP (x, 0, 1));
7564 output_addr_const (file, x);
7565 return true;
7566 }
7567 return false;
7568 }
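/* Illustrative sketch: for a symbol `foo' (name made up) wrapped in
   UNSPEC_GOTENT, the routine above prints

	foo@GOTENT

   i.e. the constant followed by the relocation suffix understood by the
   assembler; the other UNSPECs map to their suffixes analogously.  */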
7569
7570 /* Output address operand ADDR in assembler syntax to
7571 stdio stream FILE. */
7572
7573 void
7574 print_operand_address (FILE *file, rtx addr)
7575 {
7576 struct s390_address ad;
7577 memset (&ad, 0, sizeof (s390_address));
7578
7579 if (s390_loadrelative_operand_p (addr, NULL, NULL))
7580 {
7581 if (!TARGET_Z10)
7582 {
7583 output_operand_lossage ("symbolic memory references are "
7584 "only supported on z10 or later");
7585 return;
7586 }
7587 output_addr_const (file, addr);
7588 return;
7589 }
7590
7591 if (!s390_decompose_address (addr, &ad)
7592 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7593 || (ad.indx && !REGNO_OK_FOR_INDEX_P (REGNO (ad.indx))))
7594 output_operand_lossage ("cannot decompose address");
7595
7596 if (ad.disp)
7597 output_addr_const (file, ad.disp);
7598 else
7599 fprintf (file, "0");
7600
7601 if (ad.base && ad.indx)
7602 fprintf (file, "(%s,%s)", reg_names[REGNO (ad.indx)],
7603 reg_names[REGNO (ad.base)]);
7604 else if (ad.base)
7605 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7606 }
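/* Illustrative sketch: depending on which parts of the decomposed address
   are present, the routine above prints forms such as

	8(%r1,%r2)	displacement, index and base
	8(%r2)		displacement and base
	0(%r2)		base register only
	foo		load-relative operand (z10 or later)

   (register numbers and the symbol name are made up for the example).  */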
7607
7608 /* Output operand X in assembler syntax to stdio stream FILE.
7609 CODE specifies the format flag. The following format flags
7610 are recognized:
7611
7612 'C': print opcode suffix for branch condition.
7613 'D': print opcode suffix for inverse branch condition.
7614 'E': print opcode suffix for branch on index instruction.
7615 'G': print the size of the operand in bytes.
7616 'J': print tls_load/tls_gdcall/tls_ldcall suffix
7617 'M': print the second word of a TImode operand.
7618 'N': print the second word of a DImode operand.
7619 'O': print only the displacement of a memory reference or address.
7620 'R': print only the base register of a memory reference or address.
7621 'S': print S-type memory reference (base+displacement).
7622 'Y': print address style operand without index (e.g. shift count or setmem
7623 operand).
7624
7625 'b': print integer X as if it's an unsigned byte.
7626 'c': print integer X as if it's a signed byte.
7627 'e': "end" of contiguous bitmask X in either DImode or vector inner mode.
7628 'f': "end" of contiguous bitmask X in SImode.
7629 'h': print integer X as if it's a signed halfword.
7630 'i': print the first nonzero HImode part of X.
7631 'j': print the first HImode part unequal to -1 of X.
7632 'k': print the first nonzero SImode part of X.
7633 'm': print the first SImode part unequal to -1 of X.
7634 'o': print integer X as if it's an unsigned 32bit word.
7635 's': "start" of contiguous bitmask X in either DImode or vector inner mode.
7636 't': CONST_INT: "start" of contiguous bitmask X in SImode.
7637 CONST_VECTOR: Generate a bitmask for vgbm instruction.
7638 'x': print integer X as if it's an unsigned halfword.
7639 'v': print register number as vector register (v1 instead of f1).
7640 */
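/* A few illustrative examples for the integer modifiers (values chosen
   arbitrarily): with X = 0xfff0, '%b' prints 240 (0xfff0 & 0xff),
   '%x' prints 65520, and '%h' prints -16 (the low 16 bits sign-extended).
   For the bitmask modifiers, a contiguous mask such as 0x0000ff00 printed
   with '%t'/'%f' yields the start/end bit positions in the numbering used
   by s390_contiguous_bitmask_p.  */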
7641
7642 void
7643 print_operand (FILE *file, rtx x, int code)
7644 {
7645 HOST_WIDE_INT ival;
7646
7647 switch (code)
7648 {
7649 case 'C':
7650 fprintf (file, s390_branch_condition_mnemonic (x, FALSE));
7651 return;
7652
7653 case 'D':
7654 fprintf (file, s390_branch_condition_mnemonic (x, TRUE));
7655 return;
7656
7657 case 'E':
7658 if (GET_CODE (x) == LE)
7659 fprintf (file, "l");
7660 else if (GET_CODE (x) == GT)
7661 fprintf (file, "h");
7662 else
7663 output_operand_lossage ("invalid comparison operator "
7664 "for 'E' output modifier");
7665 return;
7666
7667 case 'J':
7668 if (GET_CODE (x) == SYMBOL_REF)
7669 {
7670 fprintf (file, "%s", ":tls_load:");
7671 output_addr_const (file, x);
7672 }
7673 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSGD)
7674 {
7675 fprintf (file, "%s", ":tls_gdcall:");
7676 output_addr_const (file, XVECEXP (x, 0, 0));
7677 }
7678 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLSLDM)
7679 {
7680 fprintf (file, "%s", ":tls_ldcall:");
7681 const char *name = get_some_local_dynamic_name ();
7682 gcc_assert (name);
7683 assemble_name (file, name);
7684 }
7685 else
7686 output_operand_lossage ("invalid reference for 'J' output modifier");
7687 return;
7688
7689 case 'G':
7690 fprintf (file, "%u", GET_MODE_SIZE (GET_MODE (x)));
7691 return;
7692
7693 case 'O':
7694 {
7695 struct s390_address ad;
7696 int ret;
7697
7698 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7699
7700 if (!ret
7701 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7702 || ad.indx)
7703 {
7704 output_operand_lossage ("invalid address for 'O' output modifier");
7705 return;
7706 }
7707
7708 if (ad.disp)
7709 output_addr_const (file, ad.disp);
7710 else
7711 fprintf (file, "0");
7712 }
7713 return;
7714
7715 case 'R':
7716 {
7717 struct s390_address ad;
7718 int ret;
7719
7720 ret = s390_decompose_address (MEM_P (x) ? XEXP (x, 0) : x, &ad);
7721
7722 if (!ret
7723 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7724 || ad.indx)
7725 {
7726 output_operand_lossage ("invalid address for 'R' output modifier");
7727 return;
7728 }
7729
7730 if (ad.base)
7731 fprintf (file, "%s", reg_names[REGNO (ad.base)]);
7732 else
7733 fprintf (file, "0");
7734 }
7735 return;
7736
7737 case 'S':
7738 {
7739 struct s390_address ad;
7740 int ret;
7741
7742 if (!MEM_P (x))
7743 {
7744 output_operand_lossage ("memory reference expected for "
7745 "'S' output modifier");
7746 return;
7747 }
7748 ret = s390_decompose_address (XEXP (x, 0), &ad);
7749
7750 if (!ret
7751 || (ad.base && !REGNO_OK_FOR_BASE_P (REGNO (ad.base)))
7752 || ad.indx)
7753 {
7754 output_operand_lossage ("invalid address for 'S' output modifier");
7755 return;
7756 }
7757
7758 if (ad.disp)
7759 output_addr_const (file, ad.disp);
7760 else
7761 fprintf (file, "0");
7762
7763 if (ad.base)
7764 fprintf (file, "(%s)", reg_names[REGNO (ad.base)]);
7765 }
7766 return;
7767
7768 case 'N':
7769 if (GET_CODE (x) == REG)
7770 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7771 else if (GET_CODE (x) == MEM)
7772 x = change_address (x, VOIDmode,
7773 plus_constant (Pmode, XEXP (x, 0), 4));
7774 else
7775 output_operand_lossage ("register or memory expression expected "
7776 "for 'N' output modifier");
7777 break;
7778
7779 case 'M':
7780 if (GET_CODE (x) == REG)
7781 x = gen_rtx_REG (GET_MODE (x), REGNO (x) + 1);
7782 else if (GET_CODE (x) == MEM)
7783 x = change_address (x, VOIDmode,
7784 plus_constant (Pmode, XEXP (x, 0), 8));
7785 else
7786 output_operand_lossage ("register or memory expression expected "
7787 "for 'M' output modifier");
7788 break;
7789
7790 case 'Y':
7791 print_addrstyle_operand (file, x);
7792 return;
7793 }
7794
7795 switch (GET_CODE (x))
7796 {
7797 case REG:
7798 /* Print FP regs as fx instead of vx when they are accessed
7799 through non-vector mode. */
7800 if (code == 'v'
7801 || VECTOR_NOFP_REG_P (x)
7802 || (FP_REG_P (x) && VECTOR_MODE_P (GET_MODE (x)))
7803 || (VECTOR_REG_P (x)
7804 && (GET_MODE_SIZE (GET_MODE (x)) /
7805 s390_class_max_nregs (FP_REGS, GET_MODE (x))) > 8))
7806 fprintf (file, "%%v%s", reg_names[REGNO (x)] + 2);
7807 else
7808 fprintf (file, "%s", reg_names[REGNO (x)]);
7809 break;
7810
7811 case MEM:
7812 output_address (GET_MODE (x), XEXP (x, 0));
7813 break;
7814
7815 case CONST:
7816 case CODE_LABEL:
7817 case LABEL_REF:
7818 case SYMBOL_REF:
7819 output_addr_const (file, x);
7820 break;
7821
7822 case CONST_INT:
7823 ival = INTVAL (x);
7824 switch (code)
7825 {
7826 case 0:
7827 break;
7828 case 'b':
7829 ival &= 0xff;
7830 break;
7831 case 'c':
7832 ival = ((ival & 0xff) ^ 0x80) - 0x80;
7833 break;
7834 case 'x':
7835 ival &= 0xffff;
7836 break;
7837 case 'h':
7838 ival = ((ival & 0xffff) ^ 0x8000) - 0x8000;
7839 break;
7840 case 'i':
7841 ival = s390_extract_part (x, HImode, 0);
7842 break;
7843 case 'j':
7844 ival = s390_extract_part (x, HImode, -1);
7845 break;
7846 case 'k':
7847 ival = s390_extract_part (x, SImode, 0);
7848 break;
7849 case 'm':
7850 ival = s390_extract_part (x, SImode, -1);
7851 break;
7852 case 'o':
7853 ival &= 0xffffffff;
7854 break;
7855 case 'e': case 'f':
7856 case 's': case 't':
7857 {
7858 int start, end;
7859 int len;
7860 bool ok;
7861
7862 len = (code == 's' || code == 'e' ? 64 : 32);
7863 ok = s390_contiguous_bitmask_p (ival, true, len, &start, &end);
7864 gcc_assert (ok);
7865 if (code == 's' || code == 't')
7866 ival = start;
7867 else
7868 ival = end;
7869 }
7870 break;
7871 default:
7872 output_operand_lossage ("invalid constant for output modifier '%c'", code);
7873 }
7874 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7875 break;
7876
7877 case CONST_WIDE_INT:
7878 if (code == 'b')
7879 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7880 CONST_WIDE_INT_ELT (x, 0) & 0xff);
7881 else if (code == 'x')
7882 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7883 CONST_WIDE_INT_ELT (x, 0) & 0xffff);
7884 else if (code == 'h')
7885 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7886 ((CONST_WIDE_INT_ELT (x, 0) & 0xffff) ^ 0x8000) - 0x8000);
7887 else
7888 {
7889 if (code == 0)
7890 output_operand_lossage ("invalid constant - try using "
7891 "an output modifier");
7892 else
7893 output_operand_lossage ("invalid constant for output modifier '%c'",
7894 code);
7895 }
7896 break;
7897 case CONST_VECTOR:
7898 switch (code)
7899 {
7900 case 'h':
7901 gcc_assert (const_vec_duplicate_p (x));
7902 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
7903 ((INTVAL (XVECEXP (x, 0, 0)) & 0xffff) ^ 0x8000) - 0x8000);
7904 break;
7905 case 'e':
7906 case 's':
7907 {
7908 int start, end;
7909 bool ok;
7910
7911 ok = s390_contiguous_bitmask_vector_p (x, &start, &end);
7912 gcc_assert (ok);
7913 ival = (code == 's') ? start : end;
7914 fprintf (file, HOST_WIDE_INT_PRINT_DEC, ival);
7915 }
7916 break;
7917 case 't':
7918 {
7919 unsigned mask;
7920 bool ok = s390_bytemask_vector_p (x, &mask);
7921 gcc_assert (ok);
7922 fprintf (file, "%u", mask);
7923 }
7924 break;
7925
7926 default:
7927 output_operand_lossage ("invalid constant vector for output "
7928 "modifier '%c'", code);
7929 }
7930 break;
7931
7932 default:
7933 if (code == 0)
7934 output_operand_lossage ("invalid expression - try using "
7935 "an output modifier");
7936 else
7937 output_operand_lossage ("invalid expression for output "
7938 "modifier '%c'", code);
7939 break;
7940 }
7941 }
7942
7943 /* Target hook for assembling integer objects. We need to define it
7944 here to work around a bug in some versions of GAS, which couldn't
7945 handle values smaller than INT_MIN when printed in decimal. */
7946
7947 static bool
7948 s390_assemble_integer (rtx x, unsigned int size, int aligned_p)
7949 {
7950 if (size == 8 && aligned_p
7951 && GET_CODE (x) == CONST_INT && INTVAL (x) < INT_MIN)
7952 {
7953 fprintf (asm_out_file, "\t.quad\t" HOST_WIDE_INT_PRINT_HEX "\n",
7954 INTVAL (x));
7955 return true;
7956 }
7957 return default_assemble_integer (x, size, aligned_p);
7958 }
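/* Illustrative sketch: for the most negative 64-bit value the hook above
   would emit something like

	.quad	0x8000000000000000

   rather than the decimal spelling, which the affected gas versions could
   not parse.  */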
7959
7960 /* Returns true if register REGNO is used for forming
7961 a memory address in expression X. */
7962
7963 static bool
7964 reg_used_in_mem_p (int regno, rtx x)
7965 {
7966 enum rtx_code code = GET_CODE (x);
7967 int i, j;
7968 const char *fmt;
7969
7970 if (code == MEM)
7971 {
7972 if (refers_to_regno_p (regno, XEXP (x, 0)))
7973 return true;
7974 }
7975 else if (code == SET
7976 && GET_CODE (SET_DEST (x)) == PC)
7977 {
7978 if (refers_to_regno_p (regno, SET_SRC (x)))
7979 return true;
7980 }
7981
7982 fmt = GET_RTX_FORMAT (code);
7983 for (i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
7984 {
7985 if (fmt[i] == 'e'
7986 && reg_used_in_mem_p (regno, XEXP (x, i)))
7987 return true;
7988
7989 else if (fmt[i] == 'E')
7990 for (j = 0; j < XVECLEN (x, i); j++)
7991 if (reg_used_in_mem_p (regno, XVECEXP (x, i, j)))
7992 return true;
7993 }
7994 return false;
7995 }
7996
7997 /* Returns true if expression DEP_RTX sets an address register
7998 used by instruction INSN to address memory. */
7999
8000 static bool
8001 addr_generation_dependency_p (rtx dep_rtx, rtx_insn *insn)
8002 {
8003 rtx target, pat;
8004
8005 if (NONJUMP_INSN_P (dep_rtx))
8006 dep_rtx = PATTERN (dep_rtx);
8007
8008 if (GET_CODE (dep_rtx) == SET)
8009 {
8010 target = SET_DEST (dep_rtx);
8011 if (GET_CODE (target) == STRICT_LOW_PART)
8012 target = XEXP (target, 0);
8013 while (GET_CODE (target) == SUBREG)
8014 target = SUBREG_REG (target);
8015
8016 if (GET_CODE (target) == REG)
8017 {
8018 int regno = REGNO (target);
8019
8020 if (s390_safe_attr_type (insn) == TYPE_LA)
8021 {
8022 pat = PATTERN (insn);
8023 if (GET_CODE (pat) == PARALLEL)
8024 {
8025 gcc_assert (XVECLEN (pat, 0) == 2);
8026 pat = XVECEXP (pat, 0, 0);
8027 }
8028 gcc_assert (GET_CODE (pat) == SET);
8029 return refers_to_regno_p (regno, SET_SRC (pat));
8030 }
8031 else if (get_attr_atype (insn) == ATYPE_AGEN)
8032 return reg_used_in_mem_p (regno, PATTERN (insn));
8033 }
8034 }
8035 return false;
8036 }
8037
8038 /* Return 1 if DEP_INSN sets a register used by INSN in the agen unit. */
8039
8040 int
8041 s390_agen_dep_p (rtx_insn *dep_insn, rtx_insn *insn)
8042 {
8043 rtx dep_rtx = PATTERN (dep_insn);
8044 int i;
8045
8046 if (GET_CODE (dep_rtx) == SET
8047 && addr_generation_dependency_p (dep_rtx, insn))
8048 return 1;
8049 else if (GET_CODE (dep_rtx) == PARALLEL)
8050 {
8051 for (i = 0; i < XVECLEN (dep_rtx, 0); i++)
8052 {
8053 if (addr_generation_dependency_p (XVECEXP (dep_rtx, 0, i), insn))
8054 return 1;
8055 }
8056 }
8057 return 0;
8058 }
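/* Illustrative sketch of an address-generation dependency (register
   numbers made up):

	lgr	%r2,%r5		; dep_insn sets %r2
	lg	%r1,0(%r2)	; insn uses %r2 to form its address

   For such a pair s390_agen_dep_p returns 1, which the pipeline
   descriptions can use to account for the extra address-generation
   latency.  */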
8059
8060
8061 /* A C statement (sans semicolon) to update the integer scheduling priority
8062 INSN_PRIORITY (INSN). Increase the priority to execute the INSN earlier;
8063 reduce the priority to execute INSN later. Do not define this macro if
8064 you do not need to adjust the scheduling priorities of insns.
8065
8066 A STD instruction should be scheduled earlier,
8067 in order to use the bypass. */
8068 static int
8069 s390_adjust_priority (rtx_insn *insn, int priority)
8070 {
8071 if (! INSN_P (insn))
8072 return priority;
8073
8074 if (s390_tune <= PROCESSOR_2064_Z900)
8075 return priority;
8076
8077 switch (s390_safe_attr_type (insn))
8078 {
8079 case TYPE_FSTOREDF:
8080 case TYPE_FSTORESF:
8081 priority = priority << 3;
8082 break;
8083 case TYPE_STORE:
8084 case TYPE_STM:
8085 priority = priority << 1;
8086 break;
8087 default:
8088 break;
8089 }
8090 return priority;
8091 }
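/* Worked example: an FP store entering with priority 3 leaves the hook
   above with priority 24 (3 << 3), while an ordinary store or STM with
   priority 3 leaves it with 6 (3 << 1), so both are preferred over
   unrelated insns of equal original priority.  */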
8092
8093
8094 /* The number of instructions that can be issued per cycle. */
8095
8096 static int
8097 s390_issue_rate (void)
8098 {
8099 switch (s390_tune)
8100 {
8101 case PROCESSOR_2084_Z990:
8102 case PROCESSOR_2094_Z9_109:
8103 case PROCESSOR_2094_Z9_EC:
8104 case PROCESSOR_2817_Z196:
8105 return 3;
8106 case PROCESSOR_2097_Z10:
8107 return 2;
8108 case PROCESSOR_9672_G5:
8109 case PROCESSOR_9672_G6:
8110 case PROCESSOR_2064_Z900:
8111 /* Starting with EC12 we use the sched_reorder hook to take care
8112 of instruction dispatch constraints. The algorithm only
8113 picks the best instruction and assumes only a single
8114 instruction gets issued per cycle. */
8115 case PROCESSOR_2827_ZEC12:
8116 case PROCESSOR_2964_Z13:
8117 case PROCESSOR_3906_Z14:
8118 default:
8119 return 1;
8120 }
8121 }
8122
8123 static int
8124 s390_first_cycle_multipass_dfa_lookahead (void)
8125 {
8126 return 4;
8127 }
8128
8129 /* Annotate every literal pool reference in X by an UNSPEC_LTREF expression.
8130 Fix up MEMs as required. */
8131
8132 static void
8133 annotate_constant_pool_refs (rtx *x)
8134 {
8135 int i, j;
8136 const char *fmt;
8137
8138 gcc_assert (GET_CODE (*x) != SYMBOL_REF
8139 || !CONSTANT_POOL_ADDRESS_P (*x));
8140
8141 /* Literal pool references can only occur inside a MEM ... */
8142 if (GET_CODE (*x) == MEM)
8143 {
8144 rtx memref = XEXP (*x, 0);
8145
8146 if (GET_CODE (memref) == SYMBOL_REF
8147 && CONSTANT_POOL_ADDRESS_P (memref))
8148 {
8149 rtx base = cfun->machine->base_reg;
8150 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, memref, base),
8151 UNSPEC_LTREF);
8152
8153 *x = replace_equiv_address (*x, addr);
8154 return;
8155 }
8156
8157 if (GET_CODE (memref) == CONST
8158 && GET_CODE (XEXP (memref, 0)) == PLUS
8159 && GET_CODE (XEXP (XEXP (memref, 0), 1)) == CONST_INT
8160 && GET_CODE (XEXP (XEXP (memref, 0), 0)) == SYMBOL_REF
8161 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (memref, 0), 0)))
8162 {
8163 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (memref, 0), 1));
8164 rtx sym = XEXP (XEXP (memref, 0), 0);
8165 rtx base = cfun->machine->base_reg;
8166 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8167 UNSPEC_LTREF);
8168
8169 *x = replace_equiv_address (*x, plus_constant (Pmode, addr, off));
8170 return;
8171 }
8172 }
8173
8174 /* ... or a load-address type pattern. */
8175 if (GET_CODE (*x) == SET)
8176 {
8177 rtx addrref = SET_SRC (*x);
8178
8179 if (GET_CODE (addrref) == SYMBOL_REF
8180 && CONSTANT_POOL_ADDRESS_P (addrref))
8181 {
8182 rtx base = cfun->machine->base_reg;
8183 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, addrref, base),
8184 UNSPEC_LTREF);
8185
8186 SET_SRC (*x) = addr;
8187 return;
8188 }
8189
8190 if (GET_CODE (addrref) == CONST
8191 && GET_CODE (XEXP (addrref, 0)) == PLUS
8192 && GET_CODE (XEXP (XEXP (addrref, 0), 1)) == CONST_INT
8193 && GET_CODE (XEXP (XEXP (addrref, 0), 0)) == SYMBOL_REF
8194 && CONSTANT_POOL_ADDRESS_P (XEXP (XEXP (addrref, 0), 0)))
8195 {
8196 HOST_WIDE_INT off = INTVAL (XEXP (XEXP (addrref, 0), 1));
8197 rtx sym = XEXP (XEXP (addrref, 0), 0);
8198 rtx base = cfun->machine->base_reg;
8199 rtx addr = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, sym, base),
8200 UNSPEC_LTREF);
8201
8202 SET_SRC (*x) = plus_constant (Pmode, addr, off);
8203 return;
8204 }
8205 }
8206
8207 /* Annotate LTREL_BASE as well. */
8208 if (GET_CODE (*x) == UNSPEC
8209 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8210 {
8211 rtx base = cfun->machine->base_reg;
8212 *x = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XVECEXP (*x, 0, 0), base),
8213 UNSPEC_LTREL_BASE);
8214 return;
8215 }
8216
8217 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8218 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8219 {
8220 if (fmt[i] == 'e')
8221 {
8222 annotate_constant_pool_refs (&XEXP (*x, i));
8223 }
8224 else if (fmt[i] == 'E')
8225 {
8226 for (j = 0; j < XVECLEN (*x, i); j++)
8227 annotate_constant_pool_refs (&XVECEXP (*x, i, j));
8228 }
8229 }
8230 }
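/* Illustrative sketch of the rewrite performed above: a literal pool
   reference of the form

	(mem (symbol_ref [in constant pool]))

   becomes

	(mem (unspec [(symbol_ref ...) (reg <base>)] UNSPEC_LTREF))

   making the dependency on the literal pool base register explicit in the
   RTL.  */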
8231
8232 /* Split all branches that exceed the maximum distance.
8233 Returns true if this created a new literal pool entry. */
8234
8235 static int
8236 s390_split_branches (void)
8237 {
8238 rtx temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
8239 int new_literal = 0, ret;
8240 rtx_insn *insn;
8241 rtx pat, target;
8242 rtx *label;
8243
8244 /* We need correct insn addresses. */
8245
8246 shorten_branches (get_insns ());
8247
8248 /* Find all branches that exceed 64KB, and split them. */
8249
8250 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8251 {
8252 if (! JUMP_P (insn) || tablejump_p (insn, NULL, NULL))
8253 continue;
8254
8255 pat = PATTERN (insn);
8256 if (GET_CODE (pat) == PARALLEL)
8257 pat = XVECEXP (pat, 0, 0);
8258 if (GET_CODE (pat) != SET || SET_DEST (pat) != pc_rtx)
8259 continue;
8260
8261 if (GET_CODE (SET_SRC (pat)) == LABEL_REF)
8262 {
8263 label = &SET_SRC (pat);
8264 }
8265 else if (GET_CODE (SET_SRC (pat)) == IF_THEN_ELSE)
8266 {
8267 if (GET_CODE (XEXP (SET_SRC (pat), 1)) == LABEL_REF)
8268 label = &XEXP (SET_SRC (pat), 1);
8269 else if (GET_CODE (XEXP (SET_SRC (pat), 2)) == LABEL_REF)
8270 label = &XEXP (SET_SRC (pat), 2);
8271 else
8272 continue;
8273 }
8274 else
8275 continue;
8276
8277 if (get_attr_length (insn) <= 4)
8278 continue;
8279
8280 /* We are going to use the return register as a scratch register;
8281 make sure it will be saved/restored by the prologue/epilogue. */
8282 cfun_frame_layout.save_return_addr_p = 1;
8283
8284 if (!flag_pic)
8285 {
8286 new_literal = 1;
8287 rtx mem = force_const_mem (Pmode, *label);
8288 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, mem),
8289 insn);
8290 INSN_ADDRESSES_NEW (set_insn, -1);
8291 annotate_constant_pool_refs (&PATTERN (set_insn));
8292
8293 target = temp_reg;
8294 }
8295 else
8296 {
8297 new_literal = 1;
8298 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, *label),
8299 UNSPEC_LTREL_OFFSET);
8300 target = gen_rtx_CONST (Pmode, target);
8301 target = force_const_mem (Pmode, target);
8302 rtx_insn *set_insn = emit_insn_before (gen_rtx_SET (temp_reg, target),
8303 insn);
8304 INSN_ADDRESSES_NEW (set_insn, -1);
8305 annotate_constant_pool_refs (&PATTERN (set_insn));
8306
8307 target = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, XEXP (target, 0),
8308 cfun->machine->base_reg),
8309 UNSPEC_LTREL_BASE);
8310 target = gen_rtx_PLUS (Pmode, temp_reg, target);
8311 }
8312
8313 ret = validate_change (insn, label, target, 0);
8314 gcc_assert (ret);
8315 }
8316
8317 return new_literal;
8318 }
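/* Illustrative sketch (non-PIC case): a branch whose target label is out
   of range is rewritten so that the target address is first loaded from
   the literal pool into the return register and the branch then uses that
   register, conceptually

	l	%r14,<pool slot>	; load target address
	b...r	%r14			; branch via register

   In the PIC case an UNSPEC_LTREL_OFFSET/UNSPEC_LTREL_BASE pair is used
   instead so the pool entry stays position-independent.  */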
8319
8320
8321 /* Find an annotated literal pool symbol referenced in RTX X,
8322 and store it at REF. Will abort if X contains references to
8323 more than one such pool symbol; multiple references to the same
8324 symbol are allowed, however.
8325
8326 The rtx pointed to by REF must be initialized to NULL_RTX
8327 by the caller before calling this routine. */
8328
8329 static void
8330 find_constant_pool_ref (rtx x, rtx *ref)
8331 {
8332 int i, j;
8333 const char *fmt;
8334
8335 /* Ignore LTREL_BASE references. */
8336 if (GET_CODE (x) == UNSPEC
8337 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8338 return;
8339 /* Likewise POOL_ENTRY insns. */
8340 if (GET_CODE (x) == UNSPEC_VOLATILE
8341 && XINT (x, 1) == UNSPECV_POOL_ENTRY)
8342 return;
8343
8344 gcc_assert (GET_CODE (x) != SYMBOL_REF
8345 || !CONSTANT_POOL_ADDRESS_P (x));
8346
8347 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_LTREF)
8348 {
8349 rtx sym = XVECEXP (x, 0, 0);
8350 gcc_assert (GET_CODE (sym) == SYMBOL_REF
8351 && CONSTANT_POOL_ADDRESS_P (sym));
8352
8353 if (*ref == NULL_RTX)
8354 *ref = sym;
8355 else
8356 gcc_assert (*ref == sym);
8357
8358 return;
8359 }
8360
8361 fmt = GET_RTX_FORMAT (GET_CODE (x));
8362 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8363 {
8364 if (fmt[i] == 'e')
8365 {
8366 find_constant_pool_ref (XEXP (x, i), ref);
8367 }
8368 else if (fmt[i] == 'E')
8369 {
8370 for (j = 0; j < XVECLEN (x, i); j++)
8371 find_constant_pool_ref (XVECEXP (x, i, j), ref);
8372 }
8373 }
8374 }
8375
8376 /* Replace every reference to the annotated literal pool
8377 symbol REF in X by its base plus OFFSET. */
8378
8379 static void
8380 replace_constant_pool_ref (rtx *x, rtx ref, rtx offset)
8381 {
8382 int i, j;
8383 const char *fmt;
8384
8385 gcc_assert (*x != ref);
8386
8387 if (GET_CODE (*x) == UNSPEC
8388 && XINT (*x, 1) == UNSPEC_LTREF
8389 && XVECEXP (*x, 0, 0) == ref)
8390 {
8391 *x = gen_rtx_PLUS (Pmode, XVECEXP (*x, 0, 1), offset);
8392 return;
8393 }
8394
8395 if (GET_CODE (*x) == PLUS
8396 && GET_CODE (XEXP (*x, 1)) == CONST_INT
8397 && GET_CODE (XEXP (*x, 0)) == UNSPEC
8398 && XINT (XEXP (*x, 0), 1) == UNSPEC_LTREF
8399 && XVECEXP (XEXP (*x, 0), 0, 0) == ref)
8400 {
8401 rtx addr = gen_rtx_PLUS (Pmode, XVECEXP (XEXP (*x, 0), 0, 1), offset);
8402 *x = plus_constant (Pmode, addr, INTVAL (XEXP (*x, 1)));
8403 return;
8404 }
8405
8406 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8407 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8408 {
8409 if (fmt[i] == 'e')
8410 {
8411 replace_constant_pool_ref (&XEXP (*x, i), ref, offset);
8412 }
8413 else if (fmt[i] == 'E')
8414 {
8415 for (j = 0; j < XVECLEN (*x, i); j++)
8416 replace_constant_pool_ref (&XVECEXP (*x, i, j), ref, offset);
8417 }
8418 }
8419 }
8420
8421 /* Check whether X contains an UNSPEC_LTREL_BASE.
8422 Return its constant pool symbol if found, NULL_RTX otherwise. */
8423
8424 static rtx
8425 find_ltrel_base (rtx x)
8426 {
8427 int i, j;
8428 const char *fmt;
8429
8430 if (GET_CODE (x) == UNSPEC
8431 && XINT (x, 1) == UNSPEC_LTREL_BASE)
8432 return XVECEXP (x, 0, 0);
8433
8434 fmt = GET_RTX_FORMAT (GET_CODE (x));
8435 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
8436 {
8437 if (fmt[i] == 'e')
8438 {
8439 rtx fnd = find_ltrel_base (XEXP (x, i));
8440 if (fnd)
8441 return fnd;
8442 }
8443 else if (fmt[i] == 'E')
8444 {
8445 for (j = 0; j < XVECLEN (x, i); j++)
8446 {
8447 rtx fnd = find_ltrel_base (XVECEXP (x, i, j));
8448 if (fnd)
8449 return fnd;
8450 }
8451 }
8452 }
8453
8454 return NULL_RTX;
8455 }
8456
8457 /* Replace any occurrence of UNSPEC_LTREL_BASE in X with its base. */
8458
8459 static void
8460 replace_ltrel_base (rtx *x)
8461 {
8462 int i, j;
8463 const char *fmt;
8464
8465 if (GET_CODE (*x) == UNSPEC
8466 && XINT (*x, 1) == UNSPEC_LTREL_BASE)
8467 {
8468 *x = XVECEXP (*x, 0, 1);
8469 return;
8470 }
8471
8472 fmt = GET_RTX_FORMAT (GET_CODE (*x));
8473 for (i = GET_RTX_LENGTH (GET_CODE (*x)) - 1; i >= 0; i--)
8474 {
8475 if (fmt[i] == 'e')
8476 {
8477 replace_ltrel_base (&XEXP (*x, i));
8478 }
8479 else if (fmt[i] == 'E')
8480 {
8481 for (j = 0; j < XVECLEN (*x, i); j++)
8482 replace_ltrel_base (&XVECEXP (*x, i, j));
8483 }
8484 }
8485 }
8486
8487
8488 /* We keep a list of constants which we have to add to internal
8489 constant tables in the middle of large functions. */
8490
8491 #define NR_C_MODES 32
8492 machine_mode constant_modes[NR_C_MODES] =
8493 {
8494 TFmode, TImode, TDmode,
8495 V16QImode, V8HImode, V4SImode, V2DImode, V1TImode,
8496 V4SFmode, V2DFmode, V1TFmode,
8497 DFmode, DImode, DDmode,
8498 V8QImode, V4HImode, V2SImode, V1DImode, V2SFmode, V1DFmode,
8499 SFmode, SImode, SDmode,
8500 V4QImode, V2HImode, V1SImode, V1SFmode,
8501 HImode,
8502 V2QImode, V1HImode,
8503 QImode,
8504 V1QImode
8505 };
8506
8507 struct constant
8508 {
8509 struct constant *next;
8510 rtx value;
8511 rtx_code_label *label;
8512 };
8513
8514 struct constant_pool
8515 {
8516 struct constant_pool *next;
8517 rtx_insn *first_insn;
8518 rtx_insn *pool_insn;
8519 bitmap insns;
8520 rtx_insn *emit_pool_after;
8521
8522 struct constant *constants[NR_C_MODES];
8523 struct constant *execute;
8524 rtx_code_label *label;
8525 int size;
8526 };
8527
8528 /* Allocate new constant_pool structure. */
8529
8530 static struct constant_pool *
8531 s390_alloc_pool (void)
8532 {
8533 struct constant_pool *pool;
8534 int i;
8535
8536 pool = (struct constant_pool *) xmalloc (sizeof *pool);
8537 pool->next = NULL;
8538 for (i = 0; i < NR_C_MODES; i++)
8539 pool->constants[i] = NULL;
8540
8541 pool->execute = NULL;
8542 pool->label = gen_label_rtx ();
8543 pool->first_insn = NULL;
8544 pool->pool_insn = NULL;
8545 pool->insns = BITMAP_ALLOC (NULL);
8546 pool->size = 0;
8547 pool->emit_pool_after = NULL;
8548
8549 return pool;
8550 }
8551
8552 /* Create new constant pool covering instructions starting at INSN
8553 and chain it to the end of POOL_LIST. */
8554
8555 static struct constant_pool *
8556 s390_start_pool (struct constant_pool **pool_list, rtx_insn *insn)
8557 {
8558 struct constant_pool *pool, **prev;
8559
8560 pool = s390_alloc_pool ();
8561 pool->first_insn = insn;
8562
8563 for (prev = pool_list; *prev; prev = &(*prev)->next)
8564 ;
8565 *prev = pool;
8566
8567 return pool;
8568 }
8569
8570 /* End range of instructions covered by POOL at INSN and emit
8571 placeholder insn representing the pool. */
8572
8573 static void
8574 s390_end_pool (struct constant_pool *pool, rtx_insn *insn)
8575 {
8576 rtx pool_size = GEN_INT (pool->size + 8 /* alignment slop */);
8577
8578 if (!insn)
8579 insn = get_last_insn ();
8580
8581 pool->pool_insn = emit_insn_after (gen_pool (pool_size), insn);
8582 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8583 }
8584
8585 /* Add INSN to the list of insns covered by POOL. */
8586
8587 static void
8588 s390_add_pool_insn (struct constant_pool *pool, rtx insn)
8589 {
8590 bitmap_set_bit (pool->insns, INSN_UID (insn));
8591 }
8592
8593 /* Return pool out of POOL_LIST that covers INSN. */
8594
8595 static struct constant_pool *
8596 s390_find_pool (struct constant_pool *pool_list, rtx insn)
8597 {
8598 struct constant_pool *pool;
8599
8600 for (pool = pool_list; pool; pool = pool->next)
8601 if (bitmap_bit_p (pool->insns, INSN_UID (insn)))
8602 break;
8603
8604 return pool;
8605 }
8606
8607 /* Add constant VAL of mode MODE to the constant pool POOL. */
8608
8609 static void
8610 s390_add_constant (struct constant_pool *pool, rtx val, machine_mode mode)
8611 {
8612 struct constant *c;
8613 int i;
8614
8615 for (i = 0; i < NR_C_MODES; i++)
8616 if (constant_modes[i] == mode)
8617 break;
8618 gcc_assert (i != NR_C_MODES);
8619
8620 for (c = pool->constants[i]; c != NULL; c = c->next)
8621 if (rtx_equal_p (val, c->value))
8622 break;
8623
8624 if (c == NULL)
8625 {
8626 c = (struct constant *) xmalloc (sizeof *c);
8627 c->value = val;
8628 c->label = gen_label_rtx ();
8629 c->next = pool->constants[i];
8630 pool->constants[i] = c;
8631 pool->size += GET_MODE_SIZE (mode);
8632 }
8633 }
8634
8635 /* Return an rtx that represents the offset of X from the start of
8636 pool POOL. */
8637
8638 static rtx
8639 s390_pool_offset (struct constant_pool *pool, rtx x)
8640 {
8641 rtx label;
8642
8643 label = gen_rtx_LABEL_REF (GET_MODE (x), pool->label);
8644 x = gen_rtx_UNSPEC (GET_MODE (x), gen_rtvec (2, x, label),
8645 UNSPEC_POOL_OFFSET);
8646 return gen_rtx_CONST (GET_MODE (x), x);
8647 }
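/* Illustrative sketch: s390_pool_offset wraps X into

	(const (unspec [(X) (label_ref <pool base>)] UNSPEC_POOL_OFFSET))

   which s390_output_addr_const_extra above turns into the difference
   X - <pool base> when the constant is finally printed.  */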
8648
8649 /* Find constant VAL of mode MODE in the constant pool POOL.
8650 Return an RTX describing the distance from the start of
8651 the pool to the location of the new constant. */
8652
8653 static rtx
8654 s390_find_constant (struct constant_pool *pool, rtx val,
8655 machine_mode mode)
8656 {
8657 struct constant *c;
8658 int i;
8659
8660 for (i = 0; i < NR_C_MODES; i++)
8661 if (constant_modes[i] == mode)
8662 break;
8663 gcc_assert (i != NR_C_MODES);
8664
8665 for (c = pool->constants[i]; c != NULL; c = c->next)
8666 if (rtx_equal_p (val, c->value))
8667 break;
8668
8669 gcc_assert (c);
8670
8671 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8672 }
8673
8674 /* Check whether INSN is an execute. Return the label_ref to its
8675 execute target template if so, NULL_RTX otherwise. */
8676
8677 static rtx
8678 s390_execute_label (rtx insn)
8679 {
8680 if (NONJUMP_INSN_P (insn)
8681 && GET_CODE (PATTERN (insn)) == PARALLEL
8682 && GET_CODE (XVECEXP (PATTERN (insn), 0, 0)) == UNSPEC
8683 && XINT (XVECEXP (PATTERN (insn), 0, 0), 1) == UNSPEC_EXECUTE)
8684 return XVECEXP (XVECEXP (PATTERN (insn), 0, 0), 0, 2);
8685
8686 return NULL_RTX;
8687 }
8688
8689 /* Add execute target for INSN to the constant pool POOL. */
8690
8691 static void
8692 s390_add_execute (struct constant_pool *pool, rtx insn)
8693 {
8694 struct constant *c;
8695
8696 for (c = pool->execute; c != NULL; c = c->next)
8697 if (INSN_UID (insn) == INSN_UID (c->value))
8698 break;
8699
8700 if (c == NULL)
8701 {
8702 c = (struct constant *) xmalloc (sizeof *c);
8703 c->value = insn;
8704 c->label = gen_label_rtx ();
8705 c->next = pool->execute;
8706 pool->execute = c;
8707 pool->size += 6;
8708 }
8709 }
8710
8711 /* Find execute target for INSN in the constant pool POOL.
8712 Return an RTX describing the distance from the start of
8713 the pool to the location of the execute target. */
8714
8715 static rtx
8716 s390_find_execute (struct constant_pool *pool, rtx insn)
8717 {
8718 struct constant *c;
8719
8720 for (c = pool->execute; c != NULL; c = c->next)
8721 if (INSN_UID (insn) == INSN_UID (c->value))
8722 break;
8723
8724 gcc_assert (c);
8725
8726 return s390_pool_offset (pool, gen_rtx_LABEL_REF (Pmode, c->label));
8727 }
8728
8729 /* For an execute INSN, extract the execute target template. */
8730
8731 static rtx
8732 s390_execute_target (rtx insn)
8733 {
8734 rtx pattern = PATTERN (insn);
8735 gcc_assert (s390_execute_label (insn));
8736
8737 if (XVECLEN (pattern, 0) == 2)
8738 {
8739 pattern = copy_rtx (XVECEXP (pattern, 0, 1));
8740 }
8741 else
8742 {
8743 rtvec vec = rtvec_alloc (XVECLEN (pattern, 0) - 1);
8744 int i;
8745
8746 for (i = 0; i < XVECLEN (pattern, 0) - 1; i++)
8747 RTVEC_ELT (vec, i) = copy_rtx (XVECEXP (pattern, 0, i + 1));
8748
8749 pattern = gen_rtx_PARALLEL (VOIDmode, vec);
8750 }
8751
8752 return pattern;
8753 }
8754
8755 /* Indicate that INSN cannot be duplicated. This is the case for
8756 execute insns that carry a unique label. */
8757
8758 static bool
8759 s390_cannot_copy_insn_p (rtx_insn *insn)
8760 {
8761 rtx label = s390_execute_label (insn);
8762 return label && label != const0_rtx;
8763 }
8764
8765 /* Dump out the constants in POOL. If REMOTE_LABEL is true,
8766 do not emit the pool base label. */
8767
8768 static void
8769 s390_dump_pool (struct constant_pool *pool, bool remote_label)
8770 {
8771 struct constant *c;
8772 rtx_insn *insn = pool->pool_insn;
8773 int i;
8774
8775 /* Switch to rodata section. */
8776 if (TARGET_CPU_ZARCH)
8777 {
8778 insn = emit_insn_after (gen_pool_section_start (), insn);
8779 INSN_ADDRESSES_NEW (insn, -1);
8780 }
8781
8782 /* Ensure minimum pool alignment. */
8783 if (TARGET_CPU_ZARCH)
8784 insn = emit_insn_after (gen_pool_align (GEN_INT (8)), insn);
8785 else
8786 insn = emit_insn_after (gen_pool_align (GEN_INT (4)), insn);
8787 INSN_ADDRESSES_NEW (insn, -1);
8788
8789 /* Emit pool base label. */
8790 if (!remote_label)
8791 {
8792 insn = emit_label_after (pool->label, insn);
8793 INSN_ADDRESSES_NEW (insn, -1);
8794 }
8795
8796 /* Dump constants in descending alignment requirement order,
8797 ensuring proper alignment for every constant. */
8798 for (i = 0; i < NR_C_MODES; i++)
8799 for (c = pool->constants[i]; c; c = c->next)
8800 {
8801 /* Convert UNSPEC_LTREL_OFFSET unspecs to pool-relative references. */
8802 rtx value = copy_rtx (c->value);
8803 if (GET_CODE (value) == CONST
8804 && GET_CODE (XEXP (value, 0)) == UNSPEC
8805 && XINT (XEXP (value, 0), 1) == UNSPEC_LTREL_OFFSET
8806 && XVECLEN (XEXP (value, 0), 0) == 1)
8807 value = s390_pool_offset (pool, XVECEXP (XEXP (value, 0), 0, 0));
8808
8809 insn = emit_label_after (c->label, insn);
8810 INSN_ADDRESSES_NEW (insn, -1);
8811
8812 value = gen_rtx_UNSPEC_VOLATILE (constant_modes[i],
8813 gen_rtvec (1, value),
8814 UNSPECV_POOL_ENTRY);
8815 insn = emit_insn_after (value, insn);
8816 INSN_ADDRESSES_NEW (insn, -1);
8817 }
8818
8819 /* Ensure minimum alignment for instructions. */
8820 insn = emit_insn_after (gen_pool_align (GEN_INT (2)), insn);
8821 INSN_ADDRESSES_NEW (insn, -1);
8822
8823 /* Output in-pool execute template insns. */
8824 for (c = pool->execute; c; c = c->next)
8825 {
8826 insn = emit_label_after (c->label, insn);
8827 INSN_ADDRESSES_NEW (insn, -1);
8828
8829 insn = emit_insn_after (s390_execute_target (c->value), insn);
8830 INSN_ADDRESSES_NEW (insn, -1);
8831 }
8832
8833 /* Switch back to previous section. */
8834 if (TARGET_CPU_ZARCH)
8835 {
8836 insn = emit_insn_after (gen_pool_section_end (), insn);
8837 INSN_ADDRESSES_NEW (insn, -1);
8838 }
8839
8840 insn = emit_barrier_after (insn);
8841 INSN_ADDRESSES_NEW (insn, -1);
8842
8843 /* Remove placeholder insn. */
8844 remove_insn (pool->pool_insn);
8845 }
8846
8847 /* Free all memory used by POOL. */
8848
8849 static void
8850 s390_free_pool (struct constant_pool *pool)
8851 {
8852 struct constant *c, *next;
8853 int i;
8854
8855 for (i = 0; i < NR_C_MODES; i++)
8856 for (c = pool->constants[i]; c; c = next)
8857 {
8858 next = c->next;
8859 free (c);
8860 }
8861
8862 for (c = pool->execute; c; c = next)
8863 {
8864 next = c->next;
8865 free (c);
8866 }
8867
8868 BITMAP_FREE (pool->insns);
8869 free (pool);
8870 }
8871
8872
8873 /* Collect main literal pool. Return NULL on overflow. */
8874
8875 static struct constant_pool *
8876 s390_mainpool_start (void)
8877 {
8878 struct constant_pool *pool;
8879 rtx_insn *insn;
8880
8881 pool = s390_alloc_pool ();
8882
8883 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
8884 {
8885 if (NONJUMP_INSN_P (insn)
8886 && GET_CODE (PATTERN (insn)) == SET
8887 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC_VOLATILE
8888 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPECV_MAIN_POOL)
8889 {
8890 /* There might be two main_pool instructions if base_reg
8891 is call-clobbered; one for shrink-wrapped code and one
8892 for the rest. We want to keep the first. */
8893 if (pool->pool_insn)
8894 {
8895 insn = PREV_INSN (insn);
8896 delete_insn (NEXT_INSN (insn));
8897 continue;
8898 }
8899 pool->pool_insn = insn;
8900 }
8901
8902 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
8903 {
8904 s390_add_execute (pool, insn);
8905 }
8906 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
8907 {
8908 rtx pool_ref = NULL_RTX;
8909 find_constant_pool_ref (PATTERN (insn), &pool_ref);
8910 if (pool_ref)
8911 {
8912 rtx constant = get_pool_constant (pool_ref);
8913 machine_mode mode = get_pool_mode (pool_ref);
8914 s390_add_constant (pool, constant, mode);
8915 }
8916 }
8917
8918 /* If hot/cold partitioning is enabled we have to make sure that
8919 the literal pool is emitted in the same section where the
8920 initialization of the literal pool base pointer takes place.
8921 emit_pool_after is only used in the non-overflow case on non-z/Architecture
8922 CPUs, where we can emit the literal pool at the end of the
8923 function body within the text section. */
8924 if (NOTE_P (insn)
8925 && NOTE_KIND (insn) == NOTE_INSN_SWITCH_TEXT_SECTIONS
8926 && !pool->emit_pool_after)
8927 pool->emit_pool_after = PREV_INSN (insn);
8928 }
8929
8930 gcc_assert (pool->pool_insn || pool->size == 0);
8931
8932 if (pool->size >= 4096)
8933 {
8934 /* We're going to chunkify the pool, so remove the main
8935 pool placeholder insn. */
8936 remove_insn (pool->pool_insn);
8937
8938 s390_free_pool (pool);
8939 pool = NULL;
8940 }
8941
8942 /* If the function ends with the section where the literal pool
8943 should be emitted, set the marker to its end. */
8944 if (pool && !pool->emit_pool_after)
8945 pool->emit_pool_after = get_last_insn ();
8946
8947 return pool;
8948 }
8949
8950 /* POOL holds the main literal pool as collected by s390_mainpool_start.
8951 Modify the current function to output the pool constants as well as
8952 the pool register setup instruction. */
8953
8954 static void
8955 s390_mainpool_finish (struct constant_pool *pool)
8956 {
8957 rtx base_reg = cfun->machine->base_reg;
8958
8959 /* If the pool is empty, we're done. */
8960 if (pool->size == 0)
8961 {
8962 /* We don't actually need a base register after all. */
8963 cfun->machine->base_reg = NULL_RTX;
8964
8965 if (pool->pool_insn)
8966 remove_insn (pool->pool_insn);
8967 s390_free_pool (pool);
8968 return;
8969 }
8970
8971 /* We need correct insn addresses. */
8972 shorten_branches (get_insns ());
8973
8974 /* On zSeries, we use a LARL to load the pool register. The pool is
8975 located in the .rodata section, so we emit it after the function. */
8976 if (TARGET_CPU_ZARCH)
8977 {
8978 rtx set = gen_main_base_64 (base_reg, pool->label);
8979 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8980 INSN_ADDRESSES_NEW (insn, -1);
8981 remove_insn (pool->pool_insn);
8982
8983 insn = get_last_insn ();
8984 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
8985 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
8986
8987 s390_dump_pool (pool, 0);
8988 }
8989
8990 /* On S/390, if the total size of the function's code plus literal pool
8991 does not exceed 4096 bytes, we use BASR to set up a function base
8992 pointer, and emit the literal pool at the end of the function. */
8993 else if (INSN_ADDRESSES (INSN_UID (pool->emit_pool_after))
8994 + pool->size + 8 /* alignment slop */ < 4096)
8995 {
8996 rtx set = gen_main_base_31_small (base_reg, pool->label);
8997 rtx_insn *insn = emit_insn_after (set, pool->pool_insn);
8998 INSN_ADDRESSES_NEW (insn, -1);
8999 remove_insn (pool->pool_insn);
9000
9001 insn = emit_label_after (pool->label, insn);
9002 INSN_ADDRESSES_NEW (insn, -1);
9003
9004 /* emit_pool_after will be set by s390_mainpool_start to the
9005 last insn of the section where the literal pool should be
9006 emitted. */
9007 insn = pool->emit_pool_after;
9008
9009 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9010 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9011
9012 s390_dump_pool (pool, 1);
9013 }
9014
9015 /* Otherwise, we emit an inline literal pool and use BASR to branch
9016 over it, setting up the pool register at the same time. */
9017 else
9018 {
9019 rtx_code_label *pool_end = gen_label_rtx ();
9020
9021 rtx pat = gen_main_base_31_large (base_reg, pool->label, pool_end);
9022 rtx_insn *insn = emit_jump_insn_after (pat, pool->pool_insn);
9023 JUMP_LABEL (insn) = pool_end;
9024 INSN_ADDRESSES_NEW (insn, -1);
9025 remove_insn (pool->pool_insn);
9026
9027 insn = emit_label_after (pool->label, insn);
9028 INSN_ADDRESSES_NEW (insn, -1);
9029
9030 pool->pool_insn = emit_insn_after (gen_pool (const0_rtx), insn);
9031 INSN_ADDRESSES_NEW (pool->pool_insn, -1);
9032
9033 insn = emit_label_after (pool_end, pool->pool_insn);
9034 INSN_ADDRESSES_NEW (insn, -1);
9035
9036 s390_dump_pool (pool, 1);
9037 }
9038
9039
9040 /* Replace all literal pool references. */
9041
9042 for (rtx_insn *insn = get_insns (); insn; insn = NEXT_INSN (insn))
9043 {
9044 if (INSN_P (insn))
9045 replace_ltrel_base (&PATTERN (insn));
9046
9047 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9048 {
9049 rtx addr, pool_ref = NULL_RTX;
9050 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9051 if (pool_ref)
9052 {
9053 if (s390_execute_label (insn))
9054 addr = s390_find_execute (pool, insn);
9055 else
9056 addr = s390_find_constant (pool, get_pool_constant (pool_ref),
9057 get_pool_mode (pool_ref));
9058
9059 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9060 INSN_CODE (insn) = -1;
9061 }
9062 }
9063 }
9064
9065
9066 /* Free the pool. */
9067 s390_free_pool (pool);
9068 }
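/* Illustrative sketch (z/Architecture case, register and label names made
   up): after s390_mainpool_finish the function starts with something like

	larl	%r13,.LPOOL	; load the literal pool base

   every former constant-pool MEM has been rewritten into a base-plus-offset
   reference relative to .LPOOL, and the pool itself is emitted in .rodata
   after the function body.  */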
9069
9070 /* POOL holds the main literal pool as collected by s390_mainpool_start.
9071 We have decided we cannot use this pool, so revert all changes
9072 to the current function that were done by s390_mainpool_start. */
9073 static void
9074 s390_mainpool_cancel (struct constant_pool *pool)
9075 {
9076 /* We didn't actually change the instruction stream, so simply
9077 free the pool memory. */
9078 s390_free_pool (pool);
9079 }
9080
9081
9082 /* Chunkify the literal pool. */
9083
9084 #define S390_POOL_CHUNK_MIN 0xc00
9085 #define S390_POOL_CHUNK_MAX 0xe00
9086
9087 static struct constant_pool *
9088 s390_chunkify_start (void)
9089 {
9090 struct constant_pool *curr_pool = NULL, *pool_list = NULL;
9091 int extra_size = 0;
9092 bitmap far_labels;
9093 rtx pending_ltrel = NULL_RTX;
9094 rtx_insn *insn;
9095
9096 rtx (*gen_reload_base) (rtx, rtx) =
9097 TARGET_CPU_ZARCH ? gen_reload_base_64 : gen_reload_base_31;
9098
9099
9100 /* We need correct insn addresses. */
9101
9102 shorten_branches (get_insns ());
9103
9104 /* Scan all insns and move literals to pool chunks. */
9105
9106 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9107 {
9108 bool section_switch_p = false;
9109
9110 /* Check for pending LTREL_BASE. */
9111 if (INSN_P (insn))
9112 {
9113 rtx ltrel_base = find_ltrel_base (PATTERN (insn));
9114 if (ltrel_base)
9115 {
9116 gcc_assert (ltrel_base == pending_ltrel);
9117 pending_ltrel = NULL_RTX;
9118 }
9119 }
9120
9121 if (!TARGET_CPU_ZARCH && s390_execute_label (insn))
9122 {
9123 if (!curr_pool)
9124 curr_pool = s390_start_pool (&pool_list, insn);
9125
9126 s390_add_execute (curr_pool, insn);
9127 s390_add_pool_insn (curr_pool, insn);
9128 }
9129 else if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9130 {
9131 rtx pool_ref = NULL_RTX;
9132 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9133 if (pool_ref)
9134 {
9135 rtx constant = get_pool_constant (pool_ref);
9136 machine_mode mode = get_pool_mode (pool_ref);
9137
9138 if (!curr_pool)
9139 curr_pool = s390_start_pool (&pool_list, insn);
9140
9141 s390_add_constant (curr_pool, constant, mode);
9142 s390_add_pool_insn (curr_pool, insn);
9143
9144 /* Don't split the pool chunk between a LTREL_OFFSET load
9145 and the corresponding LTREL_BASE. */
9146 if (GET_CODE (constant) == CONST
9147 && GET_CODE (XEXP (constant, 0)) == UNSPEC
9148 && XINT (XEXP (constant, 0), 1) == UNSPEC_LTREL_OFFSET)
9149 {
9150 gcc_assert (!pending_ltrel);
9151 pending_ltrel = pool_ref;
9152 }
9153 }
9154 }
9155
9156 if (JUMP_P (insn) || JUMP_TABLE_DATA_P (insn) || LABEL_P (insn))
9157 {
9158 if (curr_pool)
9159 s390_add_pool_insn (curr_pool, insn);
9160 /* An LTREL_BASE must follow within the same basic block. */
9161 gcc_assert (!pending_ltrel);
9162 }
9163
9164 if (NOTE_P (insn))
9165 switch (NOTE_KIND (insn))
9166 {
9167 case NOTE_INSN_SWITCH_TEXT_SECTIONS:
9168 section_switch_p = true;
9169 break;
9170 case NOTE_INSN_VAR_LOCATION:
9171 case NOTE_INSN_CALL_ARG_LOCATION:
9172 continue;
9173 default:
9174 break;
9175 }
9176
9177 if (!curr_pool
9178 || INSN_ADDRESSES_SIZE () <= (size_t) INSN_UID (insn)
9179 || INSN_ADDRESSES (INSN_UID (insn)) == -1)
9180 continue;
9181
9182 if (TARGET_CPU_ZARCH)
9183 {
9184 if (curr_pool->size < S390_POOL_CHUNK_MAX)
9185 continue;
9186
9187 s390_end_pool (curr_pool, NULL);
9188 curr_pool = NULL;
9189 }
9190 else
9191 {
9192 int chunk_size = INSN_ADDRESSES (INSN_UID (insn))
9193 - INSN_ADDRESSES (INSN_UID (curr_pool->first_insn))
9194 + extra_size;
9195
9196 /* We will later have to insert base register reload insns.
9197 Those will have an effect on code size, which we need to
9198 consider here. This calculation makes rather pessimistic
9199 worst-case assumptions. */
9200 if (LABEL_P (insn))
9201 extra_size += 6;
9202
9203 if (chunk_size < S390_POOL_CHUNK_MIN
9204 && curr_pool->size < S390_POOL_CHUNK_MIN
9205 && !section_switch_p)
9206 continue;
9207
9208 /* Pool chunks can only be inserted after BARRIERs ... */
9209 if (BARRIER_P (insn))
9210 {
9211 s390_end_pool (curr_pool, insn);
9212 curr_pool = NULL;
9213 extra_size = 0;
9214 }
9215
9216 /* ... so if we don't find one in time, create one. */
9217 else if (chunk_size > S390_POOL_CHUNK_MAX
9218 || curr_pool->size > S390_POOL_CHUNK_MAX
9219 || section_switch_p)
9220 {
9221 rtx_insn *label, *jump, *barrier, *next, *prev;
9222
9223 if (!section_switch_p)
9224 {
9225 /* We can insert the barrier only after a 'real' insn. */
9226 if (! NONJUMP_INSN_P (insn) && ! CALL_P (insn))
9227 continue;
9228 if (get_attr_length (insn) == 0)
9229 continue;
9230 /* Don't separate LTREL_BASE from the corresponding
9231 LTREL_OFFSET load. */
9232 if (pending_ltrel)
9233 continue;
9234 next = insn;
9235 do
9236 {
9237 insn = next;
9238 next = NEXT_INSN (insn);
9239 }
9240 while (next
9241 && NOTE_P (next)
9242 && (NOTE_KIND (next) == NOTE_INSN_VAR_LOCATION
9243 || NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION));
9244 }
9245 else
9246 {
9247 gcc_assert (!pending_ltrel);
9248
9249 /* The old pool has to end before the section switch
9250 note in order to make it part of the current
9251 section. */
9252 insn = PREV_INSN (insn);
9253 }
9254
9255 label = gen_label_rtx ();
9256 prev = insn;
9257 if (prev && NOTE_P (prev))
9258 prev = prev_nonnote_insn (prev);
9259 if (prev)
9260 jump = emit_jump_insn_after_setloc (gen_jump (label), insn,
9261 INSN_LOCATION (prev));
9262 else
9263 jump = emit_jump_insn_after_noloc (gen_jump (label), insn);
9264 barrier = emit_barrier_after (jump);
9265 insn = emit_label_after (label, barrier);
9266 JUMP_LABEL (jump) = label;
9267 LABEL_NUSES (label) = 1;
9268
9269 INSN_ADDRESSES_NEW (jump, -1);
9270 INSN_ADDRESSES_NEW (barrier, -1);
9271 INSN_ADDRESSES_NEW (insn, -1);
9272
9273 s390_end_pool (curr_pool, barrier);
9274 curr_pool = NULL;
9275 extra_size = 0;
9276 }
9277 }
9278 }
9279
9280 if (curr_pool)
9281 s390_end_pool (curr_pool, NULL);
9282 gcc_assert (!pending_ltrel);
9283
9284 /* Find all labels that are branched into
9285 from an insn belonging to a different chunk. */
9286
9287 far_labels = BITMAP_ALLOC (NULL);
9288
9289 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9290 {
9291 rtx_jump_table_data *table;
9292
9293 /* Labels marked with LABEL_PRESERVE_P can be the target
9294 of non-local jumps, so we have to mark them.
9295 The same holds for named labels.
9296
9297 Don't do that, however, if it is the label before
9298 a jump table. */
9299
9300 if (LABEL_P (insn)
9301 && (LABEL_PRESERVE_P (insn) || LABEL_NAME (insn)))
9302 {
9303 rtx_insn *vec_insn = NEXT_INSN (insn);
9304 if (! vec_insn || ! JUMP_TABLE_DATA_P (vec_insn))
9305 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (insn));
9306 }
9307 /* Check potential targets in a table jump (casesi_jump). */
9308 else if (tablejump_p (insn, NULL, &table))
9309 {
9310 rtx vec_pat = PATTERN (table);
9311 int i, diff_p = GET_CODE (vec_pat) == ADDR_DIFF_VEC;
9312
9313 for (i = 0; i < XVECLEN (vec_pat, diff_p); i++)
9314 {
9315 rtx label = XEXP (XVECEXP (vec_pat, diff_p, i), 0);
9316
9317 if (s390_find_pool (pool_list, label)
9318 != s390_find_pool (pool_list, insn))
9319 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9320 }
9321 }
9322 /* If we have a direct jump (conditional or unconditional),
9323 check all potential targets. */
9324 else if (JUMP_P (insn))
9325 {
9326 rtx pat = PATTERN (insn);
9327
9328 if (GET_CODE (pat) == PARALLEL)
9329 pat = XVECEXP (pat, 0, 0);
9330
9331 if (GET_CODE (pat) == SET)
9332 {
9333 rtx label = JUMP_LABEL (insn);
9334 if (label && !ANY_RETURN_P (label))
9335 {
9336 if (s390_find_pool (pool_list, label)
9337 != s390_find_pool (pool_list, insn))
9338 bitmap_set_bit (far_labels, CODE_LABEL_NUMBER (label));
9339 }
9340 }
9341 }
9342 }
9343
9344 /* Insert base register reload insns before every pool. */
9345
9346 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9347 {
9348 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9349 curr_pool->label);
9350 rtx_insn *insn = curr_pool->first_insn;
9351 INSN_ADDRESSES_NEW (emit_insn_before (new_insn, insn), -1);
9352 }
9353
9354 /* Insert base register reload insns at every far label. */
9355
9356 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9357 if (LABEL_P (insn)
9358 && bitmap_bit_p (far_labels, CODE_LABEL_NUMBER (insn)))
9359 {
9360 struct constant_pool *pool = s390_find_pool (pool_list, insn);
9361 if (pool)
9362 {
9363 rtx new_insn = gen_reload_base (cfun->machine->base_reg,
9364 pool->label);
9365 INSN_ADDRESSES_NEW (emit_insn_after (new_insn, insn), -1);
9366 }
9367 }
9368
9369
9370 BITMAP_FREE (far_labels);
9371
9372
9373 /* Recompute insn addresses. */
9374
9375 init_insn_lengths ();
9376 shorten_branches (get_insns ());
9377
9378 return pool_list;
9379 }
9380
9381 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9382 After we have decided to use this list, finish implementing
9383 all changes to the current function as required. */
9384
9385 static void
9386 s390_chunkify_finish (struct constant_pool *pool_list)
9387 {
9388 struct constant_pool *curr_pool = NULL;
9389 rtx_insn *insn;
9390
9391
9392 /* Replace all literal pool references. */
9393
9394 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
9395 {
9396 if (INSN_P (insn))
9397 replace_ltrel_base (&PATTERN (insn));
9398
9399 curr_pool = s390_find_pool (pool_list, insn);
9400 if (!curr_pool)
9401 continue;
9402
9403 if (NONJUMP_INSN_P (insn) || CALL_P (insn))
9404 {
9405 rtx addr, pool_ref = NULL_RTX;
9406 find_constant_pool_ref (PATTERN (insn), &pool_ref);
9407 if (pool_ref)
9408 {
9409 if (s390_execute_label (insn))
9410 addr = s390_find_execute (curr_pool, insn);
9411 else
9412 addr = s390_find_constant (curr_pool,
9413 get_pool_constant (pool_ref),
9414 get_pool_mode (pool_ref));
9415
9416 replace_constant_pool_ref (&PATTERN (insn), pool_ref, addr);
9417 INSN_CODE (insn) = -1;
9418 }
9419 }
9420 }
9421
9422 /* Dump out all literal pools. */
9423
9424 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9425 s390_dump_pool (curr_pool, 0);
9426
9427 /* Free pool list. */
9428
9429 while (pool_list)
9430 {
9431 struct constant_pool *next = pool_list->next;
9432 s390_free_pool (pool_list);
9433 pool_list = next;
9434 }
9435 }
9436
9437 /* POOL_LIST is a chunk list as prepared by s390_chunkify_start.
9438 We have decided we cannot use this list, so revert all changes
9439 to the current function that were done by s390_chunkify_start. */
9440
9441 static void
9442 s390_chunkify_cancel (struct constant_pool *pool_list)
9443 {
9444 struct constant_pool *curr_pool = NULL;
9445 rtx_insn *insn;
9446
9447 /* Remove all pool placeholder insns. */
9448
9449 for (curr_pool = pool_list; curr_pool; curr_pool = curr_pool->next)
9450 {
9451 /* Did we insert an extra barrier? Remove it. */
9452 rtx_insn *barrier = PREV_INSN (curr_pool->pool_insn);
9453 rtx_insn *jump = barrier ? PREV_INSN (barrier) : NULL;
9454 rtx_insn *label = NEXT_INSN (curr_pool->pool_insn);
9455
9456 if (jump && JUMP_P (jump)
9457 && barrier && BARRIER_P (barrier)
9458 && label && LABEL_P (label)
9459 && GET_CODE (PATTERN (jump)) == SET
9460 && SET_DEST (PATTERN (jump)) == pc_rtx
9461 && GET_CODE (SET_SRC (PATTERN (jump))) == LABEL_REF
9462 && XEXP (SET_SRC (PATTERN (jump)), 0) == label)
9463 {
9464 remove_insn (jump);
9465 remove_insn (barrier);
9466 remove_insn (label);
9467 }
9468
9469 remove_insn (curr_pool->pool_insn);
9470 }
9471
9472 /* Remove all base register reload insns. */
9473
9474 for (insn = get_insns (); insn; )
9475 {
9476 rtx_insn *next_insn = NEXT_INSN (insn);
9477
9478 if (NONJUMP_INSN_P (insn)
9479 && GET_CODE (PATTERN (insn)) == SET
9480 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
9481 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_RELOAD_BASE)
9482 remove_insn (insn);
9483
9484 insn = next_insn;
9485 }
9486
9487 /* Free pool list. */
9488
9489 while (pool_list)
9490 {
9491 struct constant_pool *next = pool_list->next;
9492 s390_free_pool (pool_list);
9493 pool_list = next;
9494 }
9495 }
9496
9497 /* Output the constant pool entry EXP in mode MODE with alignment ALIGN. */
9498
9499 void
9500 s390_output_pool_entry (rtx exp, machine_mode mode, unsigned int align)
9501 {
9502 switch (GET_MODE_CLASS (mode))
9503 {
9504 case MODE_FLOAT:
9505 case MODE_DECIMAL_FLOAT:
9506 gcc_assert (GET_CODE (exp) == CONST_DOUBLE);
9507
9508 assemble_real (*CONST_DOUBLE_REAL_VALUE (exp),
9509 as_a <scalar_float_mode> (mode), align);
9510 break;
9511
9512 case MODE_INT:
9513 assemble_integer (exp, GET_MODE_SIZE (mode), align, 1);
9514 mark_symbol_refs_as_used (exp);
9515 break;
9516
9517 case MODE_VECTOR_INT:
9518 case MODE_VECTOR_FLOAT:
9519 {
9520 int i;
9521 machine_mode inner_mode;
9522 gcc_assert (GET_CODE (exp) == CONST_VECTOR);
9523
9524 inner_mode = GET_MODE_INNER (GET_MODE (exp));
9525 for (i = 0; i < XVECLEN (exp, 0); i++)
9526 s390_output_pool_entry (XVECEXP (exp, 0, i),
9527 inner_mode,
9528 i == 0
9529 ? align
9530 : GET_MODE_BITSIZE (inner_mode));
9531 }
9532 break;
9533
9534 default:
9535 gcc_unreachable ();
9536 }
9537 }
9538
9539
9540 /* Return an RTL expression representing the value of the return address
9541 for the frame COUNT steps up from the current frame. FRAME is the
9542 frame pointer of that frame. */
9543
9544 rtx
9545 s390_return_addr_rtx (int count, rtx frame ATTRIBUTE_UNUSED)
9546 {
9547 int offset;
9548 rtx addr;
9549
9550 /* Without backchain, we fail for all but the current frame. */
9551
9552 if (!TARGET_BACKCHAIN && count > 0)
9553 return NULL_RTX;
9554
9555 /* For the current frame, we need to make sure the initial
9556 value of RETURN_REGNUM is actually saved. */
9557
9558 if (count == 0)
9559 {
9560 /* On non-z architectures branch splitting could overwrite r14. */
9561 if (TARGET_CPU_ZARCH)
9562 return get_hard_reg_initial_val (Pmode, RETURN_REGNUM);
9563 else
9564 {
9565 cfun_frame_layout.save_return_addr_p = true;
9566 return gen_rtx_MEM (Pmode, return_address_pointer_rtx);
9567 }
9568 }
9569
9570 if (TARGET_PACKED_STACK)
9571 offset = -2 * UNITS_PER_LONG;
9572 else
9573 offset = RETURN_REGNUM * UNITS_PER_LONG;
9574
9575 addr = plus_constant (Pmode, frame, offset);
9576 addr = memory_address (Pmode, addr);
9577 return gen_rtx_MEM (Pmode, addr);
9578 }
9579
9580 /* Return an RTL expression representing the back chain stored in
9581 the current stack frame. */
9582
9583 rtx
9584 s390_back_chain_rtx (void)
9585 {
9586 rtx chain;
9587
9588 gcc_assert (TARGET_BACKCHAIN);
9589
9590 if (TARGET_PACKED_STACK)
9591 chain = plus_constant (Pmode, stack_pointer_rtx,
9592 STACK_POINTER_OFFSET - UNITS_PER_LONG);
9593 else
9594 chain = stack_pointer_rtx;
9595
9596 chain = gen_rtx_MEM (Pmode, chain);
9597 return chain;
9598 }
9599
9600 /* Find first call clobbered register unused in a function.
9601 This could be used as base register in a leaf function
9602 or for holding the return address before epilogue. */
9603
9604 static int
9605 find_unused_clobbered_reg (void)
9606 {
9607 int i;
9608 for (i = 0; i < 6; i++)
9609 if (!df_regs_ever_live_p (i))
9610 return i;
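/* None of r0-r5 is unused; fall back to r0. */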
9611 return 0;
9612 }
9613
9614
9615 /* Helper function for s390_regs_ever_clobbered. Sets the fields in DATA for all
9616 clobbered hard regs in SETREG. */
9617
9618 static void
9619 s390_reg_clobbered_rtx (rtx setreg, const_rtx set_insn ATTRIBUTE_UNUSED, void *data)
9620 {
9621 char *regs_ever_clobbered = (char *)data;
9622 unsigned int i, regno;
9623 machine_mode mode = GET_MODE (setreg);
9624
9625 if (GET_CODE (setreg) == SUBREG)
9626 {
9627 rtx inner = SUBREG_REG (setreg);
9628 if (!GENERAL_REG_P (inner) && !FP_REG_P (inner))
9629 return;
9630 regno = subreg_regno (setreg);
9631 }
9632 else if (GENERAL_REG_P (setreg) || FP_REG_P (setreg))
9633 regno = REGNO (setreg);
9634 else
9635 return;
9636
9637 for (i = regno;
9638 i < regno + HARD_REGNO_NREGS (regno, mode);
9639 i++)
9640 regs_ever_clobbered[i] = 1;
9641 }
9642
9643 /* Walks through all basic blocks of the current function looking
9644 for clobbered hard regs using s390_reg_clobbered_rtx. The elements
9645 of the passed array REGS_EVER_CLOBBERED are set to one for
9646 each of those regs. */
9647
9648 static void
9649 s390_regs_ever_clobbered (char regs_ever_clobbered[])
9650 {
9651 basic_block cur_bb;
9652 rtx_insn *cur_insn;
9653 unsigned int i;
9654
9655 memset (regs_ever_clobbered, 0, 32);
9656
9657 /* For non-leaf functions we have to consider all call clobbered regs to be
9658 clobbered. */
9659 if (!crtl->is_leaf)
9660 {
9661 for (i = 0; i < 32; i++)
9662 regs_ever_clobbered[i] = call_really_used_regs[i];
9663 }
9664
9665 /* Make the "magic" eh_return registers live if necessary. For regs_ever_live
9666 this work is done by liveness analysis (mark_regs_live_at_end).
9667 Special care is needed for functions containing landing pads. Landing pads
9668 may use the eh registers, but the code which sets these registers is not
9669 contained in that function. Hence s390_regs_ever_clobbered is not able to
9670 deal with this automatically. */
9671 if (crtl->calls_eh_return || cfun->machine->has_landing_pad_p)
9672 for (i = 0; EH_RETURN_DATA_REGNO (i) != INVALID_REGNUM ; i++)
9673 if (crtl->calls_eh_return
9674 || (cfun->machine->has_landing_pad_p
9675 && df_regs_ever_live_p (EH_RETURN_DATA_REGNO (i))))
9676 regs_ever_clobbered[EH_RETURN_DATA_REGNO (i)] = 1;
9677
9678 /* For nonlocal gotos all call-saved registers have to be saved.
9679 This flag is also set for the unwinding code in libgcc.
9680 See expand_builtin_unwind_init. For regs_ever_live this is done by
9681 reload. */
9682 if (crtl->saves_all_registers)
9683 for (i = 0; i < 32; i++)
9684 if (!call_really_used_regs[i])
9685 regs_ever_clobbered[i] = 1;
9686
9687 FOR_EACH_BB_FN (cur_bb, cfun)
9688 {
9689 FOR_BB_INSNS (cur_bb, cur_insn)
9690 {
9691 rtx pat;
9692
9693 if (!INSN_P (cur_insn))
9694 continue;
9695
9696 pat = PATTERN (cur_insn);
9697
9698 /* Ignore GPR restore insns. */
9699 if (epilogue_completed && RTX_FRAME_RELATED_P (cur_insn))
9700 {
9701 if (GET_CODE (pat) == SET
9702 && GENERAL_REG_P (SET_DEST (pat)))
9703 {
9704 /* lgdr */
9705 if (GET_MODE (SET_SRC (pat)) == DImode
9706 && FP_REG_P (SET_SRC (pat)))
9707 continue;
9708
9709 /* l / lg */
9710 if (GET_CODE (SET_SRC (pat)) == MEM)
9711 continue;
9712 }
9713
9714 /* lm / lmg */
9715 if (GET_CODE (pat) == PARALLEL
9716 && load_multiple_operation (pat, VOIDmode))
9717 continue;
9718 }
9719
9720 note_stores (pat,
9721 s390_reg_clobbered_rtx,
9722 regs_ever_clobbered);
9723 }
9724 }
9725 }
9726
9727 /* Determine the frame area which actually has to be accessed
9728 in the function epilogue. The values are stored at the
9729 given pointers AREA_BOTTOM (address of the lowest used stack
9730 address) and AREA_TOP (address of the first item which does
9731 not belong to the stack frame). */
9732
9733 static void
9734 s390_frame_area (int *area_bottom, int *area_top)
9735 {
9736 int b, t;
9737
9738 b = INT_MAX;
9739 t = INT_MIN;
9740
9741 if (cfun_frame_layout.first_restore_gpr != -1)
9742 {
9743 b = (cfun_frame_layout.gprs_offset
9744 + cfun_frame_layout.first_restore_gpr * UNITS_PER_LONG);
9745 t = b + (cfun_frame_layout.last_restore_gpr
9746 - cfun_frame_layout.first_restore_gpr + 1) * UNITS_PER_LONG;
9747 }
9748
9749 if (TARGET_64BIT && cfun_save_high_fprs_p)
9750 {
9751 b = MIN (b, cfun_frame_layout.f8_offset);
9752 t = MAX (t, (cfun_frame_layout.f8_offset
9753 + cfun_frame_layout.high_fprs * 8));
9754 }
9755
9756 if (!TARGET_64BIT)
9757 {
9758 if (cfun_fpr_save_p (FPR4_REGNUM))
9759 {
9760 b = MIN (b, cfun_frame_layout.f4_offset);
9761 t = MAX (t, cfun_frame_layout.f4_offset + 8);
9762 }
9763 if (cfun_fpr_save_p (FPR6_REGNUM))
9764 {
9765 b = MIN (b, cfun_frame_layout.f4_offset + 8);
9766 t = MAX (t, cfun_frame_layout.f4_offset + 16);
9767 }
9768 }
9769 *area_bottom = b;
9770 *area_top = t;
9771 }
9772 /* Update gpr_save_slots in the frame layout trying to make use of
9773 FPRs as GPR save slots.
9774 This is a helper routine of s390_register_info. */
9775
9776 static void
9777 s390_register_info_gprtofpr ()
9778 {
9779 int save_reg_slot = FPR0_REGNUM;
9780 int i, j;
9781
9782 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
9783 return;
9784
9785 /* builtin_eh_return needs to be able to modify the return address
9786 on the stack. It could also adjust the FPR save slot instead but
9787 is it worth the trouble?! */
9788 if (crtl->calls_eh_return)
9789 return;
9790
9791 for (i = 15; i >= 6; i--)
9792 {
9793 if (cfun_gpr_save_slot (i) == SAVE_SLOT_NONE)
9794 continue;
9795
9796 /* Advance to the next FP register which can be used as a
9797 GPR save slot. */
9798 while ((!call_really_used_regs[save_reg_slot]
9799 || df_regs_ever_live_p (save_reg_slot)
9800 || cfun_fpr_save_p (save_reg_slot))
9801 && FP_REGNO_P (save_reg_slot))
9802 save_reg_slot++;
9803 if (!FP_REGNO_P (save_reg_slot))
9804 {
9805 /* We only want to use ldgr/lgdr if we can get rid of
9806 stm/lm entirely. So undo the gpr slot allocation in
9807 case we ran out of FPR save slots. */
9808 for (j = 6; j <= 15; j++)
9809 if (FP_REGNO_P (cfun_gpr_save_slot (j)))
9810 cfun_gpr_save_slot (j) = SAVE_SLOT_STACK;
9811 break;
9812 }
9813 cfun_gpr_save_slot (i) = save_reg_slot++;
9814 }
9815 }
9816
9817 /* Set the bits in fpr_bitmap for FPRs which need to be saved due to
9818 stdarg.
9819 This is a helper routine for s390_register_info. */
9820
9821 static void
9822 s390_register_info_stdarg_fpr ()
9823 {
9824 int i;
9825 int min_fpr;
9826 int max_fpr;
9827
9828 /* Save the FP argument regs for stdarg. f0, f2 for 31 bit and
9829 f0, f2, f4 and f6 for 64 bit. */
9830 if (!cfun->stdarg
9831 || !TARGET_HARD_FLOAT
9832 || !cfun->va_list_fpr_size
9833 || crtl->args.info.fprs >= FP_ARG_NUM_REG)
9834 return;
9835
9836 min_fpr = crtl->args.info.fprs;
9837 max_fpr = min_fpr + cfun->va_list_fpr_size - 1;
9838 if (max_fpr >= FP_ARG_NUM_REG)
9839 max_fpr = FP_ARG_NUM_REG - 1;
9840
9841 /* FPR argument regs start at f0. */
9842 min_fpr += FPR0_REGNUM;
9843 max_fpr += FPR0_REGNUM;
9844
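/* E.g. if one FP register has already been used for named arguments on 64 bit, this marks f2, f4 and f6 to be saved. */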
9845 for (i = min_fpr; i <= max_fpr; i++)
9846 cfun_set_fpr_save (i);
9847 }
9848
9849 /* Reserve the GPR save slots for GPRs which need to be saved due to
9850 stdarg.
9851 This is a helper routine for s390_register_info. */
9852
9853 static void
9854 s390_register_info_stdarg_gpr ()
9855 {
9856 int i;
9857 int min_gpr;
9858 int max_gpr;
9859
9860 if (!cfun->stdarg
9861 || !cfun->va_list_gpr_size
9862 || crtl->args.info.gprs >= GP_ARG_NUM_REG)
9863 return;
9864
9865 min_gpr = crtl->args.info.gprs;
9866 max_gpr = min_gpr + cfun->va_list_gpr_size - 1;
9867 if (max_gpr >= GP_ARG_NUM_REG)
9868 max_gpr = GP_ARG_NUM_REG - 1;
9869
9870 /* GPR argument regs start at r2. */
9871 min_gpr += GPR2_REGNUM;
9872 max_gpr += GPR2_REGNUM;
9873
9874 /* If r6 was supposed to be saved into an FPR and now needs to go to
9875 the stack for vararg we have to adjust the restore range to make
9876 sure that the restore is done from stack as well. */
9877 if (FP_REGNO_P (cfun_gpr_save_slot (GPR6_REGNUM))
9878 && min_gpr <= GPR6_REGNUM
9879 && max_gpr >= GPR6_REGNUM)
9880 {
9881 if (cfun_frame_layout.first_restore_gpr == -1
9882 || cfun_frame_layout.first_restore_gpr > GPR6_REGNUM)
9883 cfun_frame_layout.first_restore_gpr = GPR6_REGNUM;
9884 if (cfun_frame_layout.last_restore_gpr == -1
9885 || cfun_frame_layout.last_restore_gpr < GPR6_REGNUM)
9886 cfun_frame_layout.last_restore_gpr = GPR6_REGNUM;
9887 }
9888
9889 if (cfun_frame_layout.first_save_gpr == -1
9890 || cfun_frame_layout.first_save_gpr > min_gpr)
9891 cfun_frame_layout.first_save_gpr = min_gpr;
9892
9893 if (cfun_frame_layout.last_save_gpr == -1
9894 || cfun_frame_layout.last_save_gpr < max_gpr)
9895 cfun_frame_layout.last_save_gpr = max_gpr;
9896
9897 for (i = min_gpr; i <= max_gpr; i++)
9898 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
9899 }
9900
9901 /* Calculate the save and restore ranges for stm(g) and lm(g) in the
9902 prologue and epilogue. */
9903
9904 static void
9905 s390_register_info_set_ranges ()
9906 {
9907 int i, j;
9908
9909 /* Find the first and the last save slot supposed to use the stack
9910 to set the restore range.
9911 Vararg regs might be marked as saved to the stack, but only the
9912 call-saved regs (i.e. r6) really need restoring. This code
9913 assumes that the vararg regs have not yet been recorded in
9914 cfun_gpr_save_slot. */
9915 for (i = 0; i < 16 && cfun_gpr_save_slot (i) != SAVE_SLOT_STACK; i++);
9916 for (j = 15; j > i && cfun_gpr_save_slot (j) != SAVE_SLOT_STACK; j--);
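/* If no GPR is saved on the stack, i reaches 16 and all four range fields are set to -1 (empty range). */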
9917 cfun_frame_layout.first_restore_gpr = (i == 16) ? -1 : i;
9918 cfun_frame_layout.last_restore_gpr = (i == 16) ? -1 : j;
9919 cfun_frame_layout.first_save_gpr = (i == 16) ? -1 : i;
9920 cfun_frame_layout.last_save_gpr = (i == 16) ? -1 : j;
9921 }
9922
9923 /* The GPR and FPR save slots in cfun->machine->frame_layout are set
9924 for registers which need to be saved in function prologue.
9925 This function can be used until the insns emitted for save/restore
9926 of the regs are visible in the RTL stream. */
9927
9928 static void
9929 s390_register_info ()
9930 {
9931 int i;
9932 char clobbered_regs[32];
9933
9934 gcc_assert (!epilogue_completed);
9935
9936 if (reload_completed)
9937 /* After reload we rely on our own routine to determine which
9938 registers need saving. */
9939 s390_regs_ever_clobbered (clobbered_regs);
9940 else
9941 /* During reload we use regs_ever_live as a base since reload
9942 does changes in there which we otherwise would not be aware
9943 of. */
9944 for (i = 0; i < 32; i++)
9945 clobbered_regs[i] = df_regs_ever_live_p (i);
9946
9947 for (i = 0; i < 32; i++)
9948 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
9949
9950 /* Mark the call-saved FPRs which need to be saved.
9951 This needs to be done before checking the special GPRs since the
9952 stack pointer usage depends on whether high FPRs have to be saved
9953 or not. */
9954 cfun_frame_layout.fpr_bitmap = 0;
9955 cfun_frame_layout.high_fprs = 0;
9956 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
9957 if (clobbered_regs[i] && !call_really_used_regs[i])
9958 {
9959 cfun_set_fpr_save (i);
9960 if (i >= FPR8_REGNUM)
9961 cfun_frame_layout.high_fprs++;
9962 }
9963
9964 /* Register 12 is used for GOT address, but also as temp in prologue
9965 for split-stack stdarg functions (unless r14 is available). */
9966 clobbered_regs[12]
9967 |= ((flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
9968 || (flag_split_stack && cfun->stdarg
9969 && (crtl->is_leaf || TARGET_TPF_PROFILING
9970 || has_hard_reg_initial_val (Pmode, RETURN_REGNUM))));
9971
9972 clobbered_regs[BASE_REGNUM]
9973 |= (cfun->machine->base_reg
9974 && REGNO (cfun->machine->base_reg) == BASE_REGNUM);
9975
9976 clobbered_regs[HARD_FRAME_POINTER_REGNUM]
9977 |= !!frame_pointer_needed;
9978
9979 /* On pre-z900 machines this might not be decided until machine
9980 dependent reorg.
9981 save_return_addr_p will only be set on non-zarch machines so
9982 there is no risk that r14 goes into an FPR instead of a stack
9983 slot. */
9984 clobbered_regs[RETURN_REGNUM]
9985 |= (!crtl->is_leaf
9986 || TARGET_TPF_PROFILING
9987 || cfun->machine->split_branches_pending_p
9988 || cfun_frame_layout.save_return_addr_p
9989 || crtl->calls_eh_return);
9990
9991 clobbered_regs[STACK_POINTER_REGNUM]
9992 |= (!crtl->is_leaf
9993 || TARGET_TPF_PROFILING
9994 || cfun_save_high_fprs_p
9995 || get_frame_size () > 0
9996 || (reload_completed && cfun_frame_layout.frame_size > 0)
9997 || cfun->calls_alloca);
9998
9999 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 16);
10000
10001 for (i = 6; i < 16; i++)
10002 if (clobbered_regs[i])
10003 cfun_gpr_save_slot (i) = SAVE_SLOT_STACK;
10004
10005 s390_register_info_stdarg_fpr ();
10006 s390_register_info_gprtofpr ();
10007 s390_register_info_set_ranges ();
10008 /* stdarg functions might need to save GPRs 2 to 6. This might
10009 override the GPR->FPR save decision made by
10010 s390_register_info_gprtofpr for r6 since vararg regs must go to
10011 the stack. */
10012 s390_register_info_stdarg_gpr ();
10013 }
10014
10015 /* This function is called by s390_optimize_prologue in order to get
10016 rid of unnecessary GPR save/restore instructions. The register info
10017 for the GPRs is re-computed and the ranges are re-calculated. */
10018
10019 static void
10020 s390_optimize_register_info ()
10021 {
10022 char clobbered_regs[32];
10023 int i;
10024
10025 gcc_assert (epilogue_completed);
10026 gcc_assert (!cfun->machine->split_branches_pending_p);
10027
10028 s390_regs_ever_clobbered (clobbered_regs);
10029
10030 for (i = 0; i < 32; i++)
10031 clobbered_regs[i] = clobbered_regs[i] && !global_regs[i];
10032
10033 /* There is still special treatment needed for cases invisible to
10034 s390_regs_ever_clobbered. */
10035 clobbered_regs[RETURN_REGNUM]
10036 |= (TARGET_TPF_PROFILING
10037 /* When expanding builtin_return_addr in ESA mode we do not
10038 know whether r14 will later be needed as scratch reg when
10039 doing branch splitting. So the builtin always accesses the
10040 r14 save slot and we need to stick to the save/restore
10041 decision for r14 even if it turns out that it didn't get
10042 clobbered. */
10043 || cfun_frame_layout.save_return_addr_p
10044 || crtl->calls_eh_return);
10045
10046 memset (cfun_frame_layout.gpr_save_slots, SAVE_SLOT_NONE, 6);
10047
10048 for (i = 6; i < 16; i++)
10049 if (!clobbered_regs[i])
10050 cfun_gpr_save_slot (i) = SAVE_SLOT_NONE;
10051
10052 s390_register_info_set_ranges ();
10053 s390_register_info_stdarg_gpr ();
10054 }
10055
10056 /* Fill cfun->machine with info about frame of current function. */
10057
10058 static void
10059 s390_frame_info (void)
10060 {
10061 HOST_WIDE_INT lowest_offset;
10062
10063 cfun_frame_layout.first_save_gpr_slot = cfun_frame_layout.first_save_gpr;
10064 cfun_frame_layout.last_save_gpr_slot = cfun_frame_layout.last_save_gpr;
10065
10066 /* The va_arg builtin uses a constant distance of 16 *
10067 UNITS_PER_LONG (r0-r15) to reach the FPRs from the reg_save_area
10068 pointer. So even if we are going to save the stack pointer in an
10069 FPR we need the stack space in order to keep the offsets
10070 correct. */
10071 if (cfun->stdarg && cfun_save_arg_fprs_p)
10072 {
10073 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10074
10075 if (cfun_frame_layout.first_save_gpr_slot == -1)
10076 cfun_frame_layout.first_save_gpr_slot = STACK_POINTER_REGNUM;
10077 }
10078
10079 cfun_frame_layout.frame_size = get_frame_size ();
10080 if (!TARGET_64BIT && cfun_frame_layout.frame_size > 0x7fff0000)
10081 fatal_error (input_location,
10082 "total size of local variables exceeds architecture limit");
10083
10084 if (!TARGET_PACKED_STACK)
10085 {
10086 /* Fixed stack layout. */
10087 cfun_frame_layout.backchain_offset = 0;
10088 cfun_frame_layout.f0_offset = 16 * UNITS_PER_LONG;
10089 cfun_frame_layout.f4_offset = cfun_frame_layout.f0_offset + 2 * 8;
10090 cfun_frame_layout.f8_offset = -cfun_frame_layout.high_fprs * 8;
10091 cfun_frame_layout.gprs_offset = (cfun_frame_layout.first_save_gpr_slot
10092 * UNITS_PER_LONG);
10093 }
10094 else if (TARGET_BACKCHAIN)
10095 {
10096 /* Kernel stack layout - packed stack, backchain, no float */
10097 gcc_assert (TARGET_SOFT_FLOAT);
10098 cfun_frame_layout.backchain_offset = (STACK_POINTER_OFFSET
10099 - UNITS_PER_LONG);
10100
10101 /* The distance between the backchain and the return address
10102 save slot must not change. So we always need a slot for the
10103 stack pointer which resides in between. */
10104 cfun_frame_layout.last_save_gpr_slot = STACK_POINTER_REGNUM;
10105
10106 cfun_frame_layout.gprs_offset
10107 = cfun_frame_layout.backchain_offset - cfun_gprs_save_area_size;
10108
10109 /* FPRs will not be saved. Nevertheless pick sane values to
10110 keep area calculations valid. */
10111 cfun_frame_layout.f0_offset =
10112 cfun_frame_layout.f4_offset =
10113 cfun_frame_layout.f8_offset = cfun_frame_layout.gprs_offset;
10114 }
10115 else
10116 {
10117 int num_fprs;
10118
10119 /* Packed stack layout without backchain. */
10120
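/* The save areas are allocated top-down from STACK_POINTER_OFFSET: the f4/f6 slots first, then the f0/f2 slots, then the GPR save area and finally the f8-f15 slots. */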
10121 /* With stdarg FPRs need their dedicated slots. */
10122 num_fprs = (TARGET_64BIT && cfun->stdarg ? 2
10123 : (cfun_fpr_save_p (FPR4_REGNUM) +
10124 cfun_fpr_save_p (FPR6_REGNUM)));
10125 cfun_frame_layout.f4_offset = STACK_POINTER_OFFSET - 8 * num_fprs;
10126
10127 num_fprs = (cfun->stdarg ? 2
10128 : (cfun_fpr_save_p (FPR0_REGNUM)
10129 + cfun_fpr_save_p (FPR2_REGNUM)));
10130 cfun_frame_layout.f0_offset = cfun_frame_layout.f4_offset - 8 * num_fprs;
10131
10132 cfun_frame_layout.gprs_offset
10133 = cfun_frame_layout.f0_offset - cfun_gprs_save_area_size;
10134
10135 cfun_frame_layout.f8_offset = (cfun_frame_layout.gprs_offset
10136 - cfun_frame_layout.high_fprs * 8);
10137 }
10138
10139 if (cfun_save_high_fprs_p)
10140 cfun_frame_layout.frame_size += cfun_frame_layout.high_fprs * 8;
10141
10142 if (!crtl->is_leaf)
10143 cfun_frame_layout.frame_size += crtl->outgoing_args_size;
10144
10145 /* In the following cases we have to allocate a STACK_POINTER_OFFSET
10146 sized area at the bottom of the stack. This is also required for
10147 leaf functions. When GCC generates a local stack reference it
10148 will always add STACK_POINTER_OFFSET to it. */
10149 if (crtl->is_leaf
10150 && !TARGET_TPF_PROFILING
10151 && cfun_frame_layout.frame_size == 0
10152 && !cfun->calls_alloca)
10153 return;
10154
10155 /* Calculate the number of bytes we have used in our own register
10156 save area. With the packed stack layout we can re-use the
10157 remaining bytes for normal stack elements. */
10158
10159 if (TARGET_PACKED_STACK)
10160 lowest_offset = MIN (MIN (cfun_frame_layout.f0_offset,
10161 cfun_frame_layout.f4_offset),
10162 cfun_frame_layout.gprs_offset);
10163 else
10164 lowest_offset = 0;
10165
10166 if (TARGET_BACKCHAIN)
10167 lowest_offset = MIN (lowest_offset, cfun_frame_layout.backchain_offset);
10168
10169 cfun_frame_layout.frame_size += STACK_POINTER_OFFSET - lowest_offset;
10170
10171 /* Under 31 bit, if an odd number of GPRs has to be saved we have to
10172 adjust the frame size to maintain the 8-byte alignment of stack
10173 frames. */
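/* E.g. on 31 bit a frame size of 92 bytes is rounded up to 96. */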
10174 cfun_frame_layout.frame_size = ((cfun_frame_layout.frame_size +
10175 STACK_BOUNDARY / BITS_PER_UNIT - 1)
10176 & ~(STACK_BOUNDARY / BITS_PER_UNIT - 1));
10177 }
10178
10179 /* Generate frame layout. Fills in register and frame data for the current
10180 function in cfun->machine. This routine can be called multiple times;
10181 it will re-do the complete frame layout every time. */
10182
10183 static void
10184 s390_init_frame_layout (void)
10185 {
10186 HOST_WIDE_INT frame_size;
10187 int base_used;
10188
10189 /* After LRA the frame layout is supposed to be read-only and should
10190 not be re-computed. */
10191 if (reload_completed)
10192 return;
10193
10194 /* On S/390 machines, we may need to perform branch splitting, which
10195 will require both base and return address register. We have no
10196 choice but to assume we're going to need them until right at the
10197 end of the machine dependent reorg phase. */
10198 if (!TARGET_CPU_ZARCH)
10199 cfun->machine->split_branches_pending_p = true;
10200
10201 do
10202 {
10203 frame_size = cfun_frame_layout.frame_size;
10204
10205 /* Try to predict whether we'll need the base register. */
10206 base_used = cfun->machine->split_branches_pending_p
10207 || crtl->uses_const_pool
10208 || (!DISP_IN_RANGE (frame_size)
10209 && !CONST_OK_FOR_K (frame_size));
10210
10211 /* Decide which register to use as literal pool base. In small
10212 leaf functions, try to use an unused call-clobbered register
10213 as base register to avoid save/restore overhead. */
10214 if (!base_used)
10215 cfun->machine->base_reg = NULL_RTX;
10216 else
10217 {
10218 int br = 0;
10219
10220 if (crtl->is_leaf)
10221 /* Prefer r5 (most likely to be free). */
10222 for (br = 5; br >= 2 && df_regs_ever_live_p (br); br--)
10223 ;
10224 cfun->machine->base_reg =
10225 gen_rtx_REG (Pmode, (br >= 2) ? br : BASE_REGNUM);
10226 }
10227
10228 s390_register_info ();
10229 s390_frame_info ();
10230 }
10231 while (frame_size != cfun_frame_layout.frame_size);
10232 }
10233
10234 /* Remove the FPR clobbers from a tbegin insn if it can be proven that
10235 the TX is nonescaping. A transaction is considered escaping if
10236 there is at least one path from tbegin returning CC0 to the
10237 function exit block without a tend.
10238
10239 The check so far has some limitations:
10240 - only single tbegin/tend BBs are supported
10241 - the first cond jump after tbegin must separate the CC0 path from ~CC0
10242 - when CC is copied to a GPR and the CC0 check is done with the GPR
10243 this is not supported
10244 */
10245
10246 static void
10247 s390_optimize_nonescaping_tx (void)
10248 {
10249 const unsigned int CC0 = 1 << 3;
10250 basic_block tbegin_bb = NULL;
10251 basic_block tend_bb = NULL;
10252 basic_block bb;
10253 rtx_insn *insn;
10254 bool result = true;
10255 int bb_index;
10256 rtx_insn *tbegin_insn = NULL;
10257
10258 if (!cfun->machine->tbegin_p)
10259 return;
10260
10261 for (bb_index = 0; bb_index < n_basic_blocks_for_fn (cfun); bb_index++)
10262 {
10263 bb = BASIC_BLOCK_FOR_FN (cfun, bb_index);
10264
10265 if (!bb)
10266 continue;
10267
10268 FOR_BB_INSNS (bb, insn)
10269 {
10270 rtx ite, cc, pat, target;
10271 unsigned HOST_WIDE_INT mask;
10272
10273 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
10274 continue;
10275
10276 pat = PATTERN (insn);
10277
10278 if (GET_CODE (pat) == PARALLEL)
10279 pat = XVECEXP (pat, 0, 0);
10280
10281 if (GET_CODE (pat) != SET
10282 || GET_CODE (SET_SRC (pat)) != UNSPEC_VOLATILE)
10283 continue;
10284
10285 if (XINT (SET_SRC (pat), 1) == UNSPECV_TBEGIN)
10286 {
10287 rtx_insn *tmp;
10288
10289 tbegin_insn = insn;
10290
10291 /* Just return if the tbegin doesn't have clobbers. */
10292 if (GET_CODE (PATTERN (insn)) != PARALLEL)
10293 return;
10294
10295 if (tbegin_bb != NULL)
10296 return;
10297
10298 /* Find the next conditional jump. */
10299 for (tmp = NEXT_INSN (insn);
10300 tmp != NULL_RTX;
10301 tmp = NEXT_INSN (tmp))
10302 {
10303 if (reg_set_p (gen_rtx_REG (CCmode, CC_REGNUM), tmp))
10304 return;
10305 if (!JUMP_P (tmp))
10306 continue;
10307
10308 ite = SET_SRC (PATTERN (tmp));
10309 if (GET_CODE (ite) != IF_THEN_ELSE)
10310 continue;
10311
10312 cc = XEXP (XEXP (ite, 0), 0);
10313 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc))
10314 || GET_MODE (cc) != CCRAWmode
10315 || GET_CODE (XEXP (XEXP (ite, 0), 1)) != CONST_INT)
10316 return;
10317
10318 if (bb->succs->length () != 2)
10319 return;
10320
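/* The CCRAW comparison mask has one bit per condition code value (CC0 is bit 3, see the CC0 constant above). An NE comparison selects the complementary CC values, so invert the mask to normalize to the EQ form. */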
10321 mask = INTVAL (XEXP (XEXP (ite, 0), 1));
10322 if (GET_CODE (XEXP (ite, 0)) == NE)
10323 mask ^= 0xf;
10324
10325 if (mask == CC0)
10326 target = XEXP (ite, 1);
10327 else if (mask == (CC0 ^ 0xf))
10328 target = XEXP (ite, 2);
10329 else
10330 return;
10331
10332 {
10333 edge_iterator ei;
10334 edge e1, e2;
10335
10336 ei = ei_start (bb->succs);
10337 e1 = ei_safe_edge (ei);
10338 ei_next (&ei);
10339 e2 = ei_safe_edge (ei);
10340
10341 if (e2->flags & EDGE_FALLTHRU)
10342 {
10343 e2 = e1;
10344 e1 = ei_safe_edge (ei);
10345 }
10346
10347 if (!(e1->flags & EDGE_FALLTHRU))
10348 return;
10349
10350 tbegin_bb = (target == pc_rtx) ? e1->dest : e2->dest;
10351 }
10352 if (tmp == BB_END (bb))
10353 break;
10354 }
10355 }
10356
10357 if (XINT (SET_SRC (pat), 1) == UNSPECV_TEND)
10358 {
10359 if (tend_bb != NULL)
10360 return;
10361 tend_bb = bb;
10362 }
10363 }
10364 }
10365
10366 /* Either we successfully remove the FPR clobbers here or we are not
10367 able to do anything for this TX. Neither case qualifies for
10368 another look. */
10369 cfun->machine->tbegin_p = false;
10370
10371 if (tbegin_bb == NULL || tend_bb == NULL)
10372 return;
10373
10374 calculate_dominance_info (CDI_POST_DOMINATORS);
10375 result = dominated_by_p (CDI_POST_DOMINATORS, tbegin_bb, tend_bb);
10376 free_dominance_info (CDI_POST_DOMINATORS);
10377
10378 if (!result)
10379 return;
10380
10381 PATTERN (tbegin_insn) = gen_rtx_PARALLEL (VOIDmode,
10382 gen_rtvec (2,
10383 XVECEXP (PATTERN (tbegin_insn), 0, 0),
10384 XVECEXP (PATTERN (tbegin_insn), 0, 1)));
10385 INSN_CODE (tbegin_insn) = -1;
10386 df_insn_rescan (tbegin_insn);
10387
10388 return;
10389 }
10390
10391 /* Implement TARGET_HARD_REGNO_MODE_OK. */
10392
10393 static bool
10394 s390_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10395 {
10396 if (!TARGET_VX && VECTOR_NOFP_REGNO_P (regno))
10397 return false;
10398
10399 switch (REGNO_REG_CLASS (regno))
10400 {
10401 case VEC_REGS:
10402 return ((GET_MODE_CLASS (mode) == MODE_INT
10403 && s390_class_max_nregs (VEC_REGS, mode) == 1)
10404 || mode == DFmode
10405 || (TARGET_VXE && mode == SFmode)
10406 || s390_vector_mode_supported_p (mode));
10407 break;
10408 case FP_REGS:
10409 if (TARGET_VX
10410 && ((GET_MODE_CLASS (mode) == MODE_INT
10411 && s390_class_max_nregs (FP_REGS, mode) == 1)
10412 || mode == DFmode
10413 || s390_vector_mode_supported_p (mode)))
10414 return true;
10415
10416 if (REGNO_PAIR_OK (regno, mode))
10417 {
10418 if (mode == SImode || mode == DImode)
10419 return true;
10420
10421 if (FLOAT_MODE_P (mode) && GET_MODE_CLASS (mode) != MODE_VECTOR_FLOAT)
10422 return true;
10423 }
10424 break;
10425 case ADDR_REGS:
10426 if (FRAME_REGNO_P (regno) && mode == Pmode)
10427 return true;
10428
10429 /* fallthrough */
10430 case GENERAL_REGS:
10431 if (REGNO_PAIR_OK (regno, mode))
10432 {
10433 if (TARGET_ZARCH
10434 || (mode != TFmode && mode != TCmode && mode != TDmode))
10435 return true;
10436 }
10437 break;
10438 case CC_REGS:
10439 if (GET_MODE_CLASS (mode) == MODE_CC)
10440 return true;
10441 break;
10442 case ACCESS_REGS:
10443 if (REGNO_PAIR_OK (regno, mode))
10444 {
10445 if (mode == SImode || mode == Pmode)
10446 return true;
10447 }
10448 break;
10449 default:
10450 return false;
10451 }
10452
10453 return false;
10454 }
10455
10456 /* Implement TARGET_MODES_TIEABLE_P. */
10457
10458 static bool
10459 s390_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10460 {
10461 return ((mode1 == SFmode || mode1 == DFmode)
10462 == (mode2 == SFmode || mode2 == DFmode));
10463 }
10464
10465 /* Return nonzero if register OLD_REG can be renamed to register NEW_REG. */
10466
10467 bool
10468 s390_hard_regno_rename_ok (unsigned int old_reg, unsigned int new_reg)
10469 {
10470 /* Once we've decided upon a register to use as base register, it must
10471 no longer be used for any other purpose. */
10472 if (cfun->machine->base_reg)
10473 if (REGNO (cfun->machine->base_reg) == old_reg
10474 || REGNO (cfun->machine->base_reg) == new_reg)
10475 return false;
10476
10477 /* Prevent regrename from using call-saved regs which haven't
10478 actually been saved. This is necessary since regrename assumes
10479 the backend save/restore decisions are based on
10480 df_regs_ever_live. Since we have our own routine we have to tell
10481 regrename manually about it. */
10482 if (GENERAL_REGNO_P (new_reg)
10483 && !call_really_used_regs[new_reg]
10484 && cfun_gpr_save_slot (new_reg) == SAVE_SLOT_NONE)
10485 return false;
10486
10487 return true;
10488 }
10489
10490 /* Return nonzero if register REGNO can be used as a scratch register
10491 in peephole2. */
10492
10493 static bool
10494 s390_hard_regno_scratch_ok (unsigned int regno)
10495 {
10496 /* See s390_hard_regno_rename_ok. */
10497 if (GENERAL_REGNO_P (regno)
10498 && !call_really_used_regs[regno]
10499 && cfun_gpr_save_slot (regno) == SAVE_SLOT_NONE)
10500 return false;
10501
10502 return true;
10503 }
10504
10505 /* Implement TARGET_HARD_REGNO_CALL_PART_CLOBBERED. When generating
10506 code that runs in z/Architecture mode, but conforms to the 31-bit
10507 ABI, GPRs can hold 8 bytes; the ABI guarantees only that the lower 4
10508 bytes are saved across calls, however. */
10509
10510 static bool
10511 s390_hard_regno_call_part_clobbered (unsigned int regno, machine_mode mode)
10512 {
10513 if (!TARGET_64BIT
10514 && TARGET_ZARCH
10515 && GET_MODE_SIZE (mode) > 4
10516 && ((regno >= 6 && regno <= 15) || regno == 32))
10517 return true;
10518
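/* With the vector facility only the FPR part (the leftmost 8 bytes) of the call-saved registers f8-f15 (64 bit) resp. f4/f6 (31 bit) is preserved across calls, so wider vector modes held in them are partially clobbered. */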
10519 if (TARGET_VX
10520 && GET_MODE_SIZE (mode) > 8
10521 && (((TARGET_64BIT && regno >= 24 && regno <= 31))
10522 || (!TARGET_64BIT && (regno == 18 || regno == 19))))
10523 return true;
10524
10525 return false;
10526 }
10527
10528 /* Maximum number of registers to represent a value of mode MODE
10529 in a register of class RCLASS. */
10530
10531 int
10532 s390_class_max_nregs (enum reg_class rclass, machine_mode mode)
10533 {
10534 int reg_size;
10535 bool reg_pair_required_p = false;
10536
10537 switch (rclass)
10538 {
10539 case FP_REGS:
10540 case VEC_REGS:
10541 reg_size = TARGET_VX ? 16 : 8;
10542
10543 /* TF and TD modes would fit into a VR but we put them into a
10544 register pair since we do not have 128bit FP instructions on
10545 full VRs. */
10546 if (TARGET_VX
10547 && SCALAR_FLOAT_MODE_P (mode)
10548 && GET_MODE_SIZE (mode) >= 16)
10549 reg_pair_required_p = true;
10550
10551 /* Even if complex types would fit into a single FPR/VR we force
10552 them into a register pair to deal with the parts more easily.
10553 (FIXME: What about complex ints?) */
10554 if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
10555 reg_pair_required_p = true;
10556 break;
10557 case ACCESS_REGS:
10558 reg_size = 4;
10559 break;
10560 default:
10561 reg_size = UNITS_PER_WORD;
10562 break;
10563 }
10564
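/* For a register pair each half is placed in its own register, e.g. TFmode with TARGET_VX: 2 * ((16/2 + 16 - 1) / 16) == 2. */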
10565 if (reg_pair_required_p)
10566 return 2 * ((GET_MODE_SIZE (mode) / 2 + reg_size - 1) / reg_size);
10567
10568 return (GET_MODE_SIZE (mode) + reg_size - 1) / reg_size;
10569 }
10570
10571 /* Return TRUE if changing mode from FROM to TO should not be allowed
10572 for register class CLASS. */
10573
10574 int
10575 s390_cannot_change_mode_class (machine_mode from_mode,
10576 machine_mode to_mode,
10577 enum reg_class rclass)
10578 {
10579 machine_mode small_mode;
10580 machine_mode big_mode;
10581
10582 /* V1TF and TF have different representations in vector
10583 registers. */
10584 if (reg_classes_intersect_p (VEC_REGS, rclass)
10585 && ((from_mode == V1TFmode && to_mode == TFmode)
10586 || (from_mode == TFmode && to_mode == V1TFmode)))
10587 return 1;
10588
10589 if (GET_MODE_SIZE (from_mode) == GET_MODE_SIZE (to_mode))
10590 return 0;
10591
10592 if (GET_MODE_SIZE (from_mode) < GET_MODE_SIZE (to_mode))
10593 {
10594 small_mode = from_mode;
10595 big_mode = to_mode;
10596 }
10597 else
10598 {
10599 small_mode = to_mode;
10600 big_mode = from_mode;
10601 }
10602
10603 /* Values residing in VRs are little-endian style. All modes are
10604 placed left-aligned in a VR. This means that we cannot allow
10605 switching between modes with differing sizes. Also if the vector
10606 facility is available we still place TFmode values in VR register
10607 pairs, since the only instructions we have operating on TFmodes
10608 only deal with register pairs. Therefore we have to allow DFmode
10609 subregs of TFmodes to enable the TFmode splitters. */
10610 if (reg_classes_intersect_p (VEC_REGS, rclass)
10611 && (GET_MODE_SIZE (small_mode) < 8
10612 || s390_class_max_nregs (VEC_REGS, big_mode) == 1))
10613 return 1;
10614
10615 /* Likewise for access registers, since they have only half the
10616 word size on 64-bit. */
10617 if (reg_classes_intersect_p (ACCESS_REGS, rclass))
10618 return 1;
10619
10620 return 0;
10621 }
10622
10623 /* Return true if we use LRA instead of reload pass. */
10624 static bool
10625 s390_lra_p (void)
10626 {
10627 return s390_lra_flag;
10628 }
10629
10630 /* Return true if register FROM can be eliminated via register TO. */
10631
10632 static bool
10633 s390_can_eliminate (const int from, const int to)
10634 {
10635 /* On zSeries machines, we have not marked the base register as fixed.
10636 Instead, we have an elimination rule BASE_REGNUM -> BASE_REGNUM.
10637 If a function requires the base register, we say here that this
10638 elimination cannot be performed. This will cause reload to free
10639 up the base register (as if it were fixed). On the other hand,
10640 if the current function does *not* require the base register, we
10641 say here the elimination succeeds, which in turn allows reload
10642 to allocate the base register for any other purpose. */
10643 if (from == BASE_REGNUM && to == BASE_REGNUM)
10644 {
10645 if (TARGET_CPU_ZARCH)
10646 {
10647 s390_init_frame_layout ();
10648 return cfun->machine->base_reg == NULL_RTX;
10649 }
10650
10651 return false;
10652 }
10653
10654 /* Everything else must point into the stack frame. */
10655 gcc_assert (to == STACK_POINTER_REGNUM
10656 || to == HARD_FRAME_POINTER_REGNUM);
10657
10658 gcc_assert (from == FRAME_POINTER_REGNUM
10659 || from == ARG_POINTER_REGNUM
10660 || from == RETURN_ADDRESS_POINTER_REGNUM);
10661
10662 /* Make sure we actually saved the return address. */
10663 if (from == RETURN_ADDRESS_POINTER_REGNUM)
10664 if (!crtl->calls_eh_return
10665 && !cfun->stdarg
10666 && !cfun_frame_layout.save_return_addr_p)
10667 return false;
10668
10669 return true;
10670 }
10671
10672 /* Return the offset between registers FROM and TO just after the prologue. */
10673
10674 HOST_WIDE_INT
10675 s390_initial_elimination_offset (int from, int to)
10676 {
10677 HOST_WIDE_INT offset;
10678
10679 /* ??? Why are we called for non-eliminable pairs? */
10680 if (!s390_can_eliminate (from, to))
10681 return 0;
10682
10683 switch (from)
10684 {
10685 case FRAME_POINTER_REGNUM:
10686 offset = (get_frame_size()
10687 + STACK_POINTER_OFFSET
10688 + crtl->outgoing_args_size);
10689 break;
10690
10691 case ARG_POINTER_REGNUM:
10692 s390_init_frame_layout ();
10693 offset = cfun_frame_layout.frame_size + STACK_POINTER_OFFSET;
10694 break;
10695
10696 case RETURN_ADDRESS_POINTER_REGNUM:
10697 s390_init_frame_layout ();
10698
10699 if (cfun_frame_layout.first_save_gpr_slot == -1)
10700 {
10701 /* If it turns out that for stdarg nothing went into the reg
10702 save area we also do not need the return address
10703 pointer. */
10704 if (cfun->stdarg && !cfun_save_arg_fprs_p)
10705 return 0;
10706
10707 gcc_unreachable ();
10708 }
10709
10710 /* In order to make the following work it is not necessary for
10711 r14 to have a save slot. It is sufficient if one other GPR
10712 got one. Since the GPRs are always stored without gaps we
10713 are able to calculate where the r14 save slot would
10714 reside. */
10715 offset = (cfun_frame_layout.frame_size + cfun_frame_layout.gprs_offset +
10716 (RETURN_REGNUM - cfun_frame_layout.first_save_gpr_slot) *
10717 UNITS_PER_LONG);
10718 break;
10719
10720 case BASE_REGNUM:
10721 offset = 0;
10722 break;
10723
10724 default:
10725 gcc_unreachable ();
10726 }
10727
10728 return offset;
10729 }
10730
10731 /* Emit insn to save fpr REGNUM at offset OFFSET relative
10732 to register BASE. Return generated insn. */
10733
10734 static rtx
10735 save_fpr (rtx base, int offset, int regnum)
10736 {
10737 rtx addr;
10738 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10739
10740 if (regnum >= 16 && regnum <= (16 + FP_ARG_NUM_REG))
10741 set_mem_alias_set (addr, get_varargs_alias_set ());
10742 else
10743 set_mem_alias_set (addr, get_frame_alias_set ());
10744
10745 return emit_move_insn (addr, gen_rtx_REG (DFmode, regnum));
10746 }
10747
10748 /* Emit insn to restore fpr REGNUM from offset OFFSET relative
10749 to register BASE. Return generated insn. */
10750
10751 static rtx
10752 restore_fpr (rtx base, int offset, int regnum)
10753 {
10754 rtx addr;
10755 addr = gen_rtx_MEM (DFmode, plus_constant (Pmode, base, offset));
10756 set_mem_alias_set (addr, get_frame_alias_set ());
10757
10758 return emit_move_insn (gen_rtx_REG (DFmode, regnum), addr);
10759 }
10760
10761 /* Return true if REGNO is a global register, but not one
10762 of the special ones that need to be saved/restored anyway. */
10763
10764 static inline bool
10765 global_not_special_regno_p (int regno)
10766 {
10767 return (global_regs[regno]
10768 /* These registers are special and need to be
10769 restored in any case. */
10770 && !(regno == STACK_POINTER_REGNUM
10771 || regno == RETURN_REGNUM
10772 || regno == BASE_REGNUM
10773 || (flag_pic && regno == (int)PIC_OFFSET_TABLE_REGNUM)));
10774 }
10775
10776 /* Generate insn to save registers FIRST to LAST into
10777 the register save area located at offset OFFSET
10778 relative to register BASE. */
10779
10780 static rtx
10781 save_gprs (rtx base, int offset, int first, int last)
10782 {
10783 rtx addr, insn, note;
10784 int i;
10785
10786 addr = plus_constant (Pmode, base, offset);
10787 addr = gen_rtx_MEM (Pmode, addr);
10788
10789 set_mem_alias_set (addr, get_frame_alias_set ());
10790
10791 /* Special-case single register. */
10792 if (first == last)
10793 {
10794 if (TARGET_64BIT)
10795 insn = gen_movdi (addr, gen_rtx_REG (Pmode, first));
10796 else
10797 insn = gen_movsi (addr, gen_rtx_REG (Pmode, first));
10798
10799 if (!global_not_special_regno_p (first))
10800 RTX_FRAME_RELATED_P (insn) = 1;
10801 return insn;
10802 }
10803
10804
10805 insn = gen_store_multiple (addr,
10806 gen_rtx_REG (Pmode, first),
10807 GEN_INT (last - first + 1));
10808
10809 if (first <= 6 && cfun->stdarg)
10810 for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
10811 {
10812 rtx mem = XEXP (XVECEXP (PATTERN (insn), 0, i), 0);
10813
10814 if (first + i <= 6)
10815 set_mem_alias_set (mem, get_varargs_alias_set ());
10816 }
10817
10818 /* We need to set the FRAME_RELATED flag on all SETs
10819 inside the store-multiple pattern.
10820
10821 However, we must not emit DWARF records for registers 2..5
10822 if they are stored for use by variable arguments ...
10823
10824 ??? Unfortunately, it is not enough to simply not set the
10825 FRAME_RELATED flags for those SETs, because the first SET
10826 of the PARALLEL is always treated as if it had the flag
10827 set, even if it does not. Therefore we emit a new pattern
10828 without those registers as REG_FRAME_RELATED_EXPR note. */
10829
10830 if (first >= 6 && !global_not_special_regno_p (first))
10831 {
10832 rtx pat = PATTERN (insn);
10833
10834 for (i = 0; i < XVECLEN (pat, 0); i++)
10835 if (GET_CODE (XVECEXP (pat, 0, i)) == SET
10836 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (pat,
10837 0, i)))))
10838 RTX_FRAME_RELATED_P (XVECEXP (pat, 0, i)) = 1;
10839
10840 RTX_FRAME_RELATED_P (insn) = 1;
10841 }
10842 else if (last >= 6)
10843 {
10844 int start;
10845
10846 for (start = first >= 6 ? first : 6; start <= last; start++)
10847 if (!global_not_special_regno_p (start))
10848 break;
10849
10850 if (start > last)
10851 return insn;
10852
10853 addr = plus_constant (Pmode, base,
10854 offset + (start - first) * UNITS_PER_LONG);
10855
10856 if (start == last)
10857 {
10858 if (TARGET_64BIT)
10859 note = gen_movdi (gen_rtx_MEM (Pmode, addr),
10860 gen_rtx_REG (Pmode, start));
10861 else
10862 note = gen_movsi (gen_rtx_MEM (Pmode, addr),
10863 gen_rtx_REG (Pmode, start));
10864 note = PATTERN (note);
10865
10866 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10867 RTX_FRAME_RELATED_P (insn) = 1;
10868
10869 return insn;
10870 }
10871
10872 note = gen_store_multiple (gen_rtx_MEM (Pmode, addr),
10873 gen_rtx_REG (Pmode, start),
10874 GEN_INT (last - start + 1));
10875 note = PATTERN (note);
10876
10877 add_reg_note (insn, REG_FRAME_RELATED_EXPR, note);
10878
10879 for (i = 0; i < XVECLEN (note, 0); i++)
10880 if (GET_CODE (XVECEXP (note, 0, i)) == SET
10881 && !global_not_special_regno_p (REGNO (SET_SRC (XVECEXP (note,
10882 0, i)))))
10883 RTX_FRAME_RELATED_P (XVECEXP (note, 0, i)) = 1;
10884
10885 RTX_FRAME_RELATED_P (insn) = 1;
10886 }
10887
10888 return insn;
10889 }
10890
10891 /* Generate insn to restore registers FIRST to LAST from
10892 the register save area located at offset OFFSET
10893 relative to register BASE. */
10894
10895 static rtx
10896 restore_gprs (rtx base, int offset, int first, int last)
10897 {
10898 rtx addr, insn;
10899
10900 addr = plus_constant (Pmode, base, offset);
10901 addr = gen_rtx_MEM (Pmode, addr);
10902 set_mem_alias_set (addr, get_frame_alias_set ());
10903
10904 /* Special-case single register. */
10905 if (first == last)
10906 {
10907 if (TARGET_64BIT)
10908 insn = gen_movdi (gen_rtx_REG (Pmode, first), addr);
10909 else
10910 insn = gen_movsi (gen_rtx_REG (Pmode, first), addr);
10911
10912 RTX_FRAME_RELATED_P (insn) = 1;
10913 return insn;
10914 }
10915
10916 insn = gen_load_multiple (gen_rtx_REG (Pmode, first),
10917 addr,
10918 GEN_INT (last - first + 1));
10919 RTX_FRAME_RELATED_P (insn) = 1;
10920 return insn;
10921 }
10922
10923 /* Return insn sequence to load the GOT register. */
10924
10925 rtx_insn *
10926 s390_load_got (void)
10927 {
10928 rtx_insn *insns;
10929
10930 /* We cannot use pic_offset_table_rtx here since this function is
10931 also used for non-PIC code when __tls_get_offset is called; in
10932 that case neither PIC_OFFSET_TABLE_REGNUM nor pic_offset_table_rtx
10933 is usable. */
10934 rtx got_rtx = gen_rtx_REG (Pmode, 12);
10935
10936 start_sequence ();
10937
10938 if (TARGET_CPU_ZARCH)
10939 {
10940 emit_move_insn (got_rtx, s390_got_symbol ());
10941 }
10942 else
10943 {
10944 rtx offset;
10945
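/* On pre-z900 machines the GOT address is formed via the literal pool: load the GOT offset relative to the literal pool base from the pool and then add the pool base (UNSPEC_LTREL_BASE) to it. */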
10946 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, s390_got_symbol ()),
10947 UNSPEC_LTREL_OFFSET);
10948 offset = gen_rtx_CONST (Pmode, offset);
10949 offset = force_const_mem (Pmode, offset);
10950
10951 emit_move_insn (got_rtx, offset);
10952
10953 offset = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (offset, 0)),
10954 UNSPEC_LTREL_BASE);
10955 offset = gen_rtx_PLUS (Pmode, got_rtx, offset);
10956
10957 emit_move_insn (got_rtx, offset);
10958 }
10959
10960 insns = get_insns ();
10961 end_sequence ();
10962 return insns;
10963 }
10964
10965 /* This ties together stack memory (MEM with an alias set of frame_alias_set)
10966 and the change to the stack pointer. */
10967
10968 static void
10969 s390_emit_stack_tie (void)
10970 {
10971 rtx mem = gen_frame_mem (BLKmode,
10972 gen_rtx_REG (Pmode, STACK_POINTER_REGNUM));
10973
10974 emit_insn (gen_stack_tie (mem));
10975 }
10976
10977 /* Copy GPRS into FPR save slots. */
10978
10979 static void
10980 s390_save_gprs_to_fprs (void)
10981 {
10982 int i;
10983
10984 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
10985 return;
10986
10987 for (i = 6; i < 16; i++)
10988 {
10989 if (FP_REGNO_P (cfun_gpr_save_slot (i)))
10990 {
10991 rtx_insn *insn =
10992 emit_move_insn (gen_rtx_REG (DImode, cfun_gpr_save_slot (i)),
10993 gen_rtx_REG (DImode, i));
10994 RTX_FRAME_RELATED_P (insn) = 1;
10995 /* This prevents dwarf2cfi from interpreting the set on its
10996 own; doing so might emit def_cfa_register notes making an
10997 FPR the new CFA. */
10998 add_reg_note (insn, REG_CFA_REGISTER, copy_rtx (PATTERN (insn)));
10999 }
11000 }
11001 }
11002
11003 /* Restore GPRs from FPR save slots. */
11004
11005 static void
11006 s390_restore_gprs_from_fprs (void)
11007 {
11008 int i;
11009
11010 if (!TARGET_Z10 || !TARGET_HARD_FLOAT || !crtl->is_leaf)
11011 return;
11012
11013 for (i = 6; i < 16; i++)
11014 {
11015 rtx_insn *insn;
11016
11017 if (!FP_REGNO_P (cfun_gpr_save_slot (i)))
11018 continue;
11019
11020 rtx fpr = gen_rtx_REG (DImode, cfun_gpr_save_slot (i));
11021
11022 if (i == STACK_POINTER_REGNUM)
11023 insn = emit_insn (gen_stack_restore_from_fpr (fpr));
11024 else
11025 insn = emit_move_insn (gen_rtx_REG (DImode, i), fpr);
11026
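/* Tell the CFI machinery that the register is restored here; when the stack pointer itself is restored the CFA is re-expressed relative to it below. */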
11027 df_set_regs_ever_live (i, true);
11028 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (DImode, i));
11029 if (i == STACK_POINTER_REGNUM)
11030 add_reg_note (insn, REG_CFA_DEF_CFA,
11031 plus_constant (Pmode, stack_pointer_rtx,
11032 STACK_POINTER_OFFSET));
11033 RTX_FRAME_RELATED_P (insn) = 1;
11034 }
11035 }
11036
11037
11038 /* A pass run immediately before shrink-wrapping and prologue and epilogue
11039 generation. */
11040
11041 namespace {
11042
11043 const pass_data pass_data_s390_early_mach =
11044 {
11045 RTL_PASS, /* type */
11046 "early_mach", /* name */
11047 OPTGROUP_NONE, /* optinfo_flags */
11048 TV_MACH_DEP, /* tv_id */
11049 0, /* properties_required */
11050 0, /* properties_provided */
11051 0, /* properties_destroyed */
11052 0, /* todo_flags_start */
11053 ( TODO_df_verify | TODO_df_finish ), /* todo_flags_finish */
11054 };
11055
11056 class pass_s390_early_mach : public rtl_opt_pass
11057 {
11058 public:
11059 pass_s390_early_mach (gcc::context *ctxt)
11060 : rtl_opt_pass (pass_data_s390_early_mach, ctxt)
11061 {}
11062
11063 /* opt_pass methods: */
11064 virtual unsigned int execute (function *);
11065
11066 }; // class pass_s390_early_mach
11067
11068 unsigned int
11069 pass_s390_early_mach::execute (function *fun)
11070 {
11071 rtx_insn *insn;
11072
11073 /* Try to get rid of the FPR clobbers. */
11074 s390_optimize_nonescaping_tx ();
11075
11076 /* Re-compute register info. */
11077 s390_register_info ();
11078
11079 /* If we're using a base register, ensure that it is always valid for
11080 the first non-prologue instruction. */
11081 if (fun->machine->base_reg)
11082 emit_insn_at_entry (gen_main_pool (fun->machine->base_reg));
11083
11084 /* Annotate all constant pool references to let the scheduler know
11085 they implicitly use the base register. */
11086 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
11087 if (INSN_P (insn))
11088 {
11089 annotate_constant_pool_refs (&PATTERN (insn));
11090 df_insn_rescan (insn);
11091 }
11092 return 0;
11093 }
11094
11095 } // anon namespace
11096
11097 /* Expand the prologue into a bunch of separate insns. */
11098
11099 void
11100 s390_emit_prologue (void)
11101 {
11102 rtx insn, addr;
11103 rtx temp_reg;
11104 int i;
11105 int offset;
11106 int next_fpr = 0;
11107
11108 /* Choose the best register to use as a temporary within the prologue.
11109 TPF with profiling must avoid register 14 - the tracing function
11110 needs the original contents of r14 to be preserved. */
11111
11112 if (!has_hard_reg_initial_val (Pmode, RETURN_REGNUM)
11113 && !crtl->is_leaf
11114 && !TARGET_TPF_PROFILING)
11115 temp_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11116 else if (flag_split_stack && cfun->stdarg)
11117 temp_reg = gen_rtx_REG (Pmode, 12);
11118 else
11119 temp_reg = gen_rtx_REG (Pmode, 1);
11120
11121 s390_save_gprs_to_fprs ();
11122
11123 /* Save call saved gprs. */
11124 if (cfun_frame_layout.first_save_gpr != -1)
11125 {
11126 insn = save_gprs (stack_pointer_rtx,
11127 cfun_frame_layout.gprs_offset +
11128 UNITS_PER_LONG * (cfun_frame_layout.first_save_gpr
11129 - cfun_frame_layout.first_save_gpr_slot),
11130 cfun_frame_layout.first_save_gpr,
11131 cfun_frame_layout.last_save_gpr);
11132 emit_insn (insn);
11133 }
11134
11135 /* Dummy insn to mark literal pool slot. */
11136
11137 if (cfun->machine->base_reg)
11138 emit_insn (gen_main_pool (cfun->machine->base_reg));
11139
11140 offset = cfun_frame_layout.f0_offset;
11141
11142 /* Save f0 and f2. */
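/* The FP argument registers f0, f2, f4 and f6 occupy consecutive hard register numbers, so FPR0_REGNUM + 1 denotes f2. */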
11143 for (i = FPR0_REGNUM; i <= FPR0_REGNUM + 1; i++)
11144 {
11145 if (cfun_fpr_save_p (i))
11146 {
11147 save_fpr (stack_pointer_rtx, offset, i);
11148 offset += 8;
11149 }
11150 else if (!TARGET_PACKED_STACK || cfun->stdarg)
11151 offset += 8;
11152 }
11153
11154 /* Save f4 and f6. */
11155 offset = cfun_frame_layout.f4_offset;
11156 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11157 {
11158 if (cfun_fpr_save_p (i))
11159 {
11160 insn = save_fpr (stack_pointer_rtx, offset, i);
11161 offset += 8;
11162
11163 /* If f4 and f6 are call clobbered they are saved due to
11164 stdarg and therefore are not frame related. */
11165 if (!call_really_used_regs[i])
11166 RTX_FRAME_RELATED_P (insn) = 1;
11167 }
11168 else if (!TARGET_PACKED_STACK || call_really_used_regs[i])
11169 offset += 8;
11170 }
11171
11172 if (TARGET_PACKED_STACK
11173 && cfun_save_high_fprs_p
11174 && cfun_frame_layout.f8_offset + cfun_frame_layout.high_fprs * 8 > 0)
11175 {
11176 offset = (cfun_frame_layout.f8_offset
11177 + (cfun_frame_layout.high_fprs - 1) * 8);
11178
11179 for (i = FPR15_REGNUM; i >= FPR8_REGNUM && offset >= 0; i--)
11180 if (cfun_fpr_save_p (i))
11181 {
11182 insn = save_fpr (stack_pointer_rtx, offset, i);
11183
11184 RTX_FRAME_RELATED_P (insn) = 1;
11185 offset -= 8;
11186 }
11187 if (offset >= cfun_frame_layout.f8_offset)
11188 next_fpr = i;
11189 }
11190
11191 if (!TARGET_PACKED_STACK)
11192 next_fpr = cfun_save_high_fprs_p ? FPR15_REGNUM : 0;
11193
11194 if (flag_stack_usage_info)
11195 current_function_static_stack_size = cfun_frame_layout.frame_size;
11196
11197 /* Decrement stack pointer. */
11198
11199 if (cfun_frame_layout.frame_size > 0)
11200 {
11201 rtx frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11202 rtx real_frame_off;
11203
11204 if (s390_stack_size)
11205 {
11206 HOST_WIDE_INT stack_guard;
11207
11208 if (s390_stack_guard)
11209 stack_guard = s390_stack_guard;
11210 else
11211 {
11212 /* If no value for the stack guard is provided, the smallest power
11213 of 2 at least as large as the current frame size is chosen. */
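/* E.g. a frame size of 1200 bytes results in a stack guard of 2048 bytes. */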
11214 stack_guard = 1;
11215 while (stack_guard < cfun_frame_layout.frame_size)
11216 stack_guard <<= 1;
11217 }
11218
11219 if (cfun_frame_layout.frame_size >= s390_stack_size)
11220 {
11221 warning (0, "frame size of function %qs is %wd"
11222 " bytes exceeding user provided stack limit of "
11223 "%d bytes. "
11224 "An unconditional trap is added.",
11225 current_function_name(), cfun_frame_layout.frame_size,
11226 s390_stack_size);
11227 emit_insn (gen_trap ());
11228 emit_barrier ();
11229 }
11230 else
11231 {
11232 /* stack_guard has to be smaller than s390_stack_size.
11233 Otherwise we would emit an AND with zero which would
11234 not match the test under mask pattern. */
11235 if (stack_guard >= s390_stack_size)
11236 {
11237 warning (0, "frame size of function %qs is %wd"
11238 " bytes which is more than half the stack size. "
11239 "The dynamic check would not be reliable. "
11240 "No check emitted for this function.",
11241 current_function_name(),
11242 cfun_frame_layout.frame_size);
11243 }
11244 else
11245 {
11246 HOST_WIDE_INT stack_check_mask = ((s390_stack_size - 1)
11247 & ~(stack_guard - 1));
11248
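/* The conditional trap fires when the stack pointer modulo s390_stack_size drops below stack_guard, i.e. when the guard area at the low end of the stack region is reached. */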
11249 rtx t = gen_rtx_AND (Pmode, stack_pointer_rtx,
11250 GEN_INT (stack_check_mask));
11251 if (TARGET_64BIT)
11252 emit_insn (gen_ctrapdi4 (gen_rtx_EQ (VOIDmode,
11253 t, const0_rtx),
11254 t, const0_rtx, const0_rtx));
11255 else
11256 emit_insn (gen_ctrapsi4 (gen_rtx_EQ (VOIDmode,
11257 t, const0_rtx),
11258 t, const0_rtx, const0_rtx));
11259 }
11260 }
11261 }
11262
11263 if (s390_warn_framesize > 0
11264 && cfun_frame_layout.frame_size >= s390_warn_framesize)
11265 warning (0, "frame size of %qs is %wd bytes",
11266 current_function_name (), cfun_frame_layout.frame_size);
11267
11268 if (s390_warn_dynamicstack_p && cfun->calls_alloca)
11269 warning (0, "%qs uses dynamic stack allocation", current_function_name ());
11270
11271 /* Save incoming stack pointer into temp reg. */
11272 if (TARGET_BACKCHAIN || next_fpr)
11273 insn = emit_insn (gen_move_insn (temp_reg, stack_pointer_rtx));
11274
11275 /* Subtract frame size from stack pointer. */
11276
11277 if (DISP_IN_RANGE (INTVAL (frame_off)))
11278 {
11279 insn = gen_rtx_SET (stack_pointer_rtx,
11280 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11281 frame_off));
11282 insn = emit_insn (insn);
11283 }
11284 else
11285 {
11286 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11287 frame_off = force_const_mem (Pmode, frame_off);
11288
11289 insn = emit_insn (gen_add2_insn (stack_pointer_rtx, frame_off));
11290 annotate_constant_pool_refs (&PATTERN (insn));
11291 }
11292
11293 RTX_FRAME_RELATED_P (insn) = 1;
11294 real_frame_off = GEN_INT (-cfun_frame_layout.frame_size);
11295 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11296 gen_rtx_SET (stack_pointer_rtx,
11297 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11298 real_frame_off)));
11299
11300 /* Set backchain. */
11301
11302 if (TARGET_BACKCHAIN)
11303 {
11304 if (cfun_frame_layout.backchain_offset)
11305 addr = gen_rtx_MEM (Pmode,
11306 plus_constant (Pmode, stack_pointer_rtx,
11307 cfun_frame_layout.backchain_offset));
11308 else
11309 addr = gen_rtx_MEM (Pmode, stack_pointer_rtx);
11310 set_mem_alias_set (addr, get_frame_alias_set ());
11311 insn = emit_insn (gen_move_insn (addr, temp_reg));
11312 }
11313
11314 /* If we support non-call exceptions (e.g. for Java),
11315 we need to make sure the backchain pointer is set up
11316 before any possibly trapping memory access. */
11317 if (TARGET_BACKCHAIN && cfun->can_throw_non_call_exceptions)
11318 {
11319 addr = gen_rtx_MEM (BLKmode, gen_rtx_SCRATCH (VOIDmode));
11320 emit_clobber (addr);
11321 }
11322 }
11323
11324 /* Save fprs 8 - 15 (64 bit ABI). */
11325
11326 if (cfun_save_high_fprs_p && next_fpr)
11327 {
11328 /* If the stack might be accessed through a different register
11329 we have to make sure that the stack pointer decrement is not
11330 moved below the use of the stack slots. */
11331 s390_emit_stack_tie ();
11332
11333 insn = emit_insn (gen_add2_insn (temp_reg,
11334 GEN_INT (cfun_frame_layout.f8_offset)));
11335
11336 offset = 0;
11337
11338 for (i = FPR8_REGNUM; i <= next_fpr; i++)
11339 if (cfun_fpr_save_p (i))
11340 {
11341 rtx addr = plus_constant (Pmode, stack_pointer_rtx,
11342 cfun_frame_layout.frame_size
11343 + cfun_frame_layout.f8_offset
11344 + offset);
11345
11346 insn = save_fpr (temp_reg, offset, i);
11347 offset += 8;
11348 RTX_FRAME_RELATED_P (insn) = 1;
11349 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
11350 gen_rtx_SET (gen_rtx_MEM (DFmode, addr),
11351 gen_rtx_REG (DFmode, i)));
11352 }
11353 }
11354
11355 /* Set frame pointer, if needed. */
11356
11357 if (frame_pointer_needed)
11358 {
11359 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
11360 RTX_FRAME_RELATED_P (insn) = 1;
11361 }
11362
11363 /* Set up got pointer, if needed. */
11364
11365 if (flag_pic && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
11366 {
11367 rtx_insn *insns = s390_load_got ();
11368
11369 for (rtx_insn *insn = insns; insn; insn = NEXT_INSN (insn))
11370 annotate_constant_pool_refs (&PATTERN (insn));
11371
11372 emit_insn (insns);
11373 }
11374
11375 if (TARGET_TPF_PROFILING)
11376 {
11377 /* Generate a BAS instruction to serve as a function
11378 entry intercept to facilitate the use of tracing
11379 algorithms located at the branch target. */
11380 emit_insn (gen_prologue_tpf ());
11381
11382 /* Emit a blockage here so that all code
11383 lies between the profiling mechanisms. */
11384 emit_insn (gen_blockage ());
11385 }
11386 }
11387
11388 /* Expand the epilogue into a bunch of separate insns. */
11389
11390 void
11391 s390_emit_epilogue (bool sibcall)
11392 {
11393 rtx frame_pointer, return_reg, cfa_restores = NULL_RTX;
11394 int area_bottom, area_top, offset = 0;
11395 int next_offset;
11396 rtvec p;
11397 int i;
11398
11399 if (TARGET_TPF_PROFILING)
11400 {
11401
11402 /* Generate a BAS instruction to serve as a function
11403 exit intercept to facilitate the use of tracing
11404 algorithms located at the branch target. */
11405
11406 /* Emit a blockage here so that all code
11407 lies between the profiling mechanisms. */
11408 emit_insn (gen_blockage ());
11409
11410 emit_insn (gen_epilogue_tpf ());
11411 }
11412
11413 /* Check whether to use frame or stack pointer for restore. */
11414
11415 frame_pointer = (frame_pointer_needed
11416 ? hard_frame_pointer_rtx : stack_pointer_rtx);
11417
11418 s390_frame_area (&area_bottom, &area_top);
11419
11420 /* Check whether we can access the register save area.
11421 If not, increment the frame pointer as required. */
11422
11423 if (area_top <= area_bottom)
11424 {
11425 /* Nothing to restore. */
11426 }
11427 else if (DISP_IN_RANGE (cfun_frame_layout.frame_size + area_bottom)
11428 && DISP_IN_RANGE (cfun_frame_layout.frame_size + area_top - 1))
11429 {
11430 /* Area is in range. */
11431 offset = cfun_frame_layout.frame_size;
11432 }
11433 else
11434 {
11435 rtx insn, frame_off, cfa;
11436
11437 offset = area_bottom < 0 ? -area_bottom : 0;
11438 frame_off = GEN_INT (cfun_frame_layout.frame_size - offset);
11439
11440 cfa = gen_rtx_SET (frame_pointer,
11441 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11442 if (DISP_IN_RANGE (INTVAL (frame_off)))
11443 {
11444 insn = gen_rtx_SET (frame_pointer,
11445 gen_rtx_PLUS (Pmode, frame_pointer, frame_off));
11446 insn = emit_insn (insn);
11447 }
11448 else
11449 {
11450 if (!CONST_OK_FOR_K (INTVAL (frame_off)))
11451 frame_off = force_const_mem (Pmode, frame_off);
11452
11453 insn = emit_insn (gen_add2_insn (frame_pointer, frame_off));
11454 annotate_constant_pool_refs (&PATTERN (insn));
11455 }
11456 add_reg_note (insn, REG_CFA_ADJUST_CFA, cfa);
11457 RTX_FRAME_RELATED_P (insn) = 1;
11458 }
11459
11460 /* Restore call saved fprs. */
11461
11462 if (TARGET_64BIT)
11463 {
11464 if (cfun_save_high_fprs_p)
11465 {
11466 next_offset = cfun_frame_layout.f8_offset;
11467 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
11468 {
11469 if (cfun_fpr_save_p (i))
11470 {
11471 restore_fpr (frame_pointer,
11472 offset + next_offset, i);
11473 cfa_restores
11474 = alloc_reg_note (REG_CFA_RESTORE,
11475 gen_rtx_REG (DFmode, i), cfa_restores);
11476 next_offset += 8;
11477 }
11478 }
11479 }
11480
11481 }
11482 else
11483 {
11484 next_offset = cfun_frame_layout.f4_offset;
11485 /* f4, f6 */
11486 for (i = FPR4_REGNUM; i <= FPR4_REGNUM + 1; i++)
11487 {
11488 if (cfun_fpr_save_p (i))
11489 {
11490 restore_fpr (frame_pointer,
11491 offset + next_offset, i);
11492 cfa_restores
11493 = alloc_reg_note (REG_CFA_RESTORE,
11494 gen_rtx_REG (DFmode, i), cfa_restores);
11495 next_offset += 8;
11496 }
11497 else if (!TARGET_PACKED_STACK)
11498 next_offset += 8;
11499 }
11500
11501 }
11502
11503 /* Return register. */
11504
11505 return_reg = gen_rtx_REG (Pmode, RETURN_REGNUM);
11506
11507 /* Restore call saved gprs. */
11508
11509 if (cfun_frame_layout.first_restore_gpr != -1)
11510 {
11511 rtx insn, addr;
11512 int i;
11513
11514 /* Check for global registers and save them to the stack
11515 locations from which they will be restored. */
11516
11517 for (i = cfun_frame_layout.first_restore_gpr;
11518 i <= cfun_frame_layout.last_restore_gpr;
11519 i++)
11520 {
11521 if (global_not_special_regno_p (i))
11522 {
11523 addr = plus_constant (Pmode, frame_pointer,
11524 offset + cfun_frame_layout.gprs_offset
11525 + (i - cfun_frame_layout.first_save_gpr_slot)
11526 * UNITS_PER_LONG);
11527 addr = gen_rtx_MEM (Pmode, addr);
11528 set_mem_alias_set (addr, get_frame_alias_set ());
11529 emit_move_insn (addr, gen_rtx_REG (Pmode, i));
11530 }
11531 else
11532 cfa_restores
11533 = alloc_reg_note (REG_CFA_RESTORE,
11534 gen_rtx_REG (Pmode, i), cfa_restores);
11535 }
11536
11537 /* Fetch the return address from the stack before the load multiple;
11538 this helps scheduling.
11539
11540 Only do this if we already decided that r14 needs to be
11541 saved to a stack slot. (And not just because r14 happens to
11542 be in between two GPRs which need saving.) Otherwise it
11543 would be difficult to take that decision back in
11544 s390_optimize_prologue.
11545
11546 This optimization is only helpful on in-order machines. */
11547 if (! sibcall
11548 && cfun_gpr_save_slot (RETURN_REGNUM) == SAVE_SLOT_STACK
11549 && s390_tune <= PROCESSOR_2097_Z10)
11550 {
11551 int return_regnum = find_unused_clobbered_reg();
11552 if (!return_regnum)
11553 return_regnum = 4;
11554 return_reg = gen_rtx_REG (Pmode, return_regnum);
11555
11556 addr = plus_constant (Pmode, frame_pointer,
11557 offset + cfun_frame_layout.gprs_offset
11558 + (RETURN_REGNUM
11559 - cfun_frame_layout.first_save_gpr_slot)
11560 * UNITS_PER_LONG);
11561 addr = gen_rtx_MEM (Pmode, addr);
11562 set_mem_alias_set (addr, get_frame_alias_set ());
11563 emit_move_insn (return_reg, addr);
11564
11565 /* Once we did that optimization we have to make sure
11566 s390_optimize_prologue does not try to remove the store
11567 of r14 since we will not be able to find the load issued
11568 here. */
11569 cfun_frame_layout.save_return_addr_p = true;
11570 }
11571
11572 insn = restore_gprs (frame_pointer,
11573 offset + cfun_frame_layout.gprs_offset
11574 + (cfun_frame_layout.first_restore_gpr
11575 - cfun_frame_layout.first_save_gpr_slot)
11576 * UNITS_PER_LONG,
11577 cfun_frame_layout.first_restore_gpr,
11578 cfun_frame_layout.last_restore_gpr);
11579 insn = emit_insn (insn);
11580 REG_NOTES (insn) = cfa_restores;
11581 add_reg_note (insn, REG_CFA_DEF_CFA,
11582 plus_constant (Pmode, stack_pointer_rtx,
11583 STACK_POINTER_OFFSET));
11584 RTX_FRAME_RELATED_P (insn) = 1;
11585 }
11586
11587 s390_restore_gprs_from_fprs ();
11588
11589 if (! sibcall)
11590 {
11591
11592 /* Return to caller. */
11593
11594 p = rtvec_alloc (2);
11595
11596 RTVEC_ELT (p, 0) = ret_rtx;
11597 RTVEC_ELT (p, 1) = gen_rtx_USE (VOIDmode, return_reg);
11598 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, p));
11599 }
11600 }
11601
11602 /* Implement TARGET_SET_UP_BY_PROLOGUE. */
11603
11604 static void
11605 s300_set_up_by_prologue (hard_reg_set_container *regs)
11606 {
11607 if (cfun->machine->base_reg
11608 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11609 SET_HARD_REG_BIT (regs->set, REGNO (cfun->machine->base_reg));
11610 }
11611
11612 /* -fsplit-stack support. */
11613
11614 /* A SYMBOL_REF for __morestack. */
11615 static GTY(()) rtx morestack_ref;
11616
11617 /* When using -fsplit-stack, the allocation routines set a field in
11618 the TCB to the bottom of the stack plus this much space, measured
11619 in bytes. */
11620
11621 #define SPLIT_STACK_AVAILABLE 1024
11622
11623 /* Emit -fsplit-stack prologue, which goes before the regular function
11624 prologue. */
11625
11626 void
11627 s390_expand_split_stack_prologue (void)
11628 {
11629 rtx r1, guard, cc = NULL;
11630 rtx_insn *insn;
11631 /* Offset from thread pointer to __private_ss. */
11632 int psso = TARGET_64BIT ? 0x38 : 0x20;
11633 /* Pointer size in bytes. */
11634 /* Frame size and argument size - the two parameters to __morestack. */
11635 HOST_WIDE_INT frame_size = cfun_frame_layout.frame_size;
11636 /* Align argument size to 8 bytes - simplifies __morestack code. */
11637 HOST_WIDE_INT args_size = crtl->args.size >= 0
11638 ? ((crtl->args.size + 7) & ~7)
11639 : 0;
11640 /* Label to be called by __morestack. */
11641 rtx_code_label *call_done = NULL;
11642 rtx_code_label *parm_base = NULL;
11643 rtx tmp;
11644
11645 gcc_assert (flag_split_stack && reload_completed);
11646 if (!TARGET_CPU_ZARCH)
11647 {
11648 sorry ("CPUs older than z900 are not supported for -fsplit-stack");
11649 return;
11650 }
11651
11652 r1 = gen_rtx_REG (Pmode, 1);
11653
11654 /* If no stack frame will be allocated, don't do anything. */
11655 if (!frame_size)
11656 {
11657 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11658 {
11659 /* If va_start is used, just use r15. */
11660 emit_move_insn (r1,
11661 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11662 GEN_INT (STACK_POINTER_OFFSET)));
11663
11664 }
11665 return;
11666 }
11667
11668 if (morestack_ref == NULL_RTX)
11669 {
11670 morestack_ref = gen_rtx_SYMBOL_REF (Pmode, "__morestack");
11671 SYMBOL_REF_FLAGS (morestack_ref) |= (SYMBOL_FLAG_LOCAL
11672 | SYMBOL_FLAG_FUNCTION);
11673 }
11674
11675 if (CONST_OK_FOR_K (frame_size) || CONST_OK_FOR_Op (frame_size))
11676 {
11677 /* If frame_size will fit in an add instruction, do a stack space
11678 check, and only call __morestack if there's not enough space. */
11679
11680 /* Get thread pointer. r1 is the only register we can always destroy - r0
11681 could contain a static chain (and cannot be used to address memory
11682 anyway), r2-r6 can contain parameters, and r6-r15 are callee-saved. */
11683 emit_move_insn (r1, gen_rtx_REG (Pmode, TP_REGNUM));
11684 /* Aim at __private_ss. */
11685 guard = gen_rtx_MEM (Pmode, plus_constant (Pmode, r1, psso));
11686
11687 /* If less than 1kiB is used, skip the addition and compare directly
11688 with __private_ss. */
11689 if (frame_size > SPLIT_STACK_AVAILABLE)
11690 {
11691 emit_move_insn (r1, guard);
11692 if (TARGET_64BIT)
11693 emit_insn (gen_adddi3 (r1, r1, GEN_INT (frame_size)));
11694 else
11695 emit_insn (gen_addsi3 (r1, r1, GEN_INT (frame_size)));
11696 guard = r1;
11697 }
11698
11699 /* Compare the (maybe adjusted) guard with the stack pointer. */
11700 cc = s390_emit_compare (LT, stack_pointer_rtx, guard);
11701 }
11702
11703 call_done = gen_label_rtx ();
11704 parm_base = gen_label_rtx ();
11705
11706 /* Emit the parameter block. */
11707 tmp = gen_split_stack_data (parm_base, call_done,
11708 GEN_INT (frame_size),
11709 GEN_INT (args_size));
11710 insn = emit_insn (tmp);
11711 add_reg_note (insn, REG_LABEL_OPERAND, call_done);
11712 LABEL_NUSES (call_done)++;
11713 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11714 LABEL_NUSES (parm_base)++;
11715
11716 /* %r1 = litbase. */
11717 insn = emit_move_insn (r1, gen_rtx_LABEL_REF (VOIDmode, parm_base));
11718 add_reg_note (insn, REG_LABEL_OPERAND, parm_base);
11719 LABEL_NUSES (parm_base)++;
11720
11721 /* Now, we need to call __morestack. It has very special calling
11722 conventions: it preserves param/return/static chain registers for
11723 calling main function body, and looks for its own parameters at %r1. */
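  /* The parameter block emitted above at PARM_BASE holds the frame
     size, the 8-byte aligned argument size and the address of the
     CALL_DONE label; %r1 was loaded with the address of that block
     just above, which is how __morestack locates all three values.  */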
11724
11725 if (cc != NULL)
11726 {
11727 tmp = gen_split_stack_cond_call (morestack_ref, cc, call_done);
11728
11729 insn = emit_jump_insn (tmp);
11730 JUMP_LABEL (insn) = call_done;
11731 LABEL_NUSES (call_done)++;
11732
11733 /* Mark the jump as very unlikely to be taken. */
11734 add_reg_br_prob_note (insn,
11735 profile_probability::very_unlikely ());
11736
11737 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11738 {
11739 /* If va_start is used, and __morestack was not called, just use
11740 r15. */
11741 emit_move_insn (r1,
11742 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
11743 GEN_INT (STACK_POINTER_OFFSET)));
11744 }
11745 }
11746 else
11747 {
11748 tmp = gen_split_stack_call (morestack_ref, call_done);
11749 insn = emit_jump_insn (tmp);
11750 JUMP_LABEL (insn) = call_done;
11751 LABEL_NUSES (call_done)++;
11752 emit_barrier ();
11753 }
11754
11755 /* __morestack will call us here. */
11756
11757 emit_label (call_done);
11758 }
11759
11760 /* We may have to tell the dataflow pass that the split stack prologue
11761 is initializing a register. */
11762
11763 static void
11764 s390_live_on_entry (bitmap regs)
11765 {
11766 if (cfun->machine->split_stack_varargs_pointer != NULL_RTX)
11767 {
11768 gcc_assert (flag_split_stack);
11769 bitmap_set_bit (regs, 1);
11770 }
11771 }
11772
11773 /* Return true if the function can use simple_return to return outside
11774 of a shrink-wrapped region. At present shrink-wrapping is supported
11775 in all cases. */
11776
11777 bool
11778 s390_can_use_simple_return_insn (void)
11779 {
11780 return true;
11781 }
11782
11783 /* Return true if the epilogue is guaranteed to contain only a return
11784 instruction and if a direct return can therefore be used instead.
11785 One of the main advantages of using direct return instructions
11786 is that we can then use conditional returns. */
11787
11788 bool
11789 s390_can_use_return_insn (void)
11790 {
11791 int i;
11792
11793 if (!reload_completed)
11794 return false;
11795
11796 if (crtl->profile)
11797 return false;
11798
11799 if (TARGET_TPF_PROFILING)
11800 return false;
11801
11802 for (i = 0; i < 16; i++)
11803 if (cfun_gpr_save_slot (i) != SAVE_SLOT_NONE)
11804 return false;
11805
11806 /* For 31 bit this is not covered by the frame_size check below
11807 since f4, f6 are saved in the register save area without needing
11808 additional stack space. */
11809 if (!TARGET_64BIT
11810 && (cfun_fpr_save_p (FPR4_REGNUM) || cfun_fpr_save_p (FPR6_REGNUM)))
11811 return false;
11812
11813 if (cfun->machine->base_reg
11814 && !call_really_used_regs[REGNO (cfun->machine->base_reg)])
11815 return false;
11816
11817 return cfun_frame_layout.frame_size == 0;
11818 }
11819
11820 /* The VX ABI differs for vararg functions. Therefore we need the
11821 prototype of the callee to be available when passing vector type
11822 values. */
11823 static const char *
11824 s390_invalid_arg_for_unprototyped_fn (const_tree typelist, const_tree funcdecl, const_tree val)
11825 {
11826 return ((TARGET_VX_ABI
11827 && typelist == 0
11828 && VECTOR_TYPE_P (TREE_TYPE (val))
11829 && (funcdecl == NULL_TREE
11830 || (TREE_CODE (funcdecl) == FUNCTION_DECL
11831 && DECL_BUILT_IN_CLASS (funcdecl) != BUILT_IN_MD)))
11832 ? N_("vector argument passed to unprototyped function")
11833 : NULL);
11834 }
11835
11836
11837 /* Return the size in bytes of a function argument of
11838 type TYPE and/or mode MODE. At least one of TYPE or
11839 MODE must be specified. */
11840
11841 static int
11842 s390_function_arg_size (machine_mode mode, const_tree type)
11843 {
11844 if (type)
11845 return int_size_in_bytes (type);
11846
11847 /* No type info available for some library calls ... */
11848 if (mode != BLKmode)
11849 return GET_MODE_SIZE (mode);
11850
11851 /* If we have neither type nor mode, abort. */
11852 gcc_unreachable ();
11853 }
11854
11855 /* Return true if a function argument of type TYPE and mode MODE
11856 is to be passed in a vector register, if available. */
11857
11858 bool
11859 s390_function_arg_vector (machine_mode mode, const_tree type)
11860 {
11861 if (!TARGET_VX_ABI)
11862 return false;
11863
11864 if (s390_function_arg_size (mode, type) > 16)
11865 return false;
11866
11867 /* No type info available for some library calls ... */
11868 if (!type)
11869 return VECTOR_MODE_P (mode);
11870
11871 /* The ABI says that record types with a single member are treated
11872 just like that member would be. */
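  /* E.g. (illustrative) a struct whose only member is a "vector int"
     is passed like a plain "vector int", whereas a record whose single
     member is followed by tail padding fails the size check below and
     is not treated as a vector argument.  */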
11873 while (TREE_CODE (type) == RECORD_TYPE)
11874 {
11875 tree field, single = NULL_TREE;
11876
11877 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11878 {
11879 if (TREE_CODE (field) != FIELD_DECL)
11880 continue;
11881
11882 if (single == NULL_TREE)
11883 single = TREE_TYPE (field);
11884 else
11885 return false;
11886 }
11887
11888 if (single == NULL_TREE)
11889 return false;
11890 else
11891 {
11892 /* If the field declaration adds extra bytes, e.g. due to
11893 padding, the record is not accepted as a vector type. */
11894 if (int_size_in_bytes (single) <= 0
11895 || int_size_in_bytes (single) != int_size_in_bytes (type))
11896 return false;
11897 type = single;
11898 }
11899 }
11900
11901 return VECTOR_TYPE_P (type);
11902 }
11903
11904 /* Return true if a function argument of type TYPE and mode MODE
11905 is to be passed in a floating-point register, if available. */
11906
11907 static bool
11908 s390_function_arg_float (machine_mode mode, const_tree type)
11909 {
11910 if (s390_function_arg_size (mode, type) > 8)
11911 return false;
11912
11913 /* Soft-float changes the ABI: no floating-point registers are used. */
11914 if (TARGET_SOFT_FLOAT)
11915 return false;
11916
11917 /* No type info available for some library calls ... */
11918 if (!type)
11919 return mode == SFmode || mode == DFmode || mode == SDmode || mode == DDmode;
11920
11921 /* The ABI says that record types with a single member are treated
11922 just like that member would be. */
11923 while (TREE_CODE (type) == RECORD_TYPE)
11924 {
11925 tree field, single = NULL_TREE;
11926
11927 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
11928 {
11929 if (TREE_CODE (field) != FIELD_DECL)
11930 continue;
11931
11932 if (single == NULL_TREE)
11933 single = TREE_TYPE (field);
11934 else
11935 return false;
11936 }
11937
11938 if (single == NULL_TREE)
11939 return false;
11940 else
11941 type = single;
11942 }
11943
11944 return TREE_CODE (type) == REAL_TYPE;
11945 }
11946
11947 /* Return true if a function argument of type TYPE and mode MODE
11948 is to be passed in an integer register, or a pair of integer
11949 registers, if available. */
11950
11951 static bool
11952 s390_function_arg_integer (machine_mode mode, const_tree type)
11953 {
11954 int size = s390_function_arg_size (mode, type);
11955 if (size > 8)
11956 return false;
11957
11958 /* No type info available for some library calls ... */
11959 if (!type)
11960 return GET_MODE_CLASS (mode) == MODE_INT
11961 || (TARGET_SOFT_FLOAT && SCALAR_FLOAT_MODE_P (mode));
11962
11963 /* We accept small integral (and similar) types. */
11964 if (INTEGRAL_TYPE_P (type)
11965 || POINTER_TYPE_P (type)
11966 || TREE_CODE (type) == NULLPTR_TYPE
11967 || TREE_CODE (type) == OFFSET_TYPE
11968 || (TARGET_SOFT_FLOAT && TREE_CODE (type) == REAL_TYPE))
11969 return true;
11970
11971 /* We also accept structs of size 1, 2, 4, 8 that are not
11972 passed in floating-point registers. */
11973 if (AGGREGATE_TYPE_P (type)
11974 && exact_log2 (size) >= 0
11975 && !s390_function_arg_float (mode, type))
11976 return true;
11977
11978 return false;
11979 }
11980
11981 /* Return 1 if a function argument of type TYPE and mode MODE
11982 is to be passed by reference. The ABI specifies that only
11983 structures of size 1, 2, 4, or 8 bytes are passed by value,
11984 all other structures (and complex numbers) are passed by
11985 reference. */
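/* E.g. (illustrative) an 8-byte struct is passed by value in
   registers, whereas a 12-byte struct or a _Complex double argument
   is passed by reference, i.e. the caller passes the address of a
   copy.  */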
11986
11987 static bool
11988 s390_pass_by_reference (cumulative_args_t ca ATTRIBUTE_UNUSED,
11989 machine_mode mode, const_tree type,
11990 bool named ATTRIBUTE_UNUSED)
11991 {
11992 int size = s390_function_arg_size (mode, type);
11993
11994 if (s390_function_arg_vector (mode, type))
11995 return false;
11996
11997 if (size > 8)
11998 return true;
11999
12000 if (type)
12001 {
12002 if (AGGREGATE_TYPE_P (type) && exact_log2 (size) < 0)
12003 return true;
12004
12005 if (TREE_CODE (type) == COMPLEX_TYPE
12006 || TREE_CODE (type) == VECTOR_TYPE)
12007 return true;
12008 }
12009
12010 return false;
12011 }
12012
12013 /* Update the data in CUM to advance over an argument of mode MODE and
12014 data type TYPE. (TYPE is null for libcalls where that information
12015 may not be available.) The boolean NAMED specifies whether the
12016 argument is a named argument (as opposed to an unnamed argument
12017 matching an ellipsis). */
12018
12019 static void
12020 s390_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
12021 const_tree type, bool named)
12022 {
12023 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12024
12025 if (s390_function_arg_vector (mode, type))
12026 {
12027 /* We are called for unnamed vector stdarg arguments which are
12028 passed on the stack. In this case this hook does not have to
12029 do anything since stack arguments are tracked by common
12030 code. */
12031 if (!named)
12032 return;
12033 cum->vrs += 1;
12034 }
12035 else if (s390_function_arg_float (mode, type))
12036 {
12037 cum->fprs += 1;
12038 }
12039 else if (s390_function_arg_integer (mode, type))
12040 {
12041 int size = s390_function_arg_size (mode, type);
12042 cum->gprs += ((size + UNITS_PER_LONG - 1) / UNITS_PER_LONG);
12043 }
12044 else
12045 gcc_unreachable ();
12046 }
12047
12048 /* Define where to put the arguments to a function.
12049 Value is zero to push the argument on the stack,
12050 or a hard register in which to store the argument.
12051
12052 MODE is the argument's machine mode.
12053 TYPE is the data type of the argument (as a tree).
12054 This is null for libcalls where that information may
12055 not be available.
12056 CUM is a variable of type CUMULATIVE_ARGS which gives info about
12057 the preceding args and about the function being called.
12058 NAMED is nonzero if this argument is a named parameter
12059 (otherwise it is an extra parameter matching an ellipsis).
12060
12061 On S/390, we use general purpose registers 2 through 6 to
12062 pass integer, pointer, and certain structure arguments, and
12063 floating point registers 0 and 2 (0, 2, 4, and 6 on 64-bit)
12064 to pass floating point arguments. All remaining arguments
12065 are pushed to the stack. */
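/* For example (illustrative only), on a 64-bit target a call to
   f (int a, double b, long c) passes A in %r2, B in %f0 and C in %r3;
   the GPR and FPR counters in CUM advance independently.  */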
12066
12067 static rtx
12068 s390_function_arg (cumulative_args_t cum_v, machine_mode mode,
12069 const_tree type, bool named)
12070 {
12071 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
12072
12073 if (!named)
12074 s390_check_type_for_vector_abi (type, true, false);
12075
12076 if (s390_function_arg_vector (mode, type))
12077 {
12078 /* Vector arguments being part of the ellipsis are passed on the
12079 stack. */
12080 if (!named || (cum->vrs + 1 > VEC_ARG_NUM_REG))
12081 return NULL_RTX;
12082
12083 return gen_rtx_REG (mode, cum->vrs + FIRST_VEC_ARG_REGNO);
12084 }
12085 else if (s390_function_arg_float (mode, type))
12086 {
12087 if (cum->fprs + 1 > FP_ARG_NUM_REG)
12088 return NULL_RTX;
12089 else
12090 return gen_rtx_REG (mode, cum->fprs + 16);
12091 }
12092 else if (s390_function_arg_integer (mode, type))
12093 {
12094 int size = s390_function_arg_size (mode, type);
12095 int n_gprs = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12096
12097 if (cum->gprs + n_gprs > GP_ARG_NUM_REG)
12098 return NULL_RTX;
12099 else if (n_gprs == 1 || UNITS_PER_WORD == UNITS_PER_LONG)
12100 return gen_rtx_REG (mode, cum->gprs + 2);
12101 else if (n_gprs == 2)
12102 {
12103 rtvec p = rtvec_alloc (2);
12104
12105 RTVEC_ELT (p, 0)
12106 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 2),
12107 const0_rtx);
12108 RTVEC_ELT (p, 1)
12109 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, cum->gprs + 3),
12110 GEN_INT (4));
12111
12112 return gen_rtx_PARALLEL (mode, p);
12113 }
12114 }
12115
12116 /* After the real arguments, expand_call calls us once again
12117 with a void_type_node type. Whatever we return here is
12118 passed as operand 2 to the call expanders.
12119
12120 We don't need this feature ... */
12121 else if (type == void_type_node)
12122 return const0_rtx;
12123
12124 gcc_unreachable ();
12125 }
12126
12127 /* Implement TARGET_FUNCTION_ARG_PADDING. Vector arguments are
12128 left-justified when placed on the stack during parameter passing. */
12129
12130 static pad_direction
12131 s390_function_arg_padding (machine_mode mode, const_tree type)
12132 {
12133 if (s390_function_arg_vector (mode, type))
12134 return PAD_UPWARD;
12135
12136 return default_function_arg_padding (mode, type);
12137 }
12138
12139 /* Return true if return values of type TYPE should be returned
12140 in a memory buffer whose address is passed by the caller as
12141 hidden first argument. */
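/* E.g. (illustrative) "long" and "double" results are returned in
   registers, a 16-byte vector is returned in a vector register when
   the VX ABI is in effect, and any struct, even a 4-byte one, is
   returned through the hidden buffer.  */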
12142
12143 static bool
12144 s390_return_in_memory (const_tree type, const_tree fundecl ATTRIBUTE_UNUSED)
12145 {
12146 /* We accept small integral (and similar) types. */
12147 if (INTEGRAL_TYPE_P (type)
12148 || POINTER_TYPE_P (type)
12149 || TREE_CODE (type) == OFFSET_TYPE
12150 || TREE_CODE (type) == REAL_TYPE)
12151 return int_size_in_bytes (type) > 8;
12152
12153 /* vector types which fit into a VR. */
12154 if (TARGET_VX_ABI
12155 && VECTOR_TYPE_P (type)
12156 && int_size_in_bytes (type) <= 16)
12157 return false;
12158
12159 /* Aggregates and similar constructs are always returned
12160 in memory. */
12161 if (AGGREGATE_TYPE_P (type)
12162 || TREE_CODE (type) == COMPLEX_TYPE
12163 || VECTOR_TYPE_P (type))
12164 return true;
12165
12166 /* ??? We get called on all sorts of random stuff from
12167 aggregate_value_p. We can't abort, but it's not clear
12168 what's safe to return. Pretend it's a struct I guess. */
12169 return true;
12170 }
12171
12172 /* Function arguments and return values are promoted to word size. */
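/* E.g. (illustrative) an "int" argument or return value on a 64-bit
   target is widened to Pmode (DImode) before being placed in a GPR;
   pointer types are additionally marked for unsigned extension via
   POINTERS_EXTEND_UNSIGNED.  */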
12173
12174 static machine_mode
12175 s390_promote_function_mode (const_tree type, machine_mode mode,
12176 int *punsignedp,
12177 const_tree fntype ATTRIBUTE_UNUSED,
12178 int for_return ATTRIBUTE_UNUSED)
12179 {
12180 if (INTEGRAL_MODE_P (mode)
12181 && GET_MODE_SIZE (mode) < UNITS_PER_LONG)
12182 {
12183 if (type != NULL_TREE && POINTER_TYPE_P (type))
12184 *punsignedp = POINTERS_EXTEND_UNSIGNED;
12185 return Pmode;
12186 }
12187
12188 return mode;
12189 }
12190
12191 /* Define where to return a (scalar) value of type RET_TYPE.
12192 If RET_TYPE is null, define where to return a (scalar)
12193 value of mode MODE from a libcall. */
12194
12195 static rtx
12196 s390_function_and_libcall_value (machine_mode mode,
12197 const_tree ret_type,
12198 const_tree fntype_or_decl,
12199 bool outgoing ATTRIBUTE_UNUSED)
12200 {
12201 /* For vector return types it is important to use the RET_TYPE
12202 argument whenever available since the middle-end might have
12203 changed the mode to a scalar mode. */
12204 bool vector_ret_type_p = ((ret_type && VECTOR_TYPE_P (ret_type))
12205 || (!ret_type && VECTOR_MODE_P (mode)));
12206
12207 /* For normal functions perform the promotion as
12208 promote_function_mode would do. */
12209 if (ret_type)
12210 {
12211 int unsignedp = TYPE_UNSIGNED (ret_type);
12212 mode = promote_function_mode (ret_type, mode, &unsignedp,
12213 fntype_or_decl, 1);
12214 }
12215
12216 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
12217 || SCALAR_FLOAT_MODE_P (mode)
12218 || (TARGET_VX_ABI && vector_ret_type_p));
12219 gcc_assert (GET_MODE_SIZE (mode) <= (TARGET_VX_ABI ? 16 : 8));
12220
12221 if (TARGET_VX_ABI && vector_ret_type_p)
12222 return gen_rtx_REG (mode, FIRST_VEC_ARG_REGNO);
12223 else if (TARGET_HARD_FLOAT && SCALAR_FLOAT_MODE_P (mode))
12224 return gen_rtx_REG (mode, 16);
12225 else if (GET_MODE_SIZE (mode) <= UNITS_PER_LONG
12226 || UNITS_PER_LONG == UNITS_PER_WORD)
12227 return gen_rtx_REG (mode, 2);
12228 else if (GET_MODE_SIZE (mode) == 2 * UNITS_PER_LONG)
12229 {
12230 /* This case is triggered when returning a 64 bit value with
12231 -m31 -mzarch. Although the value would fit into a single
12232 register it has to be forced into a 32 bit register pair in
12233 order to match the ABI. */
12234 rtvec p = rtvec_alloc (2);
12235
12236 RTVEC_ELT (p, 0)
12237 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 2), const0_rtx);
12238 RTVEC_ELT (p, 1)
12239 = gen_rtx_EXPR_LIST (SImode, gen_rtx_REG (SImode, 3), GEN_INT (4));
12240
12241 return gen_rtx_PARALLEL (mode, p);
12242 }
12243
12244 gcc_unreachable ();
12245 }
12246
12247 /* Define where to return a scalar return value of type RET_TYPE. */
12248
12249 static rtx
12250 s390_function_value (const_tree ret_type, const_tree fn_decl_or_type,
12251 bool outgoing)
12252 {
12253 return s390_function_and_libcall_value (TYPE_MODE (ret_type), ret_type,
12254 fn_decl_or_type, outgoing);
12255 }
12256
12257 /* Define where to return a scalar libcall return value of mode
12258 MODE. */
12259
12260 static rtx
12261 s390_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
12262 {
12263 return s390_function_and_libcall_value (mode, NULL_TREE,
12264 NULL_TREE, true);
12265 }
12266
12267
12268 /* Create and return the va_list datatype.
12269
12270 On S/390, va_list is an array type equivalent to
12271
12272 typedef struct __va_list_tag
12273 {
12274 long __gpr;
12275 long __fpr;
12276 void *__overflow_arg_area;
12277 void *__reg_save_area;
12278 } va_list[1];
12279
12280 where __gpr and __fpr hold the number of general purpose
12281 or floating point arguments used up to now, respectively,
12282 __overflow_arg_area points to the stack location of the
12283 next argument passed on the stack, and __reg_save_area
12284 always points to the start of the register area in the
12285 call frame of the current function. The function prologue
12286 saves all registers used for argument passing into this
12287 area if the function uses variable arguments. */
12288
12289 static tree
12290 s390_build_builtin_va_list (void)
12291 {
12292 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
12293
12294 record = lang_hooks.types.make_type (RECORD_TYPE);
12295
12296 type_decl =
12297 build_decl (BUILTINS_LOCATION,
12298 TYPE_DECL, get_identifier ("__va_list_tag"), record);
12299
12300 f_gpr = build_decl (BUILTINS_LOCATION,
12301 FIELD_DECL, get_identifier ("__gpr"),
12302 long_integer_type_node);
12303 f_fpr = build_decl (BUILTINS_LOCATION,
12304 FIELD_DECL, get_identifier ("__fpr"),
12305 long_integer_type_node);
12306 f_ovf = build_decl (BUILTINS_LOCATION,
12307 FIELD_DECL, get_identifier ("__overflow_arg_area"),
12308 ptr_type_node);
12309 f_sav = build_decl (BUILTINS_LOCATION,
12310 FIELD_DECL, get_identifier ("__reg_save_area"),
12311 ptr_type_node);
12312
12313 va_list_gpr_counter_field = f_gpr;
12314 va_list_fpr_counter_field = f_fpr;
12315
12316 DECL_FIELD_CONTEXT (f_gpr) = record;
12317 DECL_FIELD_CONTEXT (f_fpr) = record;
12318 DECL_FIELD_CONTEXT (f_ovf) = record;
12319 DECL_FIELD_CONTEXT (f_sav) = record;
12320
12321 TYPE_STUB_DECL (record) = type_decl;
12322 TYPE_NAME (record) = type_decl;
12323 TYPE_FIELDS (record) = f_gpr;
12324 DECL_CHAIN (f_gpr) = f_fpr;
12325 DECL_CHAIN (f_fpr) = f_ovf;
12326 DECL_CHAIN (f_ovf) = f_sav;
12327
12328 layout_type (record);
12329
12330 /* The correct type is an array type of one element. */
12331 return build_array_type (record, build_index_type (size_zero_node));
12332 }
12333
12334 /* Implement va_start by filling the va_list structure VALIST.
12335 STDARG_P is always true, and ignored.
12336 NEXTARG points to the first anonymous stack argument.
12337
12338 The following global variables are used to initialize
12339 the va_list structure:
12340
12341 crtl->args.info:
12342 holds number of gprs and fprs used for named arguments.
12343 crtl->args.arg_offset_rtx:
12344 holds the offset of the first anonymous stack argument
12345 (relative to the virtual arg pointer). */
12346
12347 static void
12348 s390_va_start (tree valist, rtx nextarg ATTRIBUTE_UNUSED)
12349 {
12350 HOST_WIDE_INT n_gpr, n_fpr;
12351 int off;
12352 tree f_gpr, f_fpr, f_ovf, f_sav;
12353 tree gpr, fpr, ovf, sav, t;
12354
12355 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12356 f_fpr = DECL_CHAIN (f_gpr);
12357 f_ovf = DECL_CHAIN (f_fpr);
12358 f_sav = DECL_CHAIN (f_ovf);
12359
12360 valist = build_simple_mem_ref (valist);
12361 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12362 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12363 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12364 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12365
12366 /* Count number of gp and fp argument registers used. */
12367
12368 n_gpr = crtl->args.info.gprs;
12369 n_fpr = crtl->args.info.fprs;
12370
12371 if (cfun->va_list_gpr_size)
12372 {
12373 t = build2 (MODIFY_EXPR, TREE_TYPE (gpr), gpr,
12374 build_int_cst (NULL_TREE, n_gpr));
12375 TREE_SIDE_EFFECTS (t) = 1;
12376 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12377 }
12378
12379 if (cfun->va_list_fpr_size)
12380 {
12381 t = build2 (MODIFY_EXPR, TREE_TYPE (fpr), fpr,
12382 build_int_cst (NULL_TREE, n_fpr));
12383 TREE_SIDE_EFFECTS (t) = 1;
12384 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12385 }
12386
12387 if (flag_split_stack
12388 && (lookup_attribute ("no_split_stack", DECL_ATTRIBUTES (cfun->decl))
12389 == NULL)
12390 && cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12391 {
12392 rtx reg;
12393 rtx_insn *seq;
12394
12395 reg = gen_reg_rtx (Pmode);
12396 cfun->machine->split_stack_varargs_pointer = reg;
12397
12398 start_sequence ();
12399 emit_move_insn (reg, gen_rtx_REG (Pmode, 1));
12400 seq = get_insns ();
12401 end_sequence ();
12402
12403 push_topmost_sequence ();
12404 emit_insn_after (seq, entry_of_function ());
12405 pop_topmost_sequence ();
12406 }
12407
12408 /* Find the overflow area.
12409 FIXME: This currently is too pessimistic when the vector ABI is
12410 enabled. In that case we *always* set up the overflow area
12411 pointer. */
12412 if (n_gpr + cfun->va_list_gpr_size > GP_ARG_NUM_REG
12413 || n_fpr + cfun->va_list_fpr_size > FP_ARG_NUM_REG
12414 || TARGET_VX_ABI)
12415 {
12416 if (cfun->machine->split_stack_varargs_pointer == NULL_RTX)
12417 t = make_tree (TREE_TYPE (ovf), virtual_incoming_args_rtx);
12418 else
12419 t = make_tree (TREE_TYPE (ovf), cfun->machine->split_stack_varargs_pointer);
12420
12421 off = INTVAL (crtl->args.arg_offset_rtx);
12422 off = off < 0 ? 0 : off;
12423 if (TARGET_DEBUG_ARG)
12424 fprintf (stderr, "va_start: n_gpr = %d, n_fpr = %d off %d\n",
12425 (int)n_gpr, (int)n_fpr, off);
12426
12427 t = fold_build_pointer_plus_hwi (t, off);
12428
12429 t = build2 (MODIFY_EXPR, TREE_TYPE (ovf), ovf, t);
12430 TREE_SIDE_EFFECTS (t) = 1;
12431 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12432 }
12433
12434 /* Find the register save area. */
12435 if ((cfun->va_list_gpr_size && n_gpr < GP_ARG_NUM_REG)
12436 || (cfun->va_list_fpr_size && n_fpr < FP_ARG_NUM_REG))
12437 {
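      /* The (virtual) return address pointer addresses the save slot
	 of r14, i.e. RETURN_REGNUM; backing up by RETURN_REGNUM slots
	 yields a notional slot for r0, so that the offsets used in
	 s390_gimplify_va_arg (2 * UNITS_PER_LONG for r2 plus
	 __gpr * UNITS_PER_LONG) address the correct GPR save slots.  */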
12438 t = make_tree (TREE_TYPE (sav), return_address_pointer_rtx);
12439 t = fold_build_pointer_plus_hwi (t, -RETURN_REGNUM * UNITS_PER_LONG);
12440
12441 t = build2 (MODIFY_EXPR, TREE_TYPE (sav), sav, t);
12442 TREE_SIDE_EFFECTS (t) = 1;
12443 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
12444 }
12445 }
12446
12447 /* Implement va_arg by updating the va_list structure
12448 VALIST as required to retrieve an argument of type
12449 TYPE, and returning that argument.
12450
12451 Generates code equivalent to:
12452
12453 if (integral value) {
12454 if (size <= 4 && args.gpr < 5 ||
12455 size > 4 && args.gpr < 4 )
12456 ret = args.reg_save_area[args.gpr+8]
12457 else
12458 ret = *args.overflow_arg_area++;
12459 } else if (vector value) {
12460 ret = *args.overflow_arg_area;
12461 args.overflow_arg_area += size / 8;
12462 } else if (float value) {
12463 if (args.fpr < 2)
12464 ret = args.reg_save_area[args.fpr+64]
12465 else
12466 ret = *args.overflow_arg_area++;
12467 } else if (aggregate value) {
12468 if (args.gpr < 5)
12469 ret = *args.reg_save_area[args.gpr]
12470 else
12471 ret = **args.overflow_arg_area++;
12472 } */
12473
12474 static tree
12475 s390_gimplify_va_arg (tree valist, tree type, gimple_seq *pre_p,
12476 gimple_seq *post_p ATTRIBUTE_UNUSED)
12477 {
12478 tree f_gpr, f_fpr, f_ovf, f_sav;
12479 tree gpr, fpr, ovf, sav, reg, t, u;
12480 int indirect_p, size, n_reg, sav_ofs, sav_scale, max_reg;
12481 tree lab_false, lab_over = NULL_TREE;
12482 tree addr = create_tmp_var (ptr_type_node, "addr");
12483 bool left_align_p; /* How a value < UNITS_PER_LONG is aligned within
12484 a stack slot. */
12485
12486 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
12487 f_fpr = DECL_CHAIN (f_gpr);
12488 f_ovf = DECL_CHAIN (f_fpr);
12489 f_sav = DECL_CHAIN (f_ovf);
12490
12491 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
12492 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
12493 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
12494
12495 /* The tree for args* cannot be shared between gpr/fpr and ovf since
12496 both appear on a lhs. */
12497 valist = unshare_expr (valist);
12498 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
12499
12500 size = int_size_in_bytes (type);
12501
12502 s390_check_type_for_vector_abi (type, true, false);
12503
12504 if (pass_by_reference (NULL, TYPE_MODE (type), type, false))
12505 {
12506 if (TARGET_DEBUG_ARG)
12507 {
12508 fprintf (stderr, "va_arg: aggregate type");
12509 debug_tree (type);
12510 }
12511
12512 /* Aggregates are passed by reference. */
12513 indirect_p = 1;
12514 reg = gpr;
12515 n_reg = 1;
12516
12517 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12518 will be added by s390_frame_info because for va_args an even number
12519 of GPRs always has to be saved (r15-r2 = 14 regs). */
12520 sav_ofs = 2 * UNITS_PER_LONG;
12521 sav_scale = UNITS_PER_LONG;
12522 size = UNITS_PER_LONG;
12523 max_reg = GP_ARG_NUM_REG - n_reg;
12524 left_align_p = false;
12525 }
12526 else if (s390_function_arg_vector (TYPE_MODE (type), type))
12527 {
12528 if (TARGET_DEBUG_ARG)
12529 {
12530 fprintf (stderr, "va_arg: vector type");
12531 debug_tree (type);
12532 }
12533
12534 indirect_p = 0;
12535 reg = NULL_TREE;
12536 n_reg = 0;
12537 sav_ofs = 0;
12538 sav_scale = 8;
12539 max_reg = 0;
12540 left_align_p = true;
12541 }
12542 else if (s390_function_arg_float (TYPE_MODE (type), type))
12543 {
12544 if (TARGET_DEBUG_ARG)
12545 {
12546 fprintf (stderr, "va_arg: float type");
12547 debug_tree (type);
12548 }
12549
12550 /* FP args go in FP registers, if present. */
12551 indirect_p = 0;
12552 reg = fpr;
12553 n_reg = 1;
12554 sav_ofs = 16 * UNITS_PER_LONG;
12555 sav_scale = 8;
12556 max_reg = FP_ARG_NUM_REG - n_reg;
12557 left_align_p = false;
12558 }
12559 else
12560 {
12561 if (TARGET_DEBUG_ARG)
12562 {
12563 fprintf (stderr, "va_arg: other type");
12564 debug_tree (type);
12565 }
12566
12567 /* Otherwise into GP registers. */
12568 indirect_p = 0;
12569 reg = gpr;
12570 n_reg = (size + UNITS_PER_LONG - 1) / UNITS_PER_LONG;
12571
12572 /* Kernel stack layout on 31 bit: it is assumed here that no padding
12573 will be added by s390_frame_info because for va_args an even number
12574 of GPRs always has to be saved (r15-r2 = 14 regs). */
12575 sav_ofs = 2 * UNITS_PER_LONG;
12576
12577 if (size < UNITS_PER_LONG)
12578 sav_ofs += UNITS_PER_LONG - size;
12579
12580 sav_scale = UNITS_PER_LONG;
12581 max_reg = GP_ARG_NUM_REG - n_reg;
12582 left_align_p = false;
12583 }
12584
12585 /* Pull the value out of the saved registers ... */
12586
12587 if (reg != NULL_TREE)
12588 {
12589 /*
12590 if (reg > ((typeof (reg))max_reg))
12591 goto lab_false;
12592
12593 addr = sav + sav_ofs + reg * save_scale;
12594
12595 goto lab_over;
12596
12597 lab_false:
12598 */
12599
12600 lab_false = create_artificial_label (UNKNOWN_LOCATION);
12601 lab_over = create_artificial_label (UNKNOWN_LOCATION);
12602
12603 t = fold_convert (TREE_TYPE (reg), size_int (max_reg));
12604 t = build2 (GT_EXPR, boolean_type_node, reg, t);
12605 u = build1 (GOTO_EXPR, void_type_node, lab_false);
12606 t = build3 (COND_EXPR, void_type_node, t, u, NULL_TREE);
12607 gimplify_and_add (t, pre_p);
12608
12609 t = fold_build_pointer_plus_hwi (sav, sav_ofs);
12610 u = build2 (MULT_EXPR, TREE_TYPE (reg), reg,
12611 fold_convert (TREE_TYPE (reg), size_int (sav_scale)));
12612 t = fold_build_pointer_plus (t, u);
12613
12614 gimplify_assign (addr, t, pre_p);
12615
12616 gimple_seq_add_stmt (pre_p, gimple_build_goto (lab_over));
12617
12618 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_false));
12619 }
12620
12621 /* ... Otherwise out of the overflow area. */
12622
12623 t = ovf;
12624 if (size < UNITS_PER_LONG && !left_align_p)
12625 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG - size);
12626
12627 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
12628
12629 gimplify_assign (addr, t, pre_p);
12630
12631 if (size < UNITS_PER_LONG && left_align_p)
12632 t = fold_build_pointer_plus_hwi (t, UNITS_PER_LONG);
12633 else
12634 t = fold_build_pointer_plus_hwi (t, size);
12635
12636 gimplify_assign (ovf, t, pre_p);
12637
12638 if (reg != NULL_TREE)
12639 gimple_seq_add_stmt (pre_p, gimple_build_label (lab_over));
12640
12641
12642 /* Increment register save count. */
12643
12644 if (n_reg > 0)
12645 {
12646 u = build2 (PREINCREMENT_EXPR, TREE_TYPE (reg), reg,
12647 fold_convert (TREE_TYPE (reg), size_int (n_reg)));
12648 gimplify_and_add (u, pre_p);
12649 }
12650
12651 if (indirect_p)
12652 {
12653 t = build_pointer_type_for_mode (build_pointer_type (type),
12654 ptr_mode, true);
12655 addr = fold_convert (t, addr);
12656 addr = build_va_arg_indirect_ref (addr);
12657 }
12658 else
12659 {
12660 t = build_pointer_type_for_mode (type, ptr_mode, true);
12661 addr = fold_convert (t, addr);
12662 }
12663
12664 return build_va_arg_indirect_ref (addr);
12665 }
12666
12667 /* Emit rtl for the tbegin or tbegin_retry (RETRY != NULL_RTX)
12668 expanders.
12669 DEST - Register location where CC will be stored.
12670 TDB - Pointer to a 256 byte area where to store the transaction.
12671 diagnostic block. NULL if TDB is not needed.
12672 RETRY - Retry count value. If non-NULL a retry loop for CC2
12673 is emitted
12674 CLOBBER_FPRS_P - If true clobbers for all FPRs are emitted as part
12675 of the tbegin instruction pattern. */
12676
12677 void
12678 s390_expand_tbegin (rtx dest, rtx tdb, rtx retry, bool clobber_fprs_p)
12679 {
12680 rtx retry_plus_two = gen_reg_rtx (SImode);
12681 rtx retry_reg = gen_reg_rtx (SImode);
12682 rtx_code_label *retry_label = NULL;
12683
12684 if (retry != NULL_RTX)
12685 {
12686 emit_move_insn (retry_reg, retry);
12687 emit_insn (gen_addsi3 (retry_plus_two, retry_reg, const2_rtx));
12688 emit_insn (gen_addsi3 (retry_reg, retry_reg, const1_rtx));
12689 retry_label = gen_label_rtx ();
12690 emit_label (retry_label);
12691 }
12692
12693 if (clobber_fprs_p)
12694 {
12695 if (TARGET_VX)
12696 emit_insn (gen_tbegin_1_z13 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12697 tdb));
12698 else
12699 emit_insn (gen_tbegin_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12700 tdb));
12701 }
12702 else
12703 emit_insn (gen_tbegin_nofloat_1 (gen_rtx_CONST_INT (VOIDmode, TBEGIN_MASK),
12704 tdb));
12705
12706 emit_move_insn (dest, gen_rtx_UNSPEC (SImode,
12707 gen_rtvec (1, gen_rtx_REG (CCRAWmode,
12708 CC_REGNUM)),
12709 UNSPEC_CC_TO_INT));
12710 if (retry != NULL_RTX)
12711 {
12712 const int CC0 = 1 << 3;
12713 const int CC1 = 1 << 2;
12714 const int CC3 = 1 << 0;
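      /* These constants form a CCRAWmode branch mask: bit 3 (weight 8)
	 selects CC0, bit 2 CC1, bit 1 CC2 and bit 0 CC3.  The jump
	 below leaves the retry loop for CC0, CC1 and CC3; CC2
	 (transient abort) falls through into the retry path.  */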
12715 rtx jump;
12716 rtx count = gen_reg_rtx (SImode);
12717 rtx_code_label *leave_label = gen_label_rtx ();
12718
12719 /* Exit for success and permanent failures. */
12720 jump = s390_emit_jump (leave_label,
12721 gen_rtx_EQ (VOIDmode,
12722 gen_rtx_REG (CCRAWmode, CC_REGNUM),
12723 gen_rtx_CONST_INT (VOIDmode, CC0 | CC1 | CC3)));
12724 LABEL_NUSES (leave_label) = 1;
12725
12726 /* CC2 - transient failure. Perform retry with ppa. */
12727 emit_move_insn (count, retry_plus_two);
12728 emit_insn (gen_subsi3 (count, count, retry_reg));
12729 emit_insn (gen_tx_assist (count));
12730 jump = emit_jump_insn (gen_doloop_si64 (retry_label,
12731 retry_reg,
12732 retry_reg));
12733 JUMP_LABEL (jump) = retry_label;
12734 LABEL_NUSES (retry_label) = 1;
12735 emit_label (leave_label);
12736 }
12737 }
12738
12739
12740 /* Return the decl for the target specific builtin with the function
12741 code FCODE. */
12742
12743 static tree
12744 s390_builtin_decl (unsigned fcode, bool initialized_p ATTRIBUTE_UNUSED)
12745 {
12746 if (fcode >= S390_BUILTIN_MAX)
12747 return error_mark_node;
12748
12749 return s390_builtin_decls[fcode];
12750 }
12751
12752 /* We call mcount before the function prologue. So a profiled leaf
12753 function should stay a leaf function. */
12754
12755 static bool
12756 s390_keep_leaf_when_profiled ()
12757 {
12758 return true;
12759 }
12760
12761 /* Output assembly code for the trampoline template to
12762 stdio stream FILE.
12763
12764 On S/390, we use gpr 1 internally in the trampoline code;
12765 gpr 0 is used to hold the static chain. */
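/* The resulting trampoline, once s390_trampoline_init below has filled
   in the variable parts, looks like this on 64 bit (byte offsets on
   the left):

     0   basr  %r1,0            put the address of the next insn in %r1
     2   lmg   %r0,%r1,14(%r1)  load the static chain into %r0 and the
                                target address into %r1 (offsets 16/24)
     8   br    %r1              jump to the target function
    16   <static chain value>
    24   <target function address>

   On 31 bit the "lm" form is used and the two data slots live at
   offsets 8 and 12.  */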
12766
12767 static void
12768 s390_asm_trampoline_template (FILE *file)
12769 {
12770 rtx op[2];
12771 op[0] = gen_rtx_REG (Pmode, 0);
12772 op[1] = gen_rtx_REG (Pmode, 1);
12773
12774 if (TARGET_64BIT)
12775 {
12776 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12777 output_asm_insn ("lmg\t%0,%1,14(%1)", op); /* 6 byte */
12778 output_asm_insn ("br\t%1", op); /* 2 byte */
12779 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 10));
12780 }
12781 else
12782 {
12783 output_asm_insn ("basr\t%1,0", op); /* 2 byte */
12784 output_asm_insn ("lm\t%0,%1,6(%1)", op); /* 4 byte */
12785 output_asm_insn ("br\t%1", op); /* 2 byte */
12786 ASM_OUTPUT_SKIP (file, (HOST_WIDE_INT)(TRAMPOLINE_SIZE - 8));
12787 }
12788 }
12789
12790 /* Emit RTL insns to initialize the variable parts of a trampoline.
12791 FNADDR is an RTX for the address of the function's pure code.
12792 CXT is an RTX for the static chain value for the function. */
12793
12794 static void
12795 s390_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
12796 {
12797 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
12798 rtx mem;
12799
12800 emit_block_move (m_tramp, assemble_trampoline_template (),
12801 GEN_INT (2 * UNITS_PER_LONG), BLOCK_OP_NORMAL);
12802
12803 mem = adjust_address (m_tramp, Pmode, 2 * UNITS_PER_LONG);
12804 emit_move_insn (mem, cxt);
12805 mem = adjust_address (m_tramp, Pmode, 3 * UNITS_PER_LONG);
12806 emit_move_insn (mem, fnaddr);
12807 }
12808
12809 /* Output assembler code to FILE to increment profiler label # LABELNO
12810 for profiling a function entry. */
12811
12812 void
12813 s390_function_profiler (FILE *file, int labelno)
12814 {
12815 rtx op[7];
12816
12817 char label[128];
12818 ASM_GENERATE_INTERNAL_LABEL (label, "LP", labelno);
12819
12820 fprintf (file, "# function profiler \n");
12821
12822 op[0] = gen_rtx_REG (Pmode, RETURN_REGNUM);
12823 op[1] = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
12824 op[1] = gen_rtx_MEM (Pmode, plus_constant (Pmode, op[1], UNITS_PER_LONG));
12825
12826 op[2] = gen_rtx_REG (Pmode, 1);
12827 op[3] = gen_rtx_SYMBOL_REF (Pmode, label);
12828 SYMBOL_REF_FLAGS (op[3]) = SYMBOL_FLAG_LOCAL;
12829
12830 op[4] = gen_rtx_SYMBOL_REF (Pmode, "_mcount");
12831 if (flag_pic)
12832 {
12833 op[4] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[4]), UNSPEC_PLT);
12834 op[4] = gen_rtx_CONST (Pmode, op[4]);
12835 }
12836
12837 if (TARGET_64BIT)
12838 {
12839 output_asm_insn ("stg\t%0,%1", op);
12840 output_asm_insn ("larl\t%2,%3", op);
12841 output_asm_insn ("brasl\t%0,%4", op);
12842 output_asm_insn ("lg\t%0,%1", op);
12843 }
12844 else if (TARGET_CPU_ZARCH)
12845 {
12846 output_asm_insn ("st\t%0,%1", op);
12847 output_asm_insn ("larl\t%2,%3", op);
12848 output_asm_insn ("brasl\t%0,%4", op);
12849 output_asm_insn ("l\t%0,%1", op);
12850 }
12851 else if (!flag_pic)
12852 {
12853 op[6] = gen_label_rtx ();
12854
12855 output_asm_insn ("st\t%0,%1", op);
12856 output_asm_insn ("bras\t%2,%l6", op);
12857 output_asm_insn (".long\t%4", op);
12858 output_asm_insn (".long\t%3", op);
12859 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12860 output_asm_insn ("l\t%0,0(%2)", op);
12861 output_asm_insn ("l\t%2,4(%2)", op);
12862 output_asm_insn ("basr\t%0,%0", op);
12863 output_asm_insn ("l\t%0,%1", op);
12864 }
12865 else
12866 {
12867 op[5] = gen_label_rtx ();
12868 op[6] = gen_label_rtx ();
12869
12870 output_asm_insn ("st\t%0,%1", op);
12871 output_asm_insn ("bras\t%2,%l6", op);
12872 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[5]));
12873 output_asm_insn (".long\t%4-%l5", op);
12874 output_asm_insn (".long\t%3-%l5", op);
12875 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[6]));
12876 output_asm_insn ("lr\t%0,%2", op);
12877 output_asm_insn ("a\t%0,0(%2)", op);
12878 output_asm_insn ("a\t%2,4(%2)", op);
12879 output_asm_insn ("basr\t%0,%0", op);
12880 output_asm_insn ("l\t%0,%1", op);
12881 }
12882 }
12883
12884 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
12885 into its SYMBOL_REF_FLAGS. */
12886
12887 static void
12888 s390_encode_section_info (tree decl, rtx rtl, int first)
12889 {
12890 default_encode_section_info (decl, rtl, first);
12891
12892 if (TREE_CODE (decl) == VAR_DECL)
12893 {
12894 /* Store the alignment to be able to check if we can use
12895 a larl/load-relative instruction. We only handle the cases
12896 that can go wrong (i.e. no FUNC_DECLs). */
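      /* DECL_ALIGN is measured in bits, so "% 16" detects objects with
	 less than 2-byte alignment, "% 32" less than 4-byte and
	 "% 64" less than 8-byte alignment.  */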
12897 if (DECL_ALIGN (decl) == 0 || DECL_ALIGN (decl) % 16)
12898 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12899 else if (DECL_ALIGN (decl) % 32)
12900 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12901 else if (DECL_ALIGN (decl) % 64)
12902 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12903 }
12904
12905 /* Literal pool references don't have a decl so they are handled
12906 differently here. We rely on the information in the MEM_ALIGN
12907 entry to decide upon the alignment. */
12908 if (MEM_P (rtl)
12909 && GET_CODE (XEXP (rtl, 0)) == SYMBOL_REF
12910 && TREE_CONSTANT_POOL_ADDRESS_P (XEXP (rtl, 0)))
12911 {
12912 if (MEM_ALIGN (rtl) == 0 || MEM_ALIGN (rtl) % 16)
12913 SYMBOL_FLAG_SET_NOTALIGN2 (XEXP (rtl, 0));
12914 else if (MEM_ALIGN (rtl) % 32)
12915 SYMBOL_FLAG_SET_NOTALIGN4 (XEXP (rtl, 0));
12916 else if (MEM_ALIGN (rtl) % 64)
12917 SYMBOL_FLAG_SET_NOTALIGN8 (XEXP (rtl, 0));
12918 }
12919 }
12920
12921 /* Output thunk to FILE that implements a C++ virtual function call (with
12922 multiple inheritance) to FUNCTION. The thunk adjusts the this pointer
12923 by DELTA, and unless VCALL_OFFSET is zero, applies an additional adjustment
12924 stored at VCALL_OFFSET in the vtable whose address is located at offset 0
12925 relative to the resulting this pointer. */
12926
12927 static void
12928 s390_output_mi_thunk (FILE *file, tree thunk ATTRIBUTE_UNUSED,
12929 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
12930 tree function)
12931 {
12932 rtx op[10];
12933 int nonlocal = 0;
12934
12935 /* Make sure unwind info is emitted for the thunk if needed. */
12936 final_start_function (emit_barrier (), file, 1);
12937
12938 /* Operand 0 is the target function. */
12939 op[0] = XEXP (DECL_RTL (function), 0);
12940 if (flag_pic && !SYMBOL_REF_LOCAL_P (op[0]))
12941 {
12942 nonlocal = 1;
12943 op[0] = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op[0]),
12944 TARGET_64BIT ? UNSPEC_PLT : UNSPEC_GOT);
12945 op[0] = gen_rtx_CONST (Pmode, op[0]);
12946 }
12947
12948 /* Operand 1 is the 'this' pointer. */
12949 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
12950 op[1] = gen_rtx_REG (Pmode, 3);
12951 else
12952 op[1] = gen_rtx_REG (Pmode, 2);
12953
12954 /* Operand 2 is the delta. */
12955 op[2] = GEN_INT (delta);
12956
12957 /* Operand 3 is the vcall_offset. */
12958 op[3] = GEN_INT (vcall_offset);
12959
12960 /* Operand 4 is the temporary register. */
12961 op[4] = gen_rtx_REG (Pmode, 1);
12962
12963 /* Operands 5 to 8 can be used as labels. */
12964 op[5] = NULL_RTX;
12965 op[6] = NULL_RTX;
12966 op[7] = NULL_RTX;
12967 op[8] = NULL_RTX;
12968
12969 /* Operand 9 can be used for temporary register. */
12970 op[9] = NULL_RTX;
12971
12972 /* Generate code. */
12973 if (TARGET_64BIT)
12974 {
12975 /* Setup literal pool pointer if required. */
12976 if ((!DISP_IN_RANGE (delta)
12977 && !CONST_OK_FOR_K (delta)
12978 && !CONST_OK_FOR_Os (delta))
12979 || (!DISP_IN_RANGE (vcall_offset)
12980 && !CONST_OK_FOR_K (vcall_offset)
12981 && !CONST_OK_FOR_Os (vcall_offset)))
12982 {
12983 op[5] = gen_label_rtx ();
12984 output_asm_insn ("larl\t%4,%5", op);
12985 }
12986
12987 /* Add DELTA to this pointer. */
12988 if (delta)
12989 {
12990 if (CONST_OK_FOR_J (delta))
12991 output_asm_insn ("la\t%1,%2(%1)", op);
12992 else if (DISP_IN_RANGE (delta))
12993 output_asm_insn ("lay\t%1,%2(%1)", op);
12994 else if (CONST_OK_FOR_K (delta))
12995 output_asm_insn ("aghi\t%1,%2", op);
12996 else if (CONST_OK_FOR_Os (delta))
12997 output_asm_insn ("agfi\t%1,%2", op);
12998 else
12999 {
13000 op[6] = gen_label_rtx ();
13001 output_asm_insn ("agf\t%1,%6-%5(%4)", op);
13002 }
13003 }
13004
13005 /* Perform vcall adjustment. */
13006 if (vcall_offset)
13007 {
13008 if (DISP_IN_RANGE (vcall_offset))
13009 {
13010 output_asm_insn ("lg\t%4,0(%1)", op);
13011 output_asm_insn ("ag\t%1,%3(%4)", op);
13012 }
13013 else if (CONST_OK_FOR_K (vcall_offset))
13014 {
13015 output_asm_insn ("lghi\t%4,%3", op);
13016 output_asm_insn ("ag\t%4,0(%1)", op);
13017 output_asm_insn ("ag\t%1,0(%4)", op);
13018 }
13019 else if (CONST_OK_FOR_Os (vcall_offset))
13020 {
13021 output_asm_insn ("lgfi\t%4,%3", op);
13022 output_asm_insn ("ag\t%4,0(%1)", op);
13023 output_asm_insn ("ag\t%1,0(%4)", op);
13024 }
13025 else
13026 {
13027 op[7] = gen_label_rtx ();
13028 output_asm_insn ("llgf\t%4,%7-%5(%4)", op);
13029 output_asm_insn ("ag\t%4,0(%1)", op);
13030 output_asm_insn ("ag\t%1,0(%4)", op);
13031 }
13032 }
13033
13034 /* Jump to target. */
13035 output_asm_insn ("jg\t%0", op);
13036
13037 /* Output literal pool if required. */
13038 if (op[5])
13039 {
13040 output_asm_insn (".align\t4", op);
13041 targetm.asm_out.internal_label (file, "L",
13042 CODE_LABEL_NUMBER (op[5]));
13043 }
13044 if (op[6])
13045 {
13046 targetm.asm_out.internal_label (file, "L",
13047 CODE_LABEL_NUMBER (op[6]));
13048 output_asm_insn (".long\t%2", op);
13049 }
13050 if (op[7])
13051 {
13052 targetm.asm_out.internal_label (file, "L",
13053 CODE_LABEL_NUMBER (op[7]));
13054 output_asm_insn (".long\t%3", op);
13055 }
13056 }
13057 else
13058 {
13059 /* Setup base pointer if required. */
13060 if (!vcall_offset
13061 || (!DISP_IN_RANGE (delta)
13062 && !CONST_OK_FOR_K (delta)
13063 && !CONST_OK_FOR_Os (delta))
13064 || (!DISP_IN_RANGE (delta)
13065 && !CONST_OK_FOR_K (vcall_offset)
13066 && !CONST_OK_FOR_Os (vcall_offset)))
13067 {
13068 op[5] = gen_label_rtx ();
13069 output_asm_insn ("basr\t%4,0", op);
13070 targetm.asm_out.internal_label (file, "L",
13071 CODE_LABEL_NUMBER (op[5]));
13072 }
13073
13074 /* Add DELTA to this pointer. */
13075 if (delta)
13076 {
13077 if (CONST_OK_FOR_J (delta))
13078 output_asm_insn ("la\t%1,%2(%1)", op);
13079 else if (DISP_IN_RANGE (delta))
13080 output_asm_insn ("lay\t%1,%2(%1)", op);
13081 else if (CONST_OK_FOR_K (delta))
13082 output_asm_insn ("ahi\t%1,%2", op);
13083 else if (CONST_OK_FOR_Os (delta))
13084 output_asm_insn ("afi\t%1,%2", op);
13085 else
13086 {
13087 op[6] = gen_label_rtx ();
13088 output_asm_insn ("a\t%1,%6-%5(%4)", op);
13089 }
13090 }
13091
13092 /* Perform vcall adjustment. */
13093 if (vcall_offset)
13094 {
13095 if (CONST_OK_FOR_J (vcall_offset))
13096 {
13097 output_asm_insn ("l\t%4,0(%1)", op);
13098 output_asm_insn ("a\t%1,%3(%4)", op);
13099 }
13100 else if (DISP_IN_RANGE (vcall_offset))
13101 {
13102 output_asm_insn ("l\t%4,0(%1)", op);
13103 output_asm_insn ("ay\t%1,%3(%4)", op);
13104 }
13105 else if (CONST_OK_FOR_K (vcall_offset))
13106 {
13107 output_asm_insn ("lhi\t%4,%3", op);
13108 output_asm_insn ("a\t%4,0(%1)", op);
13109 output_asm_insn ("a\t%1,0(%4)", op);
13110 }
13111 else if (CONST_OK_FOR_Os (vcall_offset))
13112 {
13113 output_asm_insn ("iilf\t%4,%3", op);
13114 output_asm_insn ("a\t%4,0(%1)", op);
13115 output_asm_insn ("a\t%1,0(%4)", op);
13116 }
13117 else
13118 {
13119 op[7] = gen_label_rtx ();
13120 output_asm_insn ("l\t%4,%7-%5(%4)", op);
13121 output_asm_insn ("a\t%4,0(%1)", op);
13122 output_asm_insn ("a\t%1,0(%4)", op);
13123 }
13124
13125 /* We had to clobber the base pointer register.
13126 Re-setup the base pointer (with a different base). */
13127 op[5] = gen_label_rtx ();
13128 output_asm_insn ("basr\t%4,0", op);
13129 targetm.asm_out.internal_label (file, "L",
13130 CODE_LABEL_NUMBER (op[5]));
13131 }
13132
13133 /* Jump to target. */
13134 op[8] = gen_label_rtx ();
13135
13136 if (!flag_pic)
13137 output_asm_insn ("l\t%4,%8-%5(%4)", op);
13138 else if (!nonlocal)
13139 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13140 /* We cannot call through .plt, since .plt requires %r12 loaded. */
13141 else if (flag_pic == 1)
13142 {
13143 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13144 output_asm_insn ("l\t%4,%0(%4)", op);
13145 }
13146 else if (flag_pic == 2)
13147 {
13148 op[9] = gen_rtx_REG (Pmode, 0);
13149 output_asm_insn ("l\t%9,%8-4-%5(%4)", op);
13150 output_asm_insn ("a\t%4,%8-%5(%4)", op);
13151 output_asm_insn ("ar\t%4,%9", op);
13152 output_asm_insn ("l\t%4,0(%4)", op);
13153 }
13154
13155 output_asm_insn ("br\t%4", op);
13156
13157 /* Output literal pool. */
13158 output_asm_insn (".align\t4", op);
13159
13160 if (nonlocal && flag_pic == 2)
13161 output_asm_insn (".long\t%0", op);
13162 if (nonlocal)
13163 {
13164 op[0] = gen_rtx_SYMBOL_REF (Pmode, "_GLOBAL_OFFSET_TABLE_");
13165 SYMBOL_REF_FLAGS (op[0]) = SYMBOL_FLAG_LOCAL;
13166 }
13167
13168 targetm.asm_out.internal_label (file, "L", CODE_LABEL_NUMBER (op[8]));
13169 if (!flag_pic)
13170 output_asm_insn (".long\t%0", op);
13171 else
13172 output_asm_insn (".long\t%0-%5", op);
13173
13174 if (op[6])
13175 {
13176 targetm.asm_out.internal_label (file, "L",
13177 CODE_LABEL_NUMBER (op[6]));
13178 output_asm_insn (".long\t%2", op);
13179 }
13180 if (op[7])
13181 {
13182 targetm.asm_out.internal_label (file, "L",
13183 CODE_LABEL_NUMBER (op[7]));
13184 output_asm_insn (".long\t%3", op);
13185 }
13186 }
13187 final_end_function ();
13188 }
13189
13190 static bool
13191 s390_valid_pointer_mode (scalar_int_mode mode)
13192 {
13193 return (mode == SImode || (TARGET_64BIT && mode == DImode));
13194 }
13195
13196 /* Checks whether the given CALL_EXPR would use a call-saved
13197 register. This is used to decide whether sibling call
13198 optimization could be performed on the respective function
13199 call. */
13200
13201 static bool
13202 s390_call_saved_register_used (tree call_expr)
13203 {
13204 CUMULATIVE_ARGS cum_v;
13205 cumulative_args_t cum;
13206 tree parameter;
13207 machine_mode mode;
13208 tree type;
13209 rtx parm_rtx;
13210 int reg, i;
13211
13212 INIT_CUMULATIVE_ARGS (cum_v, NULL, NULL, 0, 0);
13213 cum = pack_cumulative_args (&cum_v);
13214
13215 for (i = 0; i < call_expr_nargs (call_expr); i++)
13216 {
13217 parameter = CALL_EXPR_ARG (call_expr, i);
13218 gcc_assert (parameter);
13219
13220 /* For an undeclared variable passed as a parameter we will get
13221 an ERROR_MARK node here. */
13222 if (TREE_CODE (parameter) == ERROR_MARK)
13223 return true;
13224
13225 type = TREE_TYPE (parameter);
13226 gcc_assert (type);
13227
13228 mode = TYPE_MODE (type);
13229 gcc_assert (mode);
13230
13231 /* We assume that in the target function all parameters are
13232 named. This only has an impact on vector argument register
13233 usage, none of which is call-saved. */
13234 if (pass_by_reference (&cum_v, mode, type, true))
13235 {
13236 mode = Pmode;
13237 type = build_pointer_type (type);
13238 }
13239
13240 parm_rtx = s390_function_arg (cum, mode, type, true);
13241
13242 s390_function_arg_advance (cum, mode, type, true);
13243
13244 if (!parm_rtx)
13245 continue;
13246
13247 if (REG_P (parm_rtx))
13248 {
13249 for (reg = 0;
13250 reg < HARD_REGNO_NREGS (REGNO (parm_rtx), GET_MODE (parm_rtx));
13251 reg++)
13252 if (!call_used_regs[reg + REGNO (parm_rtx)])
13253 return true;
13254 }
13255
13256 if (GET_CODE (parm_rtx) == PARALLEL)
13257 {
13258 int i;
13259
13260 for (i = 0; i < XVECLEN (parm_rtx, 0); i++)
13261 {
13262 rtx r = XEXP (XVECEXP (parm_rtx, 0, i), 0);
13263
13264 gcc_assert (REG_P (r));
13265
13266 for (reg = 0;
13267 reg < HARD_REGNO_NREGS (REGNO (r), GET_MODE (r));
13268 reg++)
13269 if (!call_used_regs[reg + REGNO (r)])
13270 return true;
13271 }
13272 }
13273
13274 }
13275 return false;
13276 }
13277
13278 /* Return true if the given call expression can be
13279 turned into a sibling call.
13280 DECL holds the declaration of the function to be called whereas
13281 EXP is the call expression itself. */
13282
13283 static bool
13284 s390_function_ok_for_sibcall (tree decl, tree exp)
13285 {
13286 /* The TPF epilogue uses register 1. */
13287 if (TARGET_TPF_PROFILING)
13288 return false;
13289
13290 /* The 31 bit PLT code uses register 12 (GOT pointer - caller saved)
13291 which would have to be restored before the sibcall. */
13292 if (!TARGET_64BIT && flag_pic && decl && !targetm.binds_local_p (decl))
13293 return false;
13294
13295 /* Register 6 on s390 is available as an argument register but unfortunately
13296 "caller saved". This makes functions needing this register for arguments
13297 not suitable for sibcalls. */
13298 return !s390_call_saved_register_used (exp);
13299 }
13300
13301 /* Return the fixed registers used for condition codes. */
13302
13303 static bool
13304 s390_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
13305 {
13306 *p1 = CC_REGNUM;
13307 *p2 = INVALID_REGNUM;
13308
13309 return true;
13310 }
13311
13312 /* This function is used by the call expanders of the machine description.
13313 It emits the call insn itself together with the necessary operations
13314 to adjust the target address and returns the emitted insn.
13315 ADDR_LOCATION is the target address rtx
13316 TLS_CALL the location of the thread-local symbol
13317 RESULT_REG the register where the result of the call should be stored
13318 RETADDR_REG the register where the return address should be stored
13319 If this parameter is NULL_RTX the call is considered
13320 to be a sibling call. */
13321
13322 rtx_insn *
13323 s390_emit_call (rtx addr_location, rtx tls_call, rtx result_reg,
13324 rtx retaddr_reg)
13325 {
13326 bool plt_call = false;
13327 rtx_insn *insn;
13328 rtx call;
13329 rtx clobber;
13330 rtvec vec;
13331
13332 /* Direct function calls need special treatment. */
13333 if (GET_CODE (addr_location) == SYMBOL_REF)
13334 {
13335 /* When calling a global routine in PIC mode, we must
13336 replace the symbol itself with the PLT stub. */
13337 if (flag_pic && !SYMBOL_REF_LOCAL_P (addr_location))
13338 {
13339 if (TARGET_64BIT || retaddr_reg != NULL_RTX)
13340 {
13341 addr_location = gen_rtx_UNSPEC (Pmode,
13342 gen_rtvec (1, addr_location),
13343 UNSPEC_PLT);
13344 addr_location = gen_rtx_CONST (Pmode, addr_location);
13345 plt_call = true;
13346 }
13347 else
13348 /* For -fpic code the PLT entries might use r12 which is
13349 call-saved. Therefore we cannot do a sibcall when
13350 calling directly using a symbol ref. When reaching
13351 this point we decided (in s390_function_ok_for_sibcall)
13352 to do a sibcall for a function pointer but one of the
13353 optimizers was able to get rid of the function pointer
13354 by propagating the symbol ref into the call. This
13355 optimization is illegal for S/390 so we turn the direct
13356 call into an indirect call again. */
13357 addr_location = force_reg (Pmode, addr_location);
13358 }
13359
13360 /* Unless we can use the bras(l) insn, force the
13361 routine address into a register. */
13362 if (!TARGET_SMALL_EXEC && !TARGET_CPU_ZARCH)
13363 {
13364 if (flag_pic)
13365 addr_location = legitimize_pic_address (addr_location, 0);
13366 else
13367 addr_location = force_reg (Pmode, addr_location);
13368 }
13369 }
13370
13371 /* If it is already an indirect call or the code above moved the
13372 SYMBOL_REF to somewhere else, make sure the address can be found in
13373 register 1. */
13374 if (retaddr_reg == NULL_RTX
13375 && GET_CODE (addr_location) != SYMBOL_REF
13376 && !plt_call)
13377 {
13378 emit_move_insn (gen_rtx_REG (Pmode, SIBCALL_REGNUM), addr_location);
13379 addr_location = gen_rtx_REG (Pmode, SIBCALL_REGNUM);
13380 }
13381
13382 addr_location = gen_rtx_MEM (QImode, addr_location);
13383 call = gen_rtx_CALL (VOIDmode, addr_location, const0_rtx);
13384
13385 if (result_reg != NULL_RTX)
13386 call = gen_rtx_SET (result_reg, call);
13387
13388 if (retaddr_reg != NULL_RTX)
13389 {
13390 clobber = gen_rtx_CLOBBER (VOIDmode, retaddr_reg);
13391
13392 if (tls_call != NULL_RTX)
13393 vec = gen_rtvec (3, call, clobber,
13394 gen_rtx_USE (VOIDmode, tls_call));
13395 else
13396 vec = gen_rtvec (2, call, clobber);
13397
13398 call = gen_rtx_PARALLEL (VOIDmode, vec);
13399 }
13400
13401 insn = emit_call_insn (call);
13402
13403 /* 31-bit PLT stubs and tls calls use the GOT register implicitly. */
13404 if ((!TARGET_64BIT && plt_call) || tls_call != NULL_RTX)
13405 {
13406 /* s390_function_ok_for_sibcall should
13407 have denied sibcalls in this case. */
13408 gcc_assert (retaddr_reg != NULL_RTX);
13409 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), gen_rtx_REG (Pmode, 12));
13410 }
13411 return insn;
13412 }
13413
13414 /* Implement TARGET_CONDITIONAL_REGISTER_USAGE. */
13415
13416 static void
13417 s390_conditional_register_usage (void)
13418 {
13419 int i;
13420
13421 if (flag_pic)
13422 {
13423 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13424 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
13425 }
13426 if (TARGET_CPU_ZARCH)
13427 {
13428 fixed_regs[BASE_REGNUM] = 0;
13429 call_used_regs[BASE_REGNUM] = 0;
13430 fixed_regs[RETURN_REGNUM] = 0;
13431 call_used_regs[RETURN_REGNUM] = 0;
13432 }
13433 if (TARGET_64BIT)
13434 {
13435 for (i = FPR8_REGNUM; i <= FPR15_REGNUM; i++)
13436 call_used_regs[i] = call_really_used_regs[i] = 0;
13437 }
13438 else
13439 {
13440 call_used_regs[FPR4_REGNUM] = call_really_used_regs[FPR4_REGNUM] = 0;
13441 call_used_regs[FPR6_REGNUM] = call_really_used_regs[FPR6_REGNUM] = 0;
13442 }
13443
13444 if (TARGET_SOFT_FLOAT)
13445 {
13446 for (i = FPR0_REGNUM; i <= FPR15_REGNUM; i++)
13447 call_used_regs[i] = fixed_regs[i] = 1;
13448 }
13449
13450 /* Disable v16 - v31 for non-vector target. */
13451 if (!TARGET_VX)
13452 {
13453 for (i = VR16_REGNUM; i <= VR31_REGNUM; i++)
13454 fixed_regs[i] = call_used_regs[i] = call_really_used_regs[i] = 1;
13455 }
13456 }
13457
13458 /* Corresponding function to the eh_return expander. */
13459
13460 static GTY(()) rtx s390_tpf_eh_return_symbol;
13461 void
13462 s390_emit_tpf_eh_return (rtx target)
13463 {
13464 rtx_insn *insn;
13465 rtx reg, orig_ra;
13466
13467 if (!s390_tpf_eh_return_symbol)
13468 s390_tpf_eh_return_symbol = gen_rtx_SYMBOL_REF (Pmode, "__tpf_eh_return");
13469
13470 reg = gen_rtx_REG (Pmode, 2);
13471 orig_ra = gen_rtx_REG (Pmode, 3);
13472
13473 emit_move_insn (reg, target);
13474 emit_move_insn (orig_ra, get_hard_reg_initial_val (Pmode, RETURN_REGNUM));
13475 insn = s390_emit_call (s390_tpf_eh_return_symbol, NULL_RTX, reg,
13476 gen_rtx_REG (Pmode, RETURN_REGNUM));
13477 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), reg);
13478 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), orig_ra);
13479
13480 emit_move_insn (EH_RETURN_HANDLER_RTX, reg);
13481 }
13482
13483 /* Rework the prologue/epilogue to avoid saving/restoring
13484 registers unnecessarily. */
13485
13486 static void
13487 s390_optimize_prologue (void)
13488 {
13489 rtx_insn *insn, *new_insn, *next_insn;
13490
13491 /* Do a final recompute of the frame-related data. */
13492 s390_optimize_register_info ();
13493
13494 /* If all special registers are in fact used, there's nothing we
13495 can do, so no point in walking the insn list. */
13496
13497 if (cfun_frame_layout.first_save_gpr <= BASE_REGNUM
13498 && cfun_frame_layout.last_save_gpr >= BASE_REGNUM
13499 && (TARGET_CPU_ZARCH
13500 || (cfun_frame_layout.first_save_gpr <= RETURN_REGNUM
13501 && cfun_frame_layout.last_save_gpr >= RETURN_REGNUM)))
13502 return;
13503
13504 /* Search for prologue/epilogue insns and replace them. */
13505
13506 for (insn = get_insns (); insn; insn = next_insn)
13507 {
13508 int first, last, off;
13509 rtx set, base, offset;
13510 rtx pat;
13511
13512 next_insn = NEXT_INSN (insn);
13513
13514 if (! NONJUMP_INSN_P (insn) || ! RTX_FRAME_RELATED_P (insn))
13515 continue;
13516
13517 pat = PATTERN (insn);
13518
13519 /* Remove ldgr/lgdr instructions used for saving and restoring
13520 GPRs if possible. */
13521 if (TARGET_Z10)
13522 {
13523 rtx tmp_pat = pat;
13524
13525 if (INSN_CODE (insn) == CODE_FOR_stack_restore_from_fpr)
13526 tmp_pat = XVECEXP (pat, 0, 0);
13527
13528 if (GET_CODE (tmp_pat) == SET
13529 && GET_MODE (SET_SRC (tmp_pat)) == DImode
13530 && REG_P (SET_SRC (tmp_pat))
13531 && REG_P (SET_DEST (tmp_pat)))
13532 {
13533 int src_regno = REGNO (SET_SRC (tmp_pat));
13534 int dest_regno = REGNO (SET_DEST (tmp_pat));
13535 int gpr_regno;
13536 int fpr_regno;
13537
13538 if (!((GENERAL_REGNO_P (src_regno)
13539 && FP_REGNO_P (dest_regno))
13540 || (FP_REGNO_P (src_regno)
13541 && GENERAL_REGNO_P (dest_regno))))
13542 continue;
13543
13544 gpr_regno = GENERAL_REGNO_P (src_regno) ? src_regno : dest_regno;
13545 fpr_regno = FP_REGNO_P (src_regno) ? src_regno : dest_regno;
13546
13547 /* GPR must be call-saved, FPR must be call-clobbered. */
13548 if (!call_really_used_regs[fpr_regno]
13549 || call_really_used_regs[gpr_regno])
13550 continue;
13551
13552 /* It must not happen that what we once saved in an FPR now
13553 needs a stack slot. */
13554 gcc_assert (cfun_gpr_save_slot (gpr_regno) != SAVE_SLOT_STACK);
13555
13556 if (cfun_gpr_save_slot (gpr_regno) == SAVE_SLOT_NONE)
13557 {
13558 remove_insn (insn);
13559 continue;
13560 }
13561 }
13562 }
13563
13564 if (GET_CODE (pat) == PARALLEL
13565 && store_multiple_operation (pat, VOIDmode))
13566 {
13567 set = XVECEXP (pat, 0, 0);
13568 first = REGNO (SET_SRC (set));
13569 last = first + XVECLEN (pat, 0) - 1;
13570 offset = const0_rtx;
13571 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13572 off = INTVAL (offset);
13573
13574 if (GET_CODE (base) != REG || off < 0)
13575 continue;
13576 if (cfun_frame_layout.first_save_gpr != -1
13577 && (cfun_frame_layout.first_save_gpr < first
13578 || cfun_frame_layout.last_save_gpr > last))
13579 continue;
13580 if (REGNO (base) != STACK_POINTER_REGNUM
13581 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13582 continue;
13583 if (first > BASE_REGNUM || last < BASE_REGNUM)
13584 continue;
13585
13586 if (cfun_frame_layout.first_save_gpr != -1)
13587 {
13588 rtx s_pat = save_gprs (base,
13589 off + (cfun_frame_layout.first_save_gpr
13590 - first) * UNITS_PER_LONG,
13591 cfun_frame_layout.first_save_gpr,
13592 cfun_frame_layout.last_save_gpr);
13593 new_insn = emit_insn_before (s_pat, insn);
13594 INSN_ADDRESSES_NEW (new_insn, -1);
13595 }
13596
13597 remove_insn (insn);
13598 continue;
13599 }
13600
13601 if (cfun_frame_layout.first_save_gpr == -1
13602 && GET_CODE (pat) == SET
13603 && GENERAL_REG_P (SET_SRC (pat))
13604 && GET_CODE (SET_DEST (pat)) == MEM)
13605 {
13606 set = pat;
13607 first = REGNO (SET_SRC (set));
13608 offset = const0_rtx;
13609 base = eliminate_constant_term (XEXP (SET_DEST (set), 0), &offset);
13610 off = INTVAL (offset);
13611
13612 if (GET_CODE (base) != REG || off < 0)
13613 continue;
13614 if (REGNO (base) != STACK_POINTER_REGNUM
13615 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13616 continue;
13617
13618 remove_insn (insn);
13619 continue;
13620 }
13621
13622 if (GET_CODE (pat) == PARALLEL
13623 && load_multiple_operation (pat, VOIDmode))
13624 {
13625 set = XVECEXP (pat, 0, 0);
13626 first = REGNO (SET_DEST (set));
13627 last = first + XVECLEN (pat, 0) - 1;
13628 offset = const0_rtx;
13629 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13630 off = INTVAL (offset);
13631
13632 if (GET_CODE (base) != REG || off < 0)
13633 continue;
13634
13635 if (cfun_frame_layout.first_restore_gpr != -1
13636 && (cfun_frame_layout.first_restore_gpr < first
13637 || cfun_frame_layout.last_restore_gpr > last))
13638 continue;
13639 if (REGNO (base) != STACK_POINTER_REGNUM
13640 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13641 continue;
13642 if (first > BASE_REGNUM || last < BASE_REGNUM)
13643 continue;
13644
13645 if (cfun_frame_layout.first_restore_gpr != -1)
13646 {
13647 rtx rpat = restore_gprs (base,
13648 off + (cfun_frame_layout.first_restore_gpr
13649 - first) * UNITS_PER_LONG,
13650 cfun_frame_layout.first_restore_gpr,
13651 cfun_frame_layout.last_restore_gpr);
13652
13653 /* Remove REG_CFA_RESTOREs for registers that we no
13654 longer need to save. */
13655 REG_NOTES (rpat) = REG_NOTES (insn);
13656 for (rtx *ptr = &REG_NOTES (rpat); *ptr; )
13657 if (REG_NOTE_KIND (*ptr) == REG_CFA_RESTORE
13658 && ((int) REGNO (XEXP (*ptr, 0))
13659 < cfun_frame_layout.first_restore_gpr))
13660 *ptr = XEXP (*ptr, 1);
13661 else
13662 ptr = &XEXP (*ptr, 1);
13663 new_insn = emit_insn_before (rpat, insn);
13664 RTX_FRAME_RELATED_P (new_insn) = 1;
13665 INSN_ADDRESSES_NEW (new_insn, -1);
13666 }
13667
13668 remove_insn (insn);
13669 continue;
13670 }
13671
13672 if (cfun_frame_layout.first_restore_gpr == -1
13673 && GET_CODE (pat) == SET
13674 && GENERAL_REG_P (SET_DEST (pat))
13675 && GET_CODE (SET_SRC (pat)) == MEM)
13676 {
13677 set = pat;
13678 first = REGNO (SET_DEST (set));
13679 offset = const0_rtx;
13680 base = eliminate_constant_term (XEXP (SET_SRC (set), 0), &offset);
13681 off = INTVAL (offset);
13682
13683 if (GET_CODE (base) != REG || off < 0)
13684 continue;
13685
13686 if (REGNO (base) != STACK_POINTER_REGNUM
13687 && REGNO (base) != HARD_FRAME_POINTER_REGNUM)
13688 continue;
13689
13690 remove_insn (insn);
13691 continue;
13692 }
13693 }
13694 }
13695
13696 /* On z10 and later the dynamic branch prediction must see the
13697 backward jump within a certain window. If not, it falls back to
13698 the static prediction. This function rearranges the loop backward
13699 branch in a way which makes the static prediction always correct.
13700 The function returns true if it added an instruction. */
13701 static bool
13702 s390_fix_long_loop_prediction (rtx_insn *insn)
13703 {
13704 rtx set = single_set (insn);
13705 rtx code_label, label_ref;
13706 rtx_insn *uncond_jump;
13707 rtx_insn *cur_insn;
13708 rtx tmp;
13709 int distance;
13710
13711 /* This will exclude branch on count and branch on index patterns
13712 since these are correctly statically predicted. */
13713 if (!set
13714 || SET_DEST (set) != pc_rtx
13715 || GET_CODE (SET_SRC(set)) != IF_THEN_ELSE)
13716 return false;
13717
13718 /* Skip conditional returns. */
13719 if (ANY_RETURN_P (XEXP (SET_SRC (set), 1))
13720 && XEXP (SET_SRC (set), 2) == pc_rtx)
13721 return false;
13722
13723 label_ref = (GET_CODE (XEXP (SET_SRC (set), 1)) == LABEL_REF ?
13724 XEXP (SET_SRC (set), 1) : XEXP (SET_SRC (set), 2));
13725
13726 gcc_assert (GET_CODE (label_ref) == LABEL_REF);
13727
13728 code_label = XEXP (label_ref, 0);
13729
13730 if (INSN_ADDRESSES (INSN_UID (code_label)) == -1
13731 || INSN_ADDRESSES (INSN_UID (insn)) == -1
13732 || (INSN_ADDRESSES (INSN_UID (insn))
13733 - INSN_ADDRESSES (INSN_UID (code_label)) < PREDICT_DISTANCE))
13734 return false;
13735
13736 for (distance = 0, cur_insn = PREV_INSN (insn);
13737 distance < PREDICT_DISTANCE - 6;
13738 distance += get_attr_length (cur_insn), cur_insn = PREV_INSN (cur_insn))
13739 if (!cur_insn || JUMP_P (cur_insn) || LABEL_P (cur_insn))
13740 return false;
13741
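/* The rewrite performed below, roughly (for the common case of a
   conditional backward jump; the mnemonics are only illustrative):

       loop:  ...                           loop:  ...
              jCC   loop           ==>             jNCC  new_label
                                                   j     loop
                                        new_label:

   The backward branch becomes unconditional, while the new forward
   conditional branch falls through on every iteration but the last. */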
13742 rtx_code_label *new_label = gen_label_rtx ();
13743 uncond_jump = emit_jump_insn_after (
13744 gen_rtx_SET (pc_rtx,
13745 gen_rtx_LABEL_REF (VOIDmode, code_label)),
13746 insn);
13747 emit_label_after (new_label, uncond_jump);
13748
13749 tmp = XEXP (SET_SRC (set), 1);
13750 XEXP (SET_SRC (set), 1) = XEXP (SET_SRC (set), 2);
13751 XEXP (SET_SRC (set), 2) = tmp;
13752 INSN_CODE (insn) = -1;
13753
13754 XEXP (label_ref, 0) = new_label;
13755 JUMP_LABEL (insn) = new_label;
13756 JUMP_LABEL (uncond_jump) = code_label;
13757
13758 return true;
13759 }
13760
13761 /* Returns 1 if INSN reads the value of REG for purposes not related
13762 to addressing of memory, and 0 otherwise. */
13763 static int
13764 s390_non_addr_reg_read_p (rtx reg, rtx_insn *insn)
13765 {
13766 return reg_referenced_p (reg, PATTERN (insn))
13767 && !reg_used_in_mem_p (REGNO (reg), PATTERN (insn));
13768 }
13769
13770 /* Starting from INSN find_cond_jump looks downwards in the insn
13771 stream for a single jump insn which is the last user of the
13772 condition code set in INSN. */
13773 static rtx_insn *
13774 find_cond_jump (rtx_insn *insn)
13775 {
13776 for (; insn; insn = NEXT_INSN (insn))
13777 {
13778 rtx ite, cc;
13779
13780 if (LABEL_P (insn))
13781 break;
13782
13783 if (!JUMP_P (insn))
13784 {
13785 if (reg_mentioned_p (gen_rtx_REG (CCmode, CC_REGNUM), insn))
13786 break;
13787 continue;
13788 }
13789
13790 /* This will be triggered by a return. */
13791 if (GET_CODE (PATTERN (insn)) != SET)
13792 break;
13793
13794 gcc_assert (SET_DEST (PATTERN (insn)) == pc_rtx);
13795 ite = SET_SRC (PATTERN (insn));
13796
13797 if (GET_CODE (ite) != IF_THEN_ELSE)
13798 break;
13799
13800 cc = XEXP (XEXP (ite, 0), 0);
13801 if (!REG_P (cc) || !CC_REGNO_P (REGNO (cc)))
13802 break;
13803
13804 if (find_reg_note (insn, REG_DEAD, cc))
13805 return insn;
13806 break;
13807 }
13808
13809 return NULL;
13810 }
13811
13812 /* Swap the condition in COND and the operands in OP0 and OP1 so that
13813 the semantics does not change. If NULL_RTX is passed as COND the
13814 function tries to find the conditional jump starting with INSN. */
13815 static void
13816 s390_swap_cmp (rtx cond, rtx *op0, rtx *op1, rtx_insn *insn)
13817 {
13818 rtx tmp = *op0;
13819
13820 if (cond == NULL_RTX)
13821 {
13822 rtx_insn *jump = find_cond_jump (NEXT_INSN (insn));
13823 rtx set = jump ? single_set (jump) : NULL_RTX;
13824
13825 if (set == NULL_RTX)
13826 return;
13827
13828 cond = XEXP (SET_SRC (set), 0);
13829 }
13830
13831 *op0 = *op1;
13832 *op1 = tmp;
13833 PUT_CODE (cond, swap_condition (GET_CODE (cond)));
13834 }
13835
13836 /* On z10, instructions of the compare-and-branch family have the
13837 property of accessing the register occurring as the second operand with
13838 its bits complemented. If such a compare is grouped with a second
13839 instruction that accesses the same register non-complemented, and
13840 if that register's value is delivered via a bypass, then the
13841 pipeline recycles, thereby causing significant performance decline.
13842 This function locates such situations and exchanges the two
13843 operands of the compare. The function returns true whenever it
13844 added an insn. */
13845 static bool
13846 s390_z10_optimize_cmp (rtx_insn *insn)
13847 {
13848 rtx_insn *prev_insn, *next_insn;
13849 bool insn_added_p = false;
13850 rtx cond, *op0, *op1;
13851
13852 if (GET_CODE (PATTERN (insn)) == PARALLEL)
13853 {
13854 /* Handle compare and branch and branch on count
13855 instructions. */
13856 rtx pattern = single_set (insn);
13857
13858 if (!pattern
13859 || SET_DEST (pattern) != pc_rtx
13860 || GET_CODE (SET_SRC (pattern)) != IF_THEN_ELSE)
13861 return false;
13862
13863 cond = XEXP (SET_SRC (pattern), 0);
13864 op0 = &XEXP (cond, 0);
13865 op1 = &XEXP (cond, 1);
13866 }
13867 else if (GET_CODE (PATTERN (insn)) == SET)
13868 {
13869 rtx src, dest;
13870
13871 /* Handle normal compare instructions. */
13872 src = SET_SRC (PATTERN (insn));
13873 dest = SET_DEST (PATTERN (insn));
13874
13875 if (!REG_P (dest)
13876 || !CC_REGNO_P (REGNO (dest))
13877 || GET_CODE (src) != COMPARE)
13878 return false;
13879
13880 /* s390_swap_cmp will try to find the conditional
13881 jump when passing NULL_RTX as condition. */
13882 cond = NULL_RTX;
13883 op0 = &XEXP (src, 0);
13884 op1 = &XEXP (src, 1);
13885 }
13886 else
13887 return false;
13888
13889 if (!REG_P (*op0) || !REG_P (*op1))
13890 return false;
13891
13892 if (GET_MODE_CLASS (GET_MODE (*op0)) != MODE_INT)
13893 return false;
13894
13895 /* Swap the COMPARE arguments and its mask if there is a
13896 conflicting access in the previous insn. */
13897 prev_insn = prev_active_insn (insn);
13898 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13899 && reg_referenced_p (*op1, PATTERN (prev_insn)))
13900 s390_swap_cmp (cond, op0, op1, insn);
13901
13902 /* Check if there is a conflict with the next insn. If there
13903 was no conflict with the previous insn, then swap the
13904 COMPARE arguments and its mask. If we already swapped
13905 the operands, or if swapping them would cause a conflict
13906 with the previous insn, issue a NOP after the COMPARE in
13907 order to separate the two instructions. */
13908 next_insn = next_active_insn (insn);
13909 if (next_insn != NULL_RTX && INSN_P (next_insn)
13910 && s390_non_addr_reg_read_p (*op1, next_insn))
13911 {
13912 if (prev_insn != NULL_RTX && INSN_P (prev_insn)
13913 && s390_non_addr_reg_read_p (*op0, prev_insn))
13914 {
13915 if (REGNO (*op1) == 0)
13916 emit_insn_after (gen_nop1 (), insn);
13917 else
13918 emit_insn_after (gen_nop (), insn);
13919 insn_added_p = true;
13920 }
13921 else
13922 s390_swap_cmp (cond, op0, op1, insn);
13923 }
13924 return insn_added_p;
13925 }
13926
13927 /* Number of INSNs to be scanned backward in the last BB of the loop
13928 and forward in the first BB of the loop. This should usually be a
13929 bit more than the number of INSNs that can go into one
13930 group. */
13931 #define S390_OSC_SCAN_INSN_NUM 5
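/* (A dispatch group as modeled by the scheduling code below, see
   s390_sched_state, holds at most three insns, so five leaves some
   slack.) */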
13932
13933 /* Scan LOOP for static OSC collisions and return true if an osc_break
13934 should be issued for this loop. */
13935 static bool
13936 s390_adjust_loop_scan_osc (struct loop* loop)
13937
13938 {
13939 HARD_REG_SET modregs, newregs;
13940 rtx_insn *insn, *store_insn = NULL;
13941 rtx set;
13942 struct s390_address addr_store, addr_load;
13943 subrtx_iterator::array_type array;
13944 int insn_count;
13945
13946 CLEAR_HARD_REG_SET (modregs);
13947
13948 insn_count = 0;
13949 FOR_BB_INSNS_REVERSE (loop->latch, insn)
13950 {
13951 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13952 continue;
13953
13954 insn_count++;
13955 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13956 return false;
13957
13958 find_all_hard_reg_sets (insn, &newregs, true);
13959 IOR_HARD_REG_SET (modregs, newregs);
13960
13961 set = single_set (insn);
13962 if (!set)
13963 continue;
13964
13965 if (MEM_P (SET_DEST (set))
13966 && s390_decompose_address (XEXP (SET_DEST (set), 0), &addr_store))
13967 {
13968 store_insn = insn;
13969 break;
13970 }
13971 }
13972
13973 if (store_insn == NULL_RTX)
13974 return false;
13975
13976 insn_count = 0;
13977 FOR_BB_INSNS (loop->header, insn)
13978 {
13979 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
13980 continue;
13981
13982 if (insn == store_insn)
13983 return false;
13984
13985 insn_count++;
13986 if (insn_count > S390_OSC_SCAN_INSN_NUM)
13987 return false;
13988
13989 find_all_hard_reg_sets (insn, &newregs, true);
13990 IOR_HARD_REG_SET (modregs, newregs);
13991
13992 set = single_set (insn);
13993 if (!set)
13994 continue;
13995
13996 /* An intermediate store disrupts static OSC checking
13997 anyway. */
13998 if (MEM_P (SET_DEST (set))
13999 && s390_decompose_address (XEXP (SET_DEST (set), 0), NULL))
14000 return false;
14001
14002 FOR_EACH_SUBRTX (iter, array, SET_SRC (set), NONCONST)
14003 if (MEM_P (*iter)
14004 && s390_decompose_address (XEXP (*iter, 0), &addr_load)
14005 && rtx_equal_p (addr_load.base, addr_store.base)
14006 && rtx_equal_p (addr_load.indx, addr_store.indx)
14007 && rtx_equal_p (addr_load.disp, addr_store.disp))
14008 {
14009 if ((addr_load.base != NULL_RTX
14010 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.base)))
14011 || (addr_load.indx != NULL_RTX
14012 && TEST_HARD_REG_BIT (modregs, REGNO (addr_load.indx))))
14013 return true;
14014 }
14015 }
14016 return false;
14017 }
14018
14019 /* Look for adjustments which can be done on simple innermost
14020 loops. */
14021 static void
14022 s390_adjust_loops ()
14023 {
14024 struct loop *loop = NULL;
14025
14026 df_analyze ();
14027 compute_bb_for_insn ();
14028
14029 /* Find the loops. */
14030 loop_optimizer_init (AVOID_CFG_MODIFICATIONS);
14031
14032 FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
14033 {
14034 if (dump_file)
14035 {
14036 flow_loop_dump (loop, dump_file, NULL, 0);
14037 fprintf (dump_file, ";; OSC loop scan Loop: ");
14038 }
14039 if (loop->latch == NULL
14040 || pc_set (BB_END (loop->latch)) == NULL_RTX
14041 || !s390_adjust_loop_scan_osc (loop))
14042 {
14043 if (dump_file)
14044 {
14045 if (loop->latch == NULL)
14046 fprintf (dump_file, " muliple backward jumps\n");
14047 else
14048 {
14049 fprintf (dump_file, " header insn: %d latch insn: %d ",
14050 INSN_UID (BB_HEAD (loop->header)),
14051 INSN_UID (BB_END (loop->latch)));
14052 if (pc_set (BB_END (loop->latch)) == NULL_RTX)
14053 fprintf (dump_file, " loop does not end with jump\n");
14054 else
14055 fprintf (dump_file, " not instrumented\n");
14056 }
14057 }
14058 }
14059 else
14060 {
14061 rtx_insn *new_insn;
14062
14063 if (dump_file)
14064 fprintf (dump_file, " adding OSC break insn: ");
14065 new_insn = emit_insn_before (gen_osc_break (),
14066 BB_END (loop->latch));
14067 INSN_ADDRESSES_NEW (new_insn, -1);
14068 }
14069 }
14070
14071 loop_optimizer_finalize ();
14072
14073 df_finish_pass (false);
14074 }
14075
14076 /* Perform machine-dependent processing. */
14077
14078 static void
14079 s390_reorg (void)
14080 {
14081 bool pool_overflow = false;
14082 int hw_before, hw_after;
14083
14084 if (s390_tune == PROCESSOR_2964_Z13)
14085 s390_adjust_loops ();
14086
14087 /* Make sure all splits have been performed; splits after
14088 machine_dependent_reorg might confuse insn length counts. */
14089 split_all_insns_noflow ();
14090
14091 /* Install the main literal pool and the associated base
14092 register load insns.
14093
14094 In addition, there are two problematic situations we need
14095 to correct:
14096
14097 - the literal pool might be > 4096 bytes in size, so that
14098 some of its elements cannot be directly accessed
14099
14100 - a branch target might be > 64K away from the branch, so that
14101 it is not possible to use a PC-relative instruction.
14102
14103 To fix those, we split the single literal pool into multiple
14104 pool chunks, reloading the pool base register at various
14105 points throughout the function to ensure it always points to
14106 the pool chunk the following code expects, and / or replace
14107 PC-relative branches by absolute branches.
14108
14109 However, the two problems are interdependent: splitting the
14110 literal pool can move a branch further away from its target,
14111 causing the 64K limit to overflow, and on the other hand,
14112 replacing a PC-relative branch by an absolute branch means
14113 we need to put the branch target address into the literal
14114 pool, possibly causing it to overflow.
14115
14116 So, we loop trying to fix up both problems until we manage
14117 to satisfy both conditions at the same time. Note that the
14118 loop is guaranteed to terminate as every pass of the loop
14119 strictly decreases the total number of PC-relative branches
14120 in the function. (This is not completely true as there
14121 might be branch-over-pool insns introduced by chunkify_start.
14122 Those never need to be split however.) */
14123
14124 for (;;)
14125 {
14126 struct constant_pool *pool = NULL;
14127
14128 /* Collect the literal pool. */
14129 if (!pool_overflow)
14130 {
14131 pool = s390_mainpool_start ();
14132 if (!pool)
14133 pool_overflow = true;
14134 }
14135
14136 /* If literal pool overflowed, start to chunkify it. */
14137 if (pool_overflow)
14138 pool = s390_chunkify_start ();
14139
14140 /* Split out-of-range branches. If this has created new
14141 literal pool entries, cancel current chunk list and
14142 recompute it. zSeries machines have large branch
14143 instructions, so we never need to split a branch. */
14144 if (!TARGET_CPU_ZARCH && s390_split_branches ())
14145 {
14146 if (pool_overflow)
14147 s390_chunkify_cancel (pool);
14148 else
14149 s390_mainpool_cancel (pool);
14150
14151 continue;
14152 }
14153
14154 /* If we made it up to here, both conditions are satisfied.
14155 Finish up literal pool related changes. */
14156 if (pool_overflow)
14157 s390_chunkify_finish (pool);
14158 else
14159 s390_mainpool_finish (pool);
14160
14161 /* We're done splitting branches. */
14162 cfun->machine->split_branches_pending_p = false;
14163 break;
14164 }
14165
14166 /* Generate out-of-pool execute target insns. */
14167 if (TARGET_CPU_ZARCH)
14168 {
14169 rtx_insn *insn, *target;
14170 rtx label;
14171
14172 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14173 {
14174 label = s390_execute_label (insn);
14175 if (!label)
14176 continue;
14177
14178 gcc_assert (label != const0_rtx);
14179
14180 target = emit_label (XEXP (label, 0));
14181 INSN_ADDRESSES_NEW (target, -1);
14182
14183 target = emit_insn (s390_execute_target (insn));
14184 INSN_ADDRESSES_NEW (target, -1);
14185 }
14186 }
14187
14188 /* Try to optimize prologue and epilogue further. */
14189 s390_optimize_prologue ();
14190
14191 /* Walk over the insns and do some >=z10 specific changes. */
14192 if (s390_tune >= PROCESSOR_2097_Z10)
14193 {
14194 rtx_insn *insn;
14195 bool insn_added_p = false;
14196
14197 /* The insn lengths and addresses have to be up to date for the
14198 following manipulations. */
14199 shorten_branches (get_insns ());
14200
14201 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14202 {
14203 if (!INSN_P (insn) || INSN_CODE (insn) <= 0)
14204 continue;
14205
14206 if (JUMP_P (insn))
14207 insn_added_p |= s390_fix_long_loop_prediction (insn);
14208
14209 if ((GET_CODE (PATTERN (insn)) == PARALLEL
14210 || GET_CODE (PATTERN (insn)) == SET)
14211 && s390_tune == PROCESSOR_2097_Z10)
14212 insn_added_p |= s390_z10_optimize_cmp (insn);
14213 }
14214
14215 /* Adjust branches if we added new instructions. */
14216 if (insn_added_p)
14217 shorten_branches (get_insns ());
14218 }
14219
14220 s390_function_num_hotpatch_hw (current_function_decl, &hw_before, &hw_after);
14221 if (hw_after > 0)
14222 {
14223 rtx_insn *insn;
14224
14225 /* Insert NOPs for hotpatching. */
14226 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
14227 /* Emit NOPs
14228 1. inside the area covered by debug information to allow setting
14229 breakpoints at the NOPs,
14230 2. before any insn which results in an asm instruction,
14231 3. before in-function labels to avoid jumping to the NOPs, for
14232 example as part of a loop,
14233 4. before any barrier in case the function is completely empty
14234 (__builtin_unreachable ()) and has neither internal labels nor
14235 active insns.
14236 */
14237 if (active_insn_p (insn) || BARRIER_P (insn) || LABEL_P (insn))
14238 break;
14239 /* Output a series of NOPs before the first active insn. */
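/* For example, hw_after == 5 (the counts are in halfwords) on a zarch
   CPU results in one 6-byte and one 4-byte NOP being emitted below. */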
14240 while (insn && hw_after > 0)
14241 {
14242 if (hw_after >= 3 && TARGET_CPU_ZARCH)
14243 {
14244 emit_insn_before (gen_nop_6_byte (), insn);
14245 hw_after -= 3;
14246 }
14247 else if (hw_after >= 2)
14248 {
14249 emit_insn_before (gen_nop_4_byte (), insn);
14250 hw_after -= 2;
14251 }
14252 else
14253 {
14254 emit_insn_before (gen_nop_2_byte (), insn);
14255 hw_after -= 1;
14256 }
14257 }
14258 }
14259 }
14260
14261 /* Return true if INSN is a fp load insn writing register REGNO. */
14262 static inline bool
14263 s390_fpload_toreg (rtx_insn *insn, unsigned int regno)
14264 {
14265 rtx set;
14266 enum attr_type flag = s390_safe_attr_type (insn);
14267
14268 if (flag != TYPE_FLOADSF && flag != TYPE_FLOADDF)
14269 return false;
14270
14271 set = single_set (insn);
14272
14273 if (set == NULL_RTX)
14274 return false;
14275
14276 if (!REG_P (SET_DEST (set)) || !MEM_P (SET_SRC (set)))
14277 return false;
14278
14279 if (REGNO (SET_DEST (set)) != regno)
14280 return false;
14281
14282 return true;
14283 }
14284
14285 /* This value describes the distance to be avoided between an
14286 arithmetic fp instruction and an fp load writing the same register.
14287 Z10_EARLYLOAD_DISTANCE - 1 as well as Z10_EARLYLOAD_DISTANCE + 1 is
14288 fine but the exact value has to be avoided. Otherwise the FP
14289 pipeline will throw an exception causing a major penalty. */
14290 #define Z10_EARLYLOAD_DISTANCE 7
14291
14292 /* Rearrange the ready list in order to avoid the situation described
14293 for Z10_EARLYLOAD_DISTANCE. A problematic load instruction is
14294 moved to the very end of the ready list. */
14295 static void
14296 s390_z10_prevent_earlyload_conflicts (rtx_insn **ready, int *nready_p)
14297 {
14298 unsigned int regno;
14299 int nready = *nready_p;
14300 rtx_insn *tmp;
14301 int i;
14302 rtx_insn *insn;
14303 rtx set;
14304 enum attr_type flag;
14305 int distance;
14306
14307 /* Skip DISTANCE - 1 active insns. */
14308 for (insn = last_scheduled_insn, distance = Z10_EARLYLOAD_DISTANCE - 1;
14309 distance > 0 && insn != NULL_RTX;
14310 distance--, insn = prev_active_insn (insn))
14311 if (CALL_P (insn) || JUMP_P (insn))
14312 return;
14313
14314 if (insn == NULL_RTX)
14315 return;
14316
14317 set = single_set (insn);
14318
14319 if (set == NULL_RTX || !REG_P (SET_DEST (set))
14320 || GET_MODE_CLASS (GET_MODE (SET_DEST (set))) != MODE_FLOAT)
14321 return;
14322
14323 flag = s390_safe_attr_type (insn);
14324
14325 if (flag == TYPE_FLOADSF || flag == TYPE_FLOADDF)
14326 return;
14327
14328 regno = REGNO (SET_DEST (set));
14329 i = nready - 1;
14330
14331 while (!s390_fpload_toreg (ready[i], regno) && i > 0)
14332 i--;
14333
14334 if (!i)
14335 return;
14336
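/* Move the load to index 0 of the ready list. The insn at the highest
   index is issued next (see s390_sched_reorder), so this load gets
   issued last among the currently ready insns. */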
14337 tmp = ready[i];
14338 memmove (&ready[1], &ready[0], sizeof (rtx_insn *) * i);
14339 ready[0] = tmp;
14340 }
14341
14342
14343 /* The s390_sched_state variable tracks the state of the current or
14344 the last instruction group.
14345
14346 0,1,2 number of instructions scheduled in the current group
14347 3 the last group is complete - normal insns
14348 4 the last group was a cracked/expanded insn */
14349
14350 static int s390_sched_state;
14351
14352 #define S390_SCHED_STATE_NORMAL 3
14353 #define S390_SCHED_STATE_CRACKED 4
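/* For instance, issuing two normal insns followed by a group-alone
   insn moves s390_sched_state from 0 to 1, then 2, and finally to
   S390_SCHED_STATE_NORMAL (see s390_sched_variable_issue below). */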
14354
14355 #define S390_SCHED_ATTR_MASK_CRACKED 0x1
14356 #define S390_SCHED_ATTR_MASK_EXPANDED 0x2
14357 #define S390_SCHED_ATTR_MASK_ENDGROUP 0x4
14358 #define S390_SCHED_ATTR_MASK_GROUPALONE 0x8
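/* E.g. an insn that is both cracked and ends a group gets
   S390_SCHED_ATTR_MASK_CRACKED | S390_SCHED_ATTR_MASK_ENDGROUP (0x5)
   from s390_get_sched_attrmask below. */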
14359
14360 static unsigned int
14361 s390_get_sched_attrmask (rtx_insn *insn)
14362 {
14363 unsigned int mask = 0;
14364
14365 switch (s390_tune)
14366 {
14367 case PROCESSOR_2827_ZEC12:
14368 if (get_attr_zEC12_cracked (insn))
14369 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14370 if (get_attr_zEC12_expanded (insn))
14371 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14372 if (get_attr_zEC12_endgroup (insn))
14373 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14374 if (get_attr_zEC12_groupalone (insn))
14375 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14376 break;
14377 case PROCESSOR_2964_Z13:
14378 case PROCESSOR_3906_Z14:
14379 if (get_attr_z13_cracked (insn))
14380 mask |= S390_SCHED_ATTR_MASK_CRACKED;
14381 if (get_attr_z13_expanded (insn))
14382 mask |= S390_SCHED_ATTR_MASK_EXPANDED;
14383 if (get_attr_z13_endgroup (insn))
14384 mask |= S390_SCHED_ATTR_MASK_ENDGROUP;
14385 if (get_attr_z13_groupalone (insn))
14386 mask |= S390_SCHED_ATTR_MASK_GROUPALONE;
14387 break;
14388 default:
14389 gcc_unreachable ();
14390 }
14391 return mask;
14392 }
14393
14394 static unsigned int
14395 s390_get_unit_mask (rtx_insn *insn, int *units)
14396 {
14397 unsigned int mask = 0;
14398
14399 switch (s390_tune)
14400 {
14401 case PROCESSOR_2964_Z13:
14402 case PROCESSOR_3906_Z14:
14403 *units = 3;
14404 if (get_attr_z13_unit_lsu (insn))
14405 mask |= 1 << 0;
14406 if (get_attr_z13_unit_fxu (insn))
14407 mask |= 1 << 1;
14408 if (get_attr_z13_unit_vfu (insn))
14409 mask |= 1 << 2;
14410 break;
14411 default:
14412 gcc_unreachable ();
14413 }
14414 return mask;
14415 }
14416
14417 /* Return the scheduling score for INSN. The higher the score the
14418 better. The score is calculated from the OOO scheduling attributes
14419 of INSN and the scheduling state s390_sched_state. */
14420 static int
14421 s390_sched_score (rtx_insn *insn)
14422 {
14423 unsigned int mask = s390_get_sched_attrmask (insn);
14424 int score = 0;
14425
14426 switch (s390_sched_state)
14427 {
14428 case 0:
14429 /* Try to put insns into the first slot which would otherwise
14430 break a group. */
14431 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14432 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14433 score += 5;
14434 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14435 score += 10;
14436 /* fallthrough */
14437 case 1:
14438 /* Prefer non-cracked insns while trying to put together a
14439 group. */
14440 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14441 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14442 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14443 score += 10;
14444 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) == 0)
14445 score += 5;
14446 break;
14447 case 2:
14448 /* Prefer non-cracked insns while trying to put together a
14449 group. */
14450 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14451 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0
14452 && (mask & S390_SCHED_ATTR_MASK_GROUPALONE) == 0)
14453 score += 10;
14454 /* Prefer endgroup insns in the last slot. */
14455 if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0)
14456 score += 10;
14457 break;
14458 case S390_SCHED_STATE_NORMAL:
14459 /* Prefer non-cracked insns if the last one was not cracked. */
14460 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) == 0
14461 && (mask & S390_SCHED_ATTR_MASK_EXPANDED) == 0)
14462 score += 5;
14463 if ((mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14464 score += 10;
14465 break;
14466 case S390_SCHED_STATE_CRACKED:
14467 /* Try to keep cracked insns together to prevent them from
14468 interrupting groups. */
14469 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14470 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14471 score += 5;
14472 break;
14473 }
14474
14475 if (s390_tune >= PROCESSOR_2964_Z13)
14476 {
14477 int units, i;
14478 unsigned unit_mask, m = 1;
14479
14480 unit_mask = s390_get_unit_mask (insn, &units);
14481 gcc_assert (units <= MAX_SCHED_UNITS);
14482
14483 /* Add a score in range 0..MAX_SCHED_MIX_SCORE depending on how long
14484 ago the last insn of this unit type got scheduled. This is
14485 supposed to help provide a proper instruction mix to the
14486 CPU. */
14487 for (i = 0; i < units; i++, m <<= 1)
14488 if (m & unit_mask)
14489 score += (last_scheduled_unit_distance[i] * MAX_SCHED_MIX_SCORE /
14490 MAX_SCHED_MIX_DISTANCE);
14491 }
14492 return score;
14493 }
14494
14495 /* This function is called via hook TARGET_SCHED_REORDER before
14496 issuing one insn from list READY which contains *NREADYP entries.
14497 For target z10 it reorders load instructions to avoid early load
14498 conflicts in the floating point pipeline. */
14499 static int
14500 s390_sched_reorder (FILE *file, int verbose,
14501 rtx_insn **ready, int *nreadyp, int clock ATTRIBUTE_UNUSED)
14502 {
14503 if (s390_tune == PROCESSOR_2097_Z10
14504 && reload_completed
14505 && *nreadyp > 1)
14506 s390_z10_prevent_earlyload_conflicts (ready, nreadyp);
14507
14508 if (s390_tune >= PROCESSOR_2827_ZEC12
14509 && reload_completed
14510 && *nreadyp > 1)
14511 {
14512 int i;
14513 int last_index = *nreadyp - 1;
14514 int max_index = -1;
14515 int max_score = -1;
14516 rtx_insn *tmp;
14517
14518 /* Just move the insn with the highest score to the top (the
14519 end) of the list. A full sort is not needed since a conflict
14520 in the hazard recognition cannot happen. So the top insn in
14521 the ready list will always be taken. */
14522 for (i = last_index; i >= 0; i--)
14523 {
14524 int score;
14525
14526 if (recog_memoized (ready[i]) < 0)
14527 continue;
14528
14529 score = s390_sched_score (ready[i]);
14530 if (score > max_score)
14531 {
14532 max_score = score;
14533 max_index = i;
14534 }
14535 }
14536
14537 if (max_index != -1)
14538 {
14539 if (max_index != last_index)
14540 {
14541 tmp = ready[max_index];
14542 ready[max_index] = ready[last_index];
14543 ready[last_index] = tmp;
14544
14545 if (verbose > 5)
14546 fprintf (file,
14547 ";;\t\tBACKEND: move insn %d to the top of list\n",
14548 INSN_UID (ready[last_index]));
14549 }
14550 else if (verbose > 5)
14551 fprintf (file,
14552 ";;\t\tBACKEND: best insn %d already on top\n",
14553 INSN_UID (ready[last_index]));
14554 }
14555
14556 if (verbose > 5)
14557 {
14558 fprintf (file, "ready list ooo attributes - sched state: %d\n",
14559 s390_sched_state);
14560
14561 for (i = last_index; i >= 0; i--)
14562 {
14563 unsigned int sched_mask;
14564 rtx_insn *insn = ready[i];
14565
14566 if (recog_memoized (insn) < 0)
14567 continue;
14568
14569 sched_mask = s390_get_sched_attrmask (insn);
14570 fprintf (file, ";;\t\tBACKEND: insn %d score: %d: ",
14571 INSN_UID (insn),
14572 s390_sched_score (insn));
14573 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ",\
14574 ((M) & sched_mask) ? #ATTR : "");
14575 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14576 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14577 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14578 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14579 #undef PRINT_SCHED_ATTR
14580 if (s390_tune >= PROCESSOR_2964_Z13)
14581 {
14582 unsigned int unit_mask, m = 1;
14583 int units, j;
14584
14585 unit_mask = s390_get_unit_mask (insn, &units);
14586 fprintf (file, "(units:");
14587 for (j = 0; j < units; j++, m <<= 1)
14588 if (m & unit_mask)
14589 fprintf (file, " u%d", j);
14590 fprintf (file, ")");
14591 }
14592 fprintf (file, "\n");
14593 }
14594 }
14595 }
14596
14597 return s390_issue_rate ();
14598 }
14599
14600
14601 /* This function is called via hook TARGET_SCHED_VARIABLE_ISSUE after
14602 the scheduler has issued INSN. It stores the last issued insn into
14603 last_scheduled_insn in order to make it available for
14604 s390_sched_reorder. */
14605 static int
14606 s390_sched_variable_issue (FILE *file, int verbose, rtx_insn *insn, int more)
14607 {
14608 last_scheduled_insn = insn;
14609
14610 if (s390_tune >= PROCESSOR_2827_ZEC12
14611 && reload_completed
14612 && recog_memoized (insn) >= 0)
14613 {
14614 unsigned int mask = s390_get_sched_attrmask (insn);
14615
14616 if ((mask & S390_SCHED_ATTR_MASK_CRACKED) != 0
14617 || (mask & S390_SCHED_ATTR_MASK_EXPANDED) != 0)
14618 s390_sched_state = S390_SCHED_STATE_CRACKED;
14619 else if ((mask & S390_SCHED_ATTR_MASK_ENDGROUP) != 0
14620 || (mask & S390_SCHED_ATTR_MASK_GROUPALONE) != 0)
14621 s390_sched_state = S390_SCHED_STATE_NORMAL;
14622 else
14623 {
14624 /* Only normal insns are left (mask == 0). */
14625 switch (s390_sched_state)
14626 {
14627 case 0:
14628 case 1:
14629 case 2:
14630 case S390_SCHED_STATE_NORMAL:
14631 if (s390_sched_state == S390_SCHED_STATE_NORMAL)
14632 s390_sched_state = 1;
14633 else
14634 s390_sched_state++;
14635
14636 break;
14637 case S390_SCHED_STATE_CRACKED:
14638 s390_sched_state = S390_SCHED_STATE_NORMAL;
14639 break;
14640 }
14641 }
14642
14643 if (s390_tune >= PROCESSOR_2964_Z13)
14644 {
14645 int units, i;
14646 unsigned unit_mask, m = 1;
14647
14648 unit_mask = s390_get_unit_mask (insn, &units);
14649 gcc_assert (units <= MAX_SCHED_UNITS);
14650
14651 for (i = 0; i < units; i++, m <<= 1)
14652 if (m & unit_mask)
14653 last_scheduled_unit_distance[i] = 0;
14654 else if (last_scheduled_unit_distance[i] < MAX_SCHED_MIX_DISTANCE)
14655 last_scheduled_unit_distance[i]++;
14656 }
14657
14658 if (verbose > 5)
14659 {
14660 unsigned int sched_mask;
14661
14662 sched_mask = s390_get_sched_attrmask (insn);
14663
14664 fprintf (file, ";;\t\tBACKEND: insn %d: ", INSN_UID (insn));
14665 #define PRINT_SCHED_ATTR(M, ATTR) fprintf (file, "%s ", ((M) & sched_mask) ? #ATTR : "");
14666 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_CRACKED, cracked);
14667 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_EXPANDED, expanded);
14668 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_ENDGROUP, endgroup);
14669 PRINT_SCHED_ATTR (S390_SCHED_ATTR_MASK_GROUPALONE, groupalone);
14670 #undef PRINT_SCHED_ATTR
14671
14672 if (s390_tune >= PROCESSOR_2964_Z13)
14673 {
14674 unsigned int unit_mask, m = 1;
14675 int units, j;
14676
14677 unit_mask = s390_get_unit_mask (insn, &units);
14678 fprintf (file, "(units:");
14679 for (j = 0; j < units; j++, m <<= 1)
14680 if (m & unit_mask)
14681 fprintf (file, " %d", j);
14682 fprintf (file, ")");
14683 }
14684 fprintf (file, " sched state: %d\n", s390_sched_state);
14685
14686 if (s390_tune >= PROCESSOR_2964_Z13)
14687 {
14688 int units, j;
14689
14690 s390_get_unit_mask (insn, &units);
14691
14692 fprintf (file, ";;\t\tBACKEND: units unused for: ");
14693 for (j = 0; j < units; j++)
14694 fprintf (file, "%d:%d ", j, last_scheduled_unit_distance[j]);
14695 fprintf (file, "\n");
14696 }
14697 }
14698 }
14699
14700 if (GET_CODE (PATTERN (insn)) != USE
14701 && GET_CODE (PATTERN (insn)) != CLOBBER)
14702 return more - 1;
14703 else
14704 return more;
14705 }
14706
14707 static void
14708 s390_sched_init (FILE *file ATTRIBUTE_UNUSED,
14709 int verbose ATTRIBUTE_UNUSED,
14710 int max_ready ATTRIBUTE_UNUSED)
14711 {
14712 last_scheduled_insn = NULL;
14713 memset (last_scheduled_unit_distance, 0, MAX_SCHED_UNITS * sizeof (int));
14714 s390_sched_state = 0;
14715 }
14716
14717 /* This target hook implementation for TARGET_LOOP_UNROLL_ADJUST calculates
14718 the number of times the loop LOOP should be unrolled when tuning for
14719 CPUs with a built-in stride prefetcher.
14720 The loop body is analyzed for memory accesses by walking over all
14721 rtxs of the loop. Depending on the loop depth and the number of
14722 memory accesses a new number <= nunroll is returned to improve the
14723 behavior of the hardware prefetch unit. */
14724 static unsigned
14725 s390_loop_unroll_adjust (unsigned nunroll, struct loop *loop)
14726 {
14727 basic_block *bbs;
14728 rtx_insn *insn;
14729 unsigned i;
14730 unsigned mem_count = 0;
14731
14732 if (s390_tune < PROCESSOR_2097_Z10)
14733 return nunroll;
14734
14735 /* Count the number of memory references within the loop body. */
14736 bbs = get_loop_body (loop);
14737 subrtx_iterator::array_type array;
14738 for (i = 0; i < loop->num_nodes; i++)
14739 FOR_BB_INSNS (bbs[i], insn)
14740 if (INSN_P (insn) && INSN_CODE (insn) != -1)
14741 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
14742 if (MEM_P (*iter))
14743 mem_count += 1;
14744 free (bbs);
14745
14746 /* Prevent division by zero; nunroll does not need to be adjusted in this case. */
14747 if (mem_count == 0)
14748 return nunroll;
14749
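/* For example, a depth-1 loop containing four memory references is
   limited to MIN (nunroll, 28 / 4), i.e. at most 7 unrolled copies. */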
14750 switch (loop_depth(loop))
14751 {
14752 case 1:
14753 return MIN (nunroll, 28 / mem_count);
14754 case 2:
14755 return MIN (nunroll, 22 / mem_count);
14756 default:
14757 return MIN (nunroll, 16 / mem_count);
14758 }
14759 }
14760
14761 /* Restore the current options. This is a hook function and also called
14762 internally. */
14763
14764 static void
14765 s390_function_specific_restore (struct gcc_options *opts,
14766 struct cl_target_option *ptr ATTRIBUTE_UNUSED)
14767 {
14768 opts->x_s390_cost_pointer = (long)processor_table[opts->x_s390_tune].cost;
14769 }
14770
14771 static void
14772 s390_option_override_internal (bool main_args_p,
14773 struct gcc_options *opts,
14774 const struct gcc_options *opts_set)
14775 {
14776 const char *prefix;
14777 const char *suffix;
14778
14779 /* Set up prefix/suffix so the error messages refer to either the command
14780 line argument, or the attribute(target). */
14781 if (main_args_p)
14782 {
14783 prefix = "-m";
14784 suffix = "";
14785 }
14786 else
14787 {
14788 prefix = "option(\"";
14789 suffix = "\")";
14790 }
14791
14792
14793 /* Architecture mode defaults according to ABI. */
14794 if (!(opts_set->x_target_flags & MASK_ZARCH))
14795 {
14796 if (TARGET_64BIT)
14797 opts->x_target_flags |= MASK_ZARCH;
14798 else
14799 opts->x_target_flags &= ~MASK_ZARCH;
14800 }
14801
14802 /* Set the march default in case it hasn't been specified on cmdline. */
14803 if (!opts_set->x_s390_arch)
14804 opts->x_s390_arch = PROCESSOR_2064_Z900;
14805 else if (opts->x_s390_arch == PROCESSOR_9672_G5
14806 || opts->x_s390_arch == PROCESSOR_9672_G6)
14807 warning (OPT_Wdeprecated, "%sarch=%s%s is deprecated and will be removed "
14808 "in future releases; use at least %sarch=z900%s",
14809 prefix, opts->x_s390_arch == PROCESSOR_9672_G5 ? "g5" : "g6",
14810 suffix, prefix, suffix);
14811
14812 opts->x_s390_arch_flags = processor_flags_table[(int) opts->x_s390_arch];
14813
14814 /* Determine processor to tune for. */
14815 if (!opts_set->x_s390_tune)
14816 opts->x_s390_tune = opts->x_s390_arch;
14817 else if (opts->x_s390_tune == PROCESSOR_9672_G5
14818 || opts->x_s390_tune == PROCESSOR_9672_G6)
14819 warning (OPT_Wdeprecated, "%stune=%s%s is deprecated and will be removed "
14820 "in future releases; use at least %stune=z900%s",
14821 prefix, opts->x_s390_tune == PROCESSOR_9672_G5 ? "g5" : "g6",
14822 suffix, prefix, suffix);
14823
14824 opts->x_s390_tune_flags = processor_flags_table[opts->x_s390_tune];
14825
14826 /* Sanity checks. */
14827 if (opts->x_s390_arch == PROCESSOR_NATIVE
14828 || opts->x_s390_tune == PROCESSOR_NATIVE)
14829 gcc_unreachable ();
14830 if (TARGET_ZARCH_P (opts->x_target_flags) && !TARGET_CPU_ZARCH_P (opts))
14831 error ("z/Architecture mode not supported on %s",
14832 processor_table[(int)opts->x_s390_arch].name);
14833 if (TARGET_64BIT && !TARGET_ZARCH_P (opts->x_target_flags))
14834 error ("64-bit ABI not supported in ESA/390 mode");
14835
14836 /* Enable hardware transactions if available and not explicitly
14837 disabled by user. E.g. with -m31 -march=zEC12 -mzarch */
14838 if (!TARGET_OPT_HTM_P (opts_set->x_target_flags))
14839 {
14840 if (TARGET_CPU_HTM_P (opts) && TARGET_ZARCH_P (opts->x_target_flags))
14841 opts->x_target_flags |= MASK_OPT_HTM;
14842 else
14843 opts->x_target_flags &= ~MASK_OPT_HTM;
14844 }
14845
14846 if (TARGET_OPT_VX_P (opts_set->x_target_flags))
14847 {
14848 if (TARGET_OPT_VX_P (opts->x_target_flags))
14849 {
14850 if (!TARGET_CPU_VX_P (opts))
14851 error ("hardware vector support not available on %s",
14852 processor_table[(int)opts->x_s390_arch].name);
14853 if (TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14854 error ("hardware vector support not available with -msoft-float");
14855 }
14856 }
14857 else
14858 {
14859 if (TARGET_CPU_VX_P (opts))
14860 /* Enable vector support if available and not explicitly disabled
14861 by user. E.g. with -m31 -march=z13 -mzarch */
14862 opts->x_target_flags |= MASK_OPT_VX;
14863 else
14864 opts->x_target_flags &= ~MASK_OPT_VX;
14865 }
14866
14867 /* Use hardware DFP if available and not explicitly disabled by
14868 user. E.g. with -m31 -march=z10 -mzarch */
14869 if (!TARGET_HARD_DFP_P (opts_set->x_target_flags))
14870 {
14871 if (TARGET_DFP_P (opts))
14872 opts->x_target_flags |= MASK_HARD_DFP;
14873 else
14874 opts->x_target_flags &= ~MASK_HARD_DFP;
14875 }
14876
14877 if (TARGET_HARD_DFP_P (opts->x_target_flags) && !TARGET_DFP_P (opts))
14878 {
14879 if (TARGET_HARD_DFP_P (opts_set->x_target_flags))
14880 {
14881 if (!TARGET_CPU_DFP_P (opts))
14882 error ("hardware decimal floating point instructions"
14883 " not available on %s",
14884 processor_table[(int)opts->x_s390_arch].name);
14885 if (!TARGET_ZARCH_P (opts->x_target_flags))
14886 error ("hardware decimal floating point instructions"
14887 " not available in ESA/390 mode");
14888 }
14889 else
14890 opts->x_target_flags &= ~MASK_HARD_DFP;
14891 }
14892
14893 if (TARGET_SOFT_FLOAT_P (opts_set->x_target_flags)
14894 && TARGET_SOFT_FLOAT_P (opts->x_target_flags))
14895 {
14896 if (TARGET_HARD_DFP_P (opts_set->x_target_flags)
14897 && TARGET_HARD_DFP_P (opts->x_target_flags))
14898 error ("-mhard-dfp can%'t be used in conjunction with -msoft-float");
14899
14900 opts->x_target_flags &= ~MASK_HARD_DFP;
14901 }
14902
14903 if (TARGET_BACKCHAIN_P (opts->x_target_flags)
14904 && TARGET_PACKED_STACK_P (opts->x_target_flags)
14905 && TARGET_HARD_FLOAT_P (opts->x_target_flags))
14906 error ("-mbackchain -mpacked-stack -mhard-float are not supported "
14907 "in combination");
14908
14909 if (opts->x_s390_stack_size)
14910 {
14911 if (opts->x_s390_stack_guard >= opts->x_s390_stack_size)
14912 error ("stack size must be greater than the stack guard value");
14913 else if (opts->x_s390_stack_size > 1 << 16)
14914 error ("stack size must not be greater than 64k");
14915 }
14916 else if (opts->x_s390_stack_guard)
14917 error ("-mstack-guard implies use of -mstack-size");
14918
14919 #ifdef TARGET_DEFAULT_LONG_DOUBLE_128
14920 if (!TARGET_LONG_DOUBLE_128_P (opts_set->x_target_flags))
14921 opts->x_target_flags |= MASK_LONG_DOUBLE_128;
14922 #endif
14923
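/* For z10 and newer, adjust the default --param values for loop
   unrolling and complete peeling, unless the user has set them
   explicitly (maybe_set_param_value only overrides defaults).  */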
14924 if (opts->x_s390_tune >= PROCESSOR_2097_Z10)
14925 {
14926 maybe_set_param_value (PARAM_MAX_UNROLLED_INSNS, 100,
14927 opts->x_param_values,
14928 opts_set->x_param_values);
14929 maybe_set_param_value (PARAM_MAX_UNROLL_TIMES, 32,
14930 opts->x_param_values,
14931 opts_set->x_param_values);
14932 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEELED_INSNS, 2000,
14933 opts->x_param_values,
14934 opts_set->x_param_values);
14935 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 64,
14936 opts->x_param_values,
14937 opts_set->x_param_values);
14938 }
14939
14940 maybe_set_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 256,
14941 opts->x_param_values,
14942 opts_set->x_param_values);
14943 /* Values for loop prefetching. */
14944 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE, 256,
14945 opts->x_param_values,
14946 opts_set->x_param_values);
14947 maybe_set_param_value (PARAM_L1_CACHE_SIZE, 128,
14948 opts->x_param_values,
14949 opts_set->x_param_values);
14950 /* s390 has more than 2 cache levels and the caches are much larger.
14951 Since we are always running virtualized, assume that we only get a
14952 small part of the caches above L1. */
14953 maybe_set_param_value (PARAM_L2_CACHE_SIZE, 1500,
14954 opts->x_param_values,
14955 opts_set->x_param_values);
14956 maybe_set_param_value (PARAM_PREFETCH_MIN_INSN_TO_MEM_RATIO, 2,
14957 opts->x_param_values,
14958 opts_set->x_param_values);
14959 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES, 6,
14960 opts->x_param_values,
14961 opts_set->x_param_values);
14962
14963 /* Use the alternative scheduling-pressure algorithm by default. */
14964 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, 2,
14965 opts->x_param_values,
14966 opts_set->x_param_values);
14967
14968 maybe_set_param_value (PARAM_MIN_VECT_LOOP_BOUND, 2,
14969 opts->x_param_values,
14970 opts_set->x_param_values);
14971
14972 /* Call target specific restore function to do post-init work. At the moment,
14973 this just sets opts->x_s390_cost_pointer. */
14974 s390_function_specific_restore (opts, NULL);
14975 }
14976
14977 static void
14978 s390_option_override (void)
14979 {
14980 unsigned int i;
14981 cl_deferred_option *opt;
14982 vec<cl_deferred_option> *v =
14983 (vec<cl_deferred_option> *) s390_deferred_options;
14984
14985 if (v)
14986 FOR_EACH_VEC_ELT (*v, i, opt)
14987 {
14988 switch (opt->opt_index)
14989 {
14990 case OPT_mhotpatch_:
14991 {
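/* Parse the "n,m" argument of -mhotpatch=n,m into the number of
   hotpatch halfwords before and after the function label, stored in
   s390_hotpatch_hw_before_label and s390_hotpatch_hw_after_label.  */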
14992 int val1;
14993 int val2;
14994 char s[256];
14995 char *t;
14996
14997 strncpy (s, opt->arg, 256);
14998 s[255] = 0;
14999 t = strchr (s, ',');
15000 if (t != NULL)
15001 {
15002 *t = 0;
15003 t++;
15004 val1 = integral_argument (s);
15005 val2 = integral_argument (t);
15006 }
15007 else
15008 {
15009 val1 = -1;
15010 val2 = -1;
15011 }
15012 if (val1 == -1 || val2 == -1)
15013 {
15014 /* The argument is not a plain number. */
15015 error ("arguments to %qs should be non-negative integers",
15016 "-mhotpatch=n,m");
15017 break;
15018 }
15019 else if (val1 > s390_hotpatch_hw_max
15020 || val2 > s390_hotpatch_hw_max)
15021 {
15022 error ("argument to %qs is too large (max. %d)",
15023 "-mhotpatch=n,m", s390_hotpatch_hw_max);
15024 break;
15025 }
15026 s390_hotpatch_hw_before_label = val1;
15027 s390_hotpatch_hw_after_label = val2;
15028 break;
15029 }
15030 default:
15031 gcc_unreachable ();
15032 }
15033 }
15034
15035 /* Set up function hooks. */
15036 init_machine_status = s390_init_machine_status;
15037
15038 s390_option_override_internal (true, &global_options, &global_options_set);
15039
15040 /* Save the initial options in case the user does function specific
15041 options. */
15042 target_option_default_node = build_target_option_node (&global_options);
15043 target_option_current_node = target_option_default_node;
15044
15045 /* This cannot reside in s390_option_optimization_table since HAVE_prefetch
15046 requires the arch flags to be evaluated already. Since prefetching
15047 is beneficial on s390, we enable it if available. */
15048 if (flag_prefetch_loop_arrays < 0 && HAVE_prefetch && optimize >= 3)
15049 flag_prefetch_loop_arrays = 1;
15050
15051 if (!s390_pic_data_is_text_relative && !flag_pic)
15052 error ("-mno-pic-data-is-text-relative cannot be used without -fpic/-fPIC");
15053
15054 if (TARGET_TPF)
15055 {
15056 /* Don't emit DWARF3/4 unless specifically selected. The TPF
15057 debuggers do not yet support DWARF 3/4. */
15058 if (!global_options_set.x_dwarf_strict)
15059 dwarf_strict = 1;
15060 if (!global_options_set.x_dwarf_version)
15061 dwarf_version = 2;
15062 }
15063
15064 /* Register a target-specific optimization-and-lowering pass
15065 to run immediately before prologue and epilogue generation.
15066
15067 Registering the pass must be done at start up. It's
15068 convenient to do it here. */
15069 opt_pass *new_pass = new pass_s390_early_mach (g);
15070 struct register_pass_info insert_pass_s390_early_mach =
15071 {
15072 new_pass, /* pass */
15073 "pro_and_epilogue", /* reference_pass_name */
15074 1, /* ref_pass_instance_number */
15075 PASS_POS_INSERT_BEFORE /* po_op */
15076 };
15077 register_pass (&insert_pass_s390_early_mach);
15078 }
15079
15080 #if S390_USE_TARGET_ATTRIBUTE
15081 /* Inner function to process attribute((target(...))): take an argument and
15082 set the current options from it. If the argument is a list, recursively
15083 go over its elements. */
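/* For example (illustrative), __attribute__ ((target ("arch=z13,no-vx")))
   on a function declaration and #pragma GCC target ("zvector") both end up
   here; the attrs[] table below lists the accepted option strings.  */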
15084
15085 static bool
15086 s390_valid_target_attribute_inner_p (tree args,
15087 struct gcc_options *opts,
15088 struct gcc_options *new_opts_set,
15089 bool force_pragma)
15090 {
15091 char *next_optstr;
15092 bool ret = true;
15093
15094 #define S390_ATTRIB(S,O,A) { S, sizeof (S)-1, O, A, 0 }
15095 #define S390_PRAGMA(S,O,A) { S, sizeof (S)-1, O, A, 1 }
15096 static const struct
15097 {
15098 const char *string;
15099 size_t len;
15100 int opt;
15101 int has_arg;
15102 int only_as_pragma;
15103 } attrs[] = {
15104 /* enum options */
15105 S390_ATTRIB ("arch=", OPT_march_, 1),
15106 S390_ATTRIB ("tune=", OPT_mtune_, 1),
15107 /* uinteger options */
15108 S390_ATTRIB ("stack-guard=", OPT_mstack_guard_, 1),
15109 S390_ATTRIB ("stack-size=", OPT_mstack_size_, 1),
15110 S390_ATTRIB ("branch-cost=", OPT_mbranch_cost_, 1),
15111 S390_ATTRIB ("warn-framesize=", OPT_mwarn_framesize_, 1),
15112 /* flag options */
15113 S390_ATTRIB ("backchain", OPT_mbackchain, 0),
15114 S390_ATTRIB ("hard-dfp", OPT_mhard_dfp, 0),
15115 S390_ATTRIB ("hard-float", OPT_mhard_float, 0),
15116 S390_ATTRIB ("htm", OPT_mhtm, 0),
15117 S390_ATTRIB ("vx", OPT_mvx, 0),
15118 S390_ATTRIB ("packed-stack", OPT_mpacked_stack, 0),
15119 S390_ATTRIB ("small-exec", OPT_msmall_exec, 0),
15120 S390_ATTRIB ("soft-float", OPT_msoft_float, 0),
15121 S390_ATTRIB ("mvcle", OPT_mmvcle, 0),
15122 S390_PRAGMA ("zvector", OPT_mzvector, 0),
15123 /* boolean options */
15124 S390_ATTRIB ("warn-dynamicstack", OPT_mwarn_dynamicstack, 0),
15125 };
15126 #undef S390_ATTRIB
15127 #undef S390_PRAGMA
15128
15129 /* If this is a list, recurse to get the options. */
15130 if (TREE_CODE (args) == TREE_LIST)
15131 {
15132 bool ret = true;
15133 int num_pragma_values;
15134 int i;
15135
15136 /* Note: attribs.c:decl_attributes prepends the values from
15137 current_target_pragma to the list of target attributes. To determine
15138 whether we're looking at a value of the attribute or the pragma we
15139 assume that the first [list_length (current_target_pragma)] values in
15140 the list are the values from the pragma. */
15141 num_pragma_values = (!force_pragma && current_target_pragma != NULL)
15142 ? list_length (current_target_pragma) : 0;
15143 for (i = 0; args; args = TREE_CHAIN (args), i++)
15144 {
15145 bool is_pragma;
15146
15147 is_pragma = (force_pragma || i < num_pragma_values);
15148 if (TREE_VALUE (args)
15149 && !s390_valid_target_attribute_inner_p (TREE_VALUE (args),
15150 opts, new_opts_set,
15151 is_pragma))
15152 {
15153 ret = false;
15154 }
15155 }
15156 return ret;
15157 }
15158
15159 else if (TREE_CODE (args) != STRING_CST)
15160 {
15161 error ("attribute %<target%> argument not a string");
15162 return false;
15163 }
15164
15165 /* Handle multiple arguments separated by commas. */
15166 next_optstr = ASTRDUP (TREE_STRING_POINTER (args));
15167
15168 while (next_optstr && *next_optstr != '\0')
15169 {
15170 char *p = next_optstr;
15171 char *orig_p = p;
15172 char *comma = strchr (next_optstr, ',');
15173 size_t len, opt_len;
15174 int opt;
15175 bool opt_set_p;
15176 char ch;
15177 unsigned i;
15178 int mask = 0;
15179 enum cl_var_type var_type;
15180 bool found;
15181
15182 if (comma)
15183 {
15184 *comma = '\0';
15185 len = comma - next_optstr;
15186 next_optstr = comma + 1;
15187 }
15188 else
15189 {
15190 len = strlen (p);
15191 next_optstr = NULL;
15192 }
15193
15194 /* Recognize no-xxx. */
15195 if (len > 3 && p[0] == 'n' && p[1] == 'o' && p[2] == '-')
15196 {
15197 opt_set_p = false;
15198 p += 3;
15199 len -= 3;
15200 }
15201 else
15202 opt_set_p = true;
15203
15204 /* Find the option. */
15205 ch = *p;
15206 found = false;
15207 for (i = 0; i < ARRAY_SIZE (attrs); i++)
15208 {
15209 opt_len = attrs[i].len;
15210 if (ch == attrs[i].string[0]
15211 && ((attrs[i].has_arg) ? len > opt_len : len == opt_len)
15212 && memcmp (p, attrs[i].string, opt_len) == 0)
15213 {
15214 opt = attrs[i].opt;
15215 if (!opt_set_p && cl_options[opt].cl_reject_negative)
15216 continue;
15217 mask = cl_options[opt].var_value;
15218 var_type = cl_options[opt].var_type;
15219 found = true;
15220 break;
15221 }
15222 }
15223
15224 /* Process the option. */
15225 if (!found)
15226 {
15227 error ("attribute(target(\"%s\")) is unknown", orig_p);
15228 return false;
15229 }
15230 else if (attrs[i].only_as_pragma && !force_pragma)
15231 {
15232 /* Value is not allowed for the target attribute. */
15233 error ("value %qs is not supported by attribute %<target%>",
15234 attrs[i].string);
15235 return false;
15236 }
15237
15238 else if (var_type == CLVC_BIT_SET || var_type == CLVC_BIT_CLEAR)
15239 {
15240 if (var_type == CLVC_BIT_CLEAR)
15241 opt_set_p = !opt_set_p;
15242
15243 if (opt_set_p)
15244 opts->x_target_flags |= mask;
15245 else
15246 opts->x_target_flags &= ~mask;
15247 new_opts_set->x_target_flags |= mask;
15248 }
15249
15250 else if (cl_options[opt].var_type == CLVC_BOOLEAN)
15251 {
15252 int value;
15253
15254 if (cl_options[opt].cl_uinteger)
15255 {
15256 /* Unsigned integer argument. Code based on the function
15257 decode_cmdline_option () in opts-common.c. */
15258 value = integral_argument (p + opt_len);
15259 }
15260 else
15261 value = (opt_set_p) ? 1 : 0;
15262
15263 if (value != -1)
15264 {
15265 struct cl_decoded_option decoded;
15266
15267 /* Value range check; only implemented for numeric and boolean
15268 options at the moment. */
15269 generate_option (opt, NULL, value, CL_TARGET, &decoded);
15270 s390_handle_option (opts, new_opts_set, &decoded, input_location);
15271 set_option (opts, new_opts_set, opt, value,
15272 p + opt_len, DK_UNSPECIFIED, input_location,
15273 global_dc);
15274 }
15275 else
15276 {
15277 error ("attribute(target(\"%s\")) is unknown", orig_p);
15278 ret = false;
15279 }
15280 }
15281
15282 else if (cl_options[opt].var_type == CLVC_ENUM)
15283 {
15284 bool arg_ok;
15285 int value;
15286
15287 arg_ok = opt_enum_arg_to_value (opt, p + opt_len, &value, CL_TARGET);
15288 if (arg_ok)
15289 set_option (opts, new_opts_set, opt, value,
15290 p + opt_len, DK_UNSPECIFIED, input_location,
15291 global_dc);
15292 else
15293 {
15294 error ("attribute(target(\"%s\")) is unknown", orig_p);
15295 ret = false;
15296 }
15297 }
15298
15299 else
15300 gcc_unreachable ();
15301 }
15302 return ret;
15303 }
15304
15305 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
15306
15307 tree
15308 s390_valid_target_attribute_tree (tree args,
15309 struct gcc_options *opts,
15310 const struct gcc_options *opts_set,
15311 bool force_pragma)
15312 {
15313 tree t = NULL_TREE;
15314 struct gcc_options new_opts_set;
15315
15316 memset (&new_opts_set, 0, sizeof (new_opts_set));
15317
15318 /* Process each of the options on the chain. */
15319 if (! s390_valid_target_attribute_inner_p (args, opts, &new_opts_set,
15320 force_pragma))
15321 return error_mark_node;
15322
15323 /* If some option was set (even if it has not changed), rerun
15324 s390_option_override_internal, and then save the options away. */
15325 if (new_opts_set.x_target_flags
15326 || new_opts_set.x_s390_arch
15327 || new_opts_set.x_s390_tune
15328 || new_opts_set.x_s390_stack_guard
15329 || new_opts_set.x_s390_stack_size
15330 || new_opts_set.x_s390_branch_cost
15331 || new_opts_set.x_s390_warn_framesize
15332 || new_opts_set.x_s390_warn_dynamicstack_p)
15333 {
15334 const unsigned char *src = (const unsigned char *)opts_set;
15335 unsigned char *dest = (unsigned char *)&new_opts_set;
15336 unsigned int i;
15337
15338 /* Merge the original option flags into the new ones. */
15339 for (i = 0; i < sizeof(*opts_set); i++)
15340 dest[i] |= src[i];
15341
15342 /* Do any overrides, such as arch=xxx, or tune=xxx support. */
15343 s390_option_override_internal (false, opts, &new_opts_set);
15344 /* Save the current options unless we are validating options for
15345 #pragma. */
15346 t = build_target_option_node (opts);
15347 }
15348 return t;
15349 }
15350
15351 /* Hook to validate attribute((target("string"))). */
15352
15353 static bool
15354 s390_valid_target_attribute_p (tree fndecl,
15355 tree ARG_UNUSED (name),
15356 tree args,
15357 int ARG_UNUSED (flags))
15358 {
15359 struct gcc_options func_options;
15360 tree new_target, new_optimize;
15361 bool ret = true;
15362
15363 /* attribute((target("default"))) does nothing, beyond
15364 affecting multi-versioning. */
15365 if (TREE_VALUE (args)
15366 && TREE_CODE (TREE_VALUE (args)) == STRING_CST
15367 && TREE_CHAIN (args) == NULL_TREE
15368 && strcmp (TREE_STRING_POINTER (TREE_VALUE (args)), "default") == 0)
15369 return true;
15370
15371 tree old_optimize = build_optimization_node (&global_options);
15372
15373 /* Get the optimization options of the current function. */
15374 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
15375
15376 if (!func_optimize)
15377 func_optimize = old_optimize;
15378
15379 /* Init func_options. */
15380 memset (&func_options, 0, sizeof (func_options));
15381 init_options_struct (&func_options, NULL);
15382 lang_hooks.init_options_struct (&func_options);
15383
15384 cl_optimization_restore (&func_options, TREE_OPTIMIZATION (func_optimize));
15385
15386 /* Initialize func_options to the default before its target options can
15387 be set. */
15388 cl_target_option_restore (&func_options,
15389 TREE_TARGET_OPTION (target_option_default_node));
15390
15391 new_target = s390_valid_target_attribute_tree (args, &func_options,
15392 &global_options_set,
15393 (args ==
15394 current_target_pragma));
15395 new_optimize = build_optimization_node (&func_options);
15396 if (new_target == error_mark_node)
15397 ret = false;
15398 else if (fndecl && new_target)
15399 {
15400 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = new_target;
15401 if (old_optimize != new_optimize)
15402 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
15403 }
15404 return ret;
15405 }
15406
15407 /* Hook to determine if one function can safely inline another. */
15408
15409 static bool
15410 s390_can_inline_p (tree caller, tree callee)
15411 {
15412 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
15413 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
15414
15415 if (!callee_tree)
15416 callee_tree = target_option_default_node;
15417 if (!caller_tree)
15418 caller_tree = target_option_default_node;
15419 if (callee_tree == caller_tree)
15420 return true;
15421
15422 struct cl_target_option *caller_opts = TREE_TARGET_OPTION (caller_tree);
15423 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
15424 bool ret = true;
15425
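/* All target flags except -msoft-float and -mhard-dfp must match
   exactly; those two are checked separately below.  */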
15426 if ((caller_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP))
15427 != (callee_opts->x_target_flags & ~(MASK_SOFT_FLOAT | MASK_HARD_DFP)))
15428 ret = false;
15429
15430 /* Don't inline functions to be compiled for a more recent arch into a
15431 function for an older arch. */
15432 else if (caller_opts->x_s390_arch < callee_opts->x_s390_arch)
15433 ret = false;
15434
15435 /* Inlining a hard float function into a soft float function is only
15436 allowed if the hard float function doesn't actually make use of
15437 floating point.
15438
15439 We are called from the front ends for multi-versioning call optimization,
15440 so be aware that ipa_fn_summaries may not be available. */
15441 else if (((TARGET_SOFT_FLOAT_P (caller_opts->x_target_flags)
15442 && !TARGET_SOFT_FLOAT_P (callee_opts->x_target_flags))
15443 || (!TARGET_HARD_DFP_P (caller_opts->x_target_flags)
15444 && TARGET_HARD_DFP_P (callee_opts->x_target_flags)))
15445 && (! ipa_fn_summaries
15446 || ipa_fn_summaries->get
15447 (cgraph_node::get (callee))->fp_expressions))
15448 ret = false;
15449
15450 return ret;
15451 }
15452
15453 /* Restore targets globals from NEW_TREE and invalidate s390_previous_fndecl
15454 cache. */
15455
15456 void
15457 s390_activate_target_options (tree new_tree)
15458 {
15459 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
15460 if (TREE_TARGET_GLOBALS (new_tree))
15461 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
15462 else if (new_tree == target_option_default_node)
15463 restore_target_globals (&default_target_globals);
15464 else
15465 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
15466 s390_previous_fndecl = NULL_TREE;
15467 }
15468
15469 /* Establish appropriate back-end context for processing the function
15470 FNDECL. The argument might be NULL to indicate processing at top
15471 level, outside of any function scope. */
15472 static void
15473 s390_set_current_function (tree fndecl)
15474 {
15475 /* Only change the context if the function changes. This hook is called
15476 several times in the course of compiling a function, and we don't want to
15477 slow things down too much or call target_reinit when it isn't safe. */
15478 if (fndecl == s390_previous_fndecl)
15479 return;
15480
15481 tree old_tree;
15482 if (s390_previous_fndecl == NULL_TREE)
15483 old_tree = target_option_current_node;
15484 else if (DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl))
15485 old_tree = DECL_FUNCTION_SPECIFIC_TARGET (s390_previous_fndecl);
15486 else
15487 old_tree = target_option_default_node;
15488
15489 if (fndecl == NULL_TREE)
15490 {
15491 if (old_tree != target_option_current_node)
15492 s390_activate_target_options (target_option_current_node);
15493 return;
15494 }
15495
15496 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
15497 if (new_tree == NULL_TREE)
15498 new_tree = target_option_default_node;
15499
15500 if (old_tree != new_tree)
15501 s390_activate_target_options (new_tree);
15502 s390_previous_fndecl = fndecl;
15503 }
15504 #endif
15505
15506 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
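/* Use the by-pieces infrastructure only for block sizes of 1, 2, 4 and,
   with TARGET_ZARCH, 8 bytes, i.e. sizes a single move can handle.  */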
15507
15508 static bool
15509 s390_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
15510 unsigned int align ATTRIBUTE_UNUSED,
15511 enum by_pieces_operation op ATTRIBUTE_UNUSED,
15512 bool speed_p ATTRIBUTE_UNUSED)
15513 {
15514 return (size == 1 || size == 2
15515 || size == 4 || (TARGET_ZARCH && size == 8));
15516 }
15517
15518 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV hook. */
15519
15520 static void
15521 s390_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
15522 {
15523 tree sfpc = s390_builtin_decls[S390_BUILTIN_s390_sfpc];
15524 tree efpc = s390_builtin_decls[S390_BUILTIN_s390_efpc];
15525 tree call_efpc = build_call_expr (efpc, 0);
15526 tree fenv_var = create_tmp_var_raw (unsigned_type_node);
15527
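/* Masks and shift counts for the IEEE exception mask, IEEE flag and
   data exception code (DXC) fields of the S/390 floating point
   control (FPC) register.  */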
15528 #define FPC_EXCEPTION_MASK HOST_WIDE_INT_UC (0xf8000000)
15529 #define FPC_FLAGS_MASK HOST_WIDE_INT_UC (0x00f80000)
15530 #define FPC_DXC_MASK HOST_WIDE_INT_UC (0x0000ff00)
15531 #define FPC_EXCEPTION_MASK_SHIFT HOST_WIDE_INT_UC (24)
15532 #define FPC_FLAGS_SHIFT HOST_WIDE_INT_UC (16)
15533 #define FPC_DXC_SHIFT HOST_WIDE_INT_UC (8)
15534
15535 /* Generates the equivalent of feholdexcept (&fenv_var)
15536
15537 fenv_var = __builtin_s390_efpc ();
15538 __builtin_s390_sfpc (fenv_var & mask) */
15539 tree old_fpc = build2 (MODIFY_EXPR, unsigned_type_node, fenv_var, call_efpc);
15540 tree new_fpc =
15541 build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var,
15542 build_int_cst (unsigned_type_node,
15543 ~(FPC_DXC_MASK | FPC_FLAGS_MASK |
15544 FPC_EXCEPTION_MASK)));
15545 tree set_new_fpc = build_call_expr (sfpc, 1, new_fpc);
15546 *hold = build2 (COMPOUND_EXPR, void_type_node, old_fpc, set_new_fpc);
15547
15548 /* Generates the equivalent of feclearexcept (FE_ALL_EXCEPT)
15549
15550 __builtin_s390_sfpc (__builtin_s390_efpc () & mask) */
15551 new_fpc = build2 (BIT_AND_EXPR, unsigned_type_node, call_efpc,
15552 build_int_cst (unsigned_type_node,
15553 ~(FPC_DXC_MASK | FPC_FLAGS_MASK)));
15554 *clear = build_call_expr (sfpc, 1, new_fpc);
15555
15556 /* Generates the equivalent of feupdateenv (fenv_var)
15557
15558 old_fpc = __builtin_s390_efpc ();
15559 __builtin_s390_sfpc (fenv_var);
15560 __atomic_feraiseexcept ((old_fpc & FPC_FLAGS_MASK) >> FPC_FLAGS_SHIFT); */
15561
15562 old_fpc = create_tmp_var_raw (unsigned_type_node);
15563 tree store_old_fpc = build2 (MODIFY_EXPR, void_type_node,
15564 old_fpc, call_efpc);
15565
15566 set_new_fpc = build_call_expr (sfpc, 1, fenv_var);
15567
15568 tree raise_old_except = build2 (BIT_AND_EXPR, unsigned_type_node, old_fpc,
15569 build_int_cst (unsigned_type_node,
15570 FPC_FLAGS_MASK));
15571 raise_old_except = build2 (RSHIFT_EXPR, unsigned_type_node, raise_old_except,
15572 build_int_cst (unsigned_type_node,
15573 FPC_FLAGS_SHIFT));
15574 tree atomic_feraiseexcept
15575 = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
15576 raise_old_except = build_call_expr (atomic_feraiseexcept,
15577 1, raise_old_except);
15578
15579 *update = build2 (COMPOUND_EXPR, void_type_node,
15580 build2 (COMPOUND_EXPR, void_type_node,
15581 store_old_fpc, set_new_fpc),
15582 raise_old_except);
15583
15584 #undef FPC_EXCEPTION_MASK
15585 #undef FPC_FLAGS_MASK
15586 #undef FPC_DXC_MASK
15587 #undef FPC_EXCEPTION_MASK_SHIFT
15588 #undef FPC_FLAGS_SHIFT
15589 #undef FPC_DXC_SHIFT
15590 }
15591
15592 /* Return the vector mode to be used for inner mode MODE when doing
15593 vectorization. */
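/* Note that all vector modes returned below for TARGET_VX are 16 bytes
   wide (V2DF, V2DI, V4SI, V8HI, V16QI).  */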
15594 static machine_mode
15595 s390_preferred_simd_mode (scalar_mode mode)
15596 {
15597 if (TARGET_VX)
15598 switch (mode)
15599 {
15600 case E_DFmode:
15601 return V2DFmode;
15602 case E_DImode:
15603 return V2DImode;
15604 case E_SImode:
15605 return V4SImode;
15606 case E_HImode:
15607 return V8HImode;
15608 case E_QImode:
15609 return V16QImode;
15610 default:;
15611 }
15612 return word_mode;
15613 }
15614
15615 /* Our hardware does not require vectors to be strictly aligned. */
15616 static bool
15617 s390_support_vector_misalignment (machine_mode mode ATTRIBUTE_UNUSED,
15618 const_tree type ATTRIBUTE_UNUSED,
15619 int misalignment ATTRIBUTE_UNUSED,
15620 bool is_packed ATTRIBUTE_UNUSED)
15621 {
15622 if (TARGET_VX)
15623 return true;
15624
15625 return default_builtin_support_vector_misalignment (mode, type, misalignment,
15626 is_packed);
15627 }
15628
15629 /* The vector ABI requires vector types to be aligned on an 8 byte
15630 boundary (our stack alignment). However, we allow the user to
15631 override this, even though doing so breaks the ABI. */
15632 static HOST_WIDE_INT
15633 s390_vector_alignment (const_tree type)
15634 {
15635 if (!TARGET_VX_ABI)
15636 return default_vector_alignment (type);
15637
15638 if (TYPE_USER_ALIGN (type))
15639 return TYPE_ALIGN (type);
15640
15641 return MIN (64, tree_to_shwi (TYPE_SIZE (type)));
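/* TYPE_SIZE and the returned alignment are in bits; 64 bits corresponds
   to the 8 byte stack alignment mentioned above.  */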
15642 }
15643
15644 #ifdef HAVE_AS_MACHINE_MACHINEMODE
15645 /* Implement TARGET_ASM_FILE_START. */
15646 static void
15647 s390_asm_file_start (void)
15648 {
15649 default_file_start ();
15650 s390_asm_output_machine_for_arch (asm_out_file);
15651 }
15652 #endif
15653
15654 /* Implement TARGET_ASM_FILE_END. */
15655 static void
15656 s390_asm_file_end (void)
15657 {
15658 #ifdef HAVE_AS_GNU_ATTRIBUTE
15659 varpool_node *vnode;
15660 cgraph_node *cnode;
15661
15662 FOR_EACH_VARIABLE (vnode)
15663 if (TREE_PUBLIC (vnode->decl))
15664 s390_check_type_for_vector_abi (TREE_TYPE (vnode->decl), false, false);
15665
15666 FOR_EACH_FUNCTION (cnode)
15667 if (TREE_PUBLIC (cnode->decl))
15668 s390_check_type_for_vector_abi (TREE_TYPE (cnode->decl), false, false);
15669
15670
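/* Record the vector ABI variant in use as a GNU object attribute
   (tag 8).  */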
15671 if (s390_vector_abi != 0)
15672 fprintf (asm_out_file, "\t.gnu_attribute 8, %d\n",
15673 s390_vector_abi);
15674 #endif
15675 file_end_indicate_exec_stack ();
15676
15677 if (flag_split_stack)
15678 file_end_indicate_split_stack ();
15679 }
15680
15681 /* Return true if TYPE is a vector bool type. */
15682 static inline bool
15683 s390_vector_bool_type_p (const_tree type)
15684 {
15685 return TYPE_VECTOR_OPAQUE (type);
15686 }
15687
15688 /* Return the diagnostic message string if the binary operation OP is
15689 not permitted on TYPE1 and TYPE2, NULL otherwise. */
15690 static const char*
15691 s390_invalid_binary_op (int op ATTRIBUTE_UNUSED, const_tree type1, const_tree type2)
15692 {
15693 bool bool1_p, bool2_p;
15694 bool plusminus_p;
15695 bool muldiv_p;
15696 bool compare_p;
15697 machine_mode mode1, mode2;
15698
15699 if (!TARGET_ZVECTOR)
15700 return NULL;
15701
15702 if (!VECTOR_TYPE_P (type1) || !VECTOR_TYPE_P (type2))
15703 return NULL;
15704
15705 bool1_p = s390_vector_bool_type_p (type1);
15706 bool2_p = s390_vector_bool_type_p (type2);
15707
15708 /* Mixing signed and unsigned types is forbidden for all
15709 operators. */
15710 if (!bool1_p && !bool2_p
15711 && TYPE_UNSIGNED (type1) != TYPE_UNSIGNED (type2))
15712 return N_("types differ in signedness");
15713
15714 plusminus_p = (op == PLUS_EXPR || op == MINUS_EXPR);
15715 muldiv_p = (op == MULT_EXPR || op == RDIV_EXPR || op == TRUNC_DIV_EXPR
15716 || op == CEIL_DIV_EXPR || op == FLOOR_DIV_EXPR
15717 || op == ROUND_DIV_EXPR);
15718 compare_p = (op == LT_EXPR || op == LE_EXPR || op == GT_EXPR || op == GE_EXPR
15719 || op == EQ_EXPR || op == NE_EXPR);
15720
15721 if (bool1_p && bool2_p && (plusminus_p || muldiv_p))
15722 return N_("binary operator does not support two vector bool operands");
15723
15724 if (bool1_p != bool2_p && (muldiv_p || compare_p))
15725 return N_("binary operator does not support vector bool operand");
15726
15727 mode1 = TYPE_MODE (type1);
15728 mode2 = TYPE_MODE (type2);
15729
15730 if (bool1_p != bool2_p && plusminus_p
15731 && (GET_MODE_CLASS (mode1) == MODE_VECTOR_FLOAT
15732 || GET_MODE_CLASS (mode2) == MODE_VECTOR_FLOAT))
15733 return N_("binary operator does not support mixing vector "
15734 "bool with floating point vector operands");
15735
15736 return NULL;
15737 }
15738
15739 /* Implement TARGET_C_EXCESS_PRECISION.
15740
15741 FIXME: For historical reasons, float_t and double_t are typedef'ed to
15742 double on s390, causing operations on float_t to operate in a higher
15743 precision than is necessary. However, it is not the case that SFmode
15744 operations have implicit excess precision, and we generate better code
15745 if we let the compiler know that no implicit extra precision is added.
15746
15747 That means when we are compiling with -fexcess-precision=fast, the value
15748 we set for FLT_EVAL_METHOD will be out of line with the actual precision of
15749 float_t (though they would be correct for -fexcess-precision=standard).
15750
15751 A complete fix would modify glibc to remove the unnecessary typedef
15752 of float_t to double. */
15753
15754 static enum flt_eval_method
15755 s390_excess_precision (enum excess_precision_type type)
15756 {
15757 switch (type)
15758 {
15759 case EXCESS_PRECISION_TYPE_IMPLICIT:
15760 case EXCESS_PRECISION_TYPE_FAST:
15761 /* The fastest type to promote to will always be the native type,
15762 whether that occurs with implicit excess precision or
15763 otherwise. */
15764 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT;
15765 case EXCESS_PRECISION_TYPE_STANDARD:
15766 /* Otherwise, when we are in a standards compliant mode, to
15767 ensure consistency with the implementation in glibc, report that
15768 float is evaluated to the range and precision of double. */
15769 return FLT_EVAL_METHOD_PROMOTE_TO_DOUBLE;
15770 default:
15771 gcc_unreachable ();
15772 }
15773 return FLT_EVAL_METHOD_UNPREDICTABLE;
15774 }
15775
15776 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
15777
15778 static unsigned HOST_WIDE_INT
15779 s390_asan_shadow_offset (void)
15780 {
15781 return TARGET_64BIT ? HOST_WIDE_INT_1U << 52 : HOST_WIDE_INT_UC (0x20000000);
15782 }
15783
15784 /* Initialize GCC target structure. */
15785
15786 #undef TARGET_ASM_ALIGNED_HI_OP
15787 #define TARGET_ASM_ALIGNED_HI_OP "\t.word\t"
15788 #undef TARGET_ASM_ALIGNED_DI_OP
15789 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
15790 #undef TARGET_ASM_INTEGER
15791 #define TARGET_ASM_INTEGER s390_assemble_integer
15792
15793 #undef TARGET_ASM_OPEN_PAREN
15794 #define TARGET_ASM_OPEN_PAREN ""
15795
15796 #undef TARGET_ASM_CLOSE_PAREN
15797 #define TARGET_ASM_CLOSE_PAREN ""
15798
15799 #undef TARGET_OPTION_OVERRIDE
15800 #define TARGET_OPTION_OVERRIDE s390_option_override
15801
15802 #ifdef TARGET_THREAD_SSP_OFFSET
15803 #undef TARGET_STACK_PROTECT_GUARD
15804 #define TARGET_STACK_PROTECT_GUARD hook_tree_void_null
15805 #endif
15806
15807 #undef TARGET_ENCODE_SECTION_INFO
15808 #define TARGET_ENCODE_SECTION_INFO s390_encode_section_info
15809
15810 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15811 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15812
15813 #ifdef HAVE_AS_TLS
15814 #undef TARGET_HAVE_TLS
15815 #define TARGET_HAVE_TLS true
15816 #endif
15817 #undef TARGET_CANNOT_FORCE_CONST_MEM
15818 #define TARGET_CANNOT_FORCE_CONST_MEM s390_cannot_force_const_mem
15819
15820 #undef TARGET_DELEGITIMIZE_ADDRESS
15821 #define TARGET_DELEGITIMIZE_ADDRESS s390_delegitimize_address
15822
15823 #undef TARGET_LEGITIMIZE_ADDRESS
15824 #define TARGET_LEGITIMIZE_ADDRESS s390_legitimize_address
15825
15826 #undef TARGET_RETURN_IN_MEMORY
15827 #define TARGET_RETURN_IN_MEMORY s390_return_in_memory
15828
15829 #undef TARGET_INIT_BUILTINS
15830 #define TARGET_INIT_BUILTINS s390_init_builtins
15831 #undef TARGET_EXPAND_BUILTIN
15832 #define TARGET_EXPAND_BUILTIN s390_expand_builtin
15833 #undef TARGET_BUILTIN_DECL
15834 #define TARGET_BUILTIN_DECL s390_builtin_decl
15835
15836 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
15837 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA s390_output_addr_const_extra
15838
15839 #undef TARGET_ASM_OUTPUT_MI_THUNK
15840 #define TARGET_ASM_OUTPUT_MI_THUNK s390_output_mi_thunk
15841 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
15842 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
15843
15844 #undef TARGET_C_EXCESS_PRECISION
15845 #define TARGET_C_EXCESS_PRECISION s390_excess_precision
15846
15847 #undef TARGET_SCHED_ADJUST_PRIORITY
15848 #define TARGET_SCHED_ADJUST_PRIORITY s390_adjust_priority
15849 #undef TARGET_SCHED_ISSUE_RATE
15850 #define TARGET_SCHED_ISSUE_RATE s390_issue_rate
15851 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
15852 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD s390_first_cycle_multipass_dfa_lookahead
15853
15854 #undef TARGET_SCHED_VARIABLE_ISSUE
15855 #define TARGET_SCHED_VARIABLE_ISSUE s390_sched_variable_issue
15856 #undef TARGET_SCHED_REORDER
15857 #define TARGET_SCHED_REORDER s390_sched_reorder
15858 #undef TARGET_SCHED_INIT
15859 #define TARGET_SCHED_INIT s390_sched_init
15860
15861 #undef TARGET_CANNOT_COPY_INSN_P
15862 #define TARGET_CANNOT_COPY_INSN_P s390_cannot_copy_insn_p
15863 #undef TARGET_RTX_COSTS
15864 #define TARGET_RTX_COSTS s390_rtx_costs
15865 #undef TARGET_ADDRESS_COST
15866 #define TARGET_ADDRESS_COST s390_address_cost
15867 #undef TARGET_REGISTER_MOVE_COST
15868 #define TARGET_REGISTER_MOVE_COST s390_register_move_cost
15869 #undef TARGET_MEMORY_MOVE_COST
15870 #define TARGET_MEMORY_MOVE_COST s390_memory_move_cost
15871 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
15872 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
15873 s390_builtin_vectorization_cost
15874
15875 #undef TARGET_MACHINE_DEPENDENT_REORG
15876 #define TARGET_MACHINE_DEPENDENT_REORG s390_reorg
15877
15878 #undef TARGET_VALID_POINTER_MODE
15879 #define TARGET_VALID_POINTER_MODE s390_valid_pointer_mode
15880
15881 #undef TARGET_BUILD_BUILTIN_VA_LIST
15882 #define TARGET_BUILD_BUILTIN_VA_LIST s390_build_builtin_va_list
15883 #undef TARGET_EXPAND_BUILTIN_VA_START
15884 #define TARGET_EXPAND_BUILTIN_VA_START s390_va_start
15885 #undef TARGET_ASAN_SHADOW_OFFSET
15886 #define TARGET_ASAN_SHADOW_OFFSET s390_asan_shadow_offset
15887 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
15888 #define TARGET_GIMPLIFY_VA_ARG_EXPR s390_gimplify_va_arg
15889
15890 #undef TARGET_PROMOTE_FUNCTION_MODE
15891 #define TARGET_PROMOTE_FUNCTION_MODE s390_promote_function_mode
15892 #undef TARGET_PASS_BY_REFERENCE
15893 #define TARGET_PASS_BY_REFERENCE s390_pass_by_reference
15894
15895 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
15896 #define TARGET_FUNCTION_OK_FOR_SIBCALL s390_function_ok_for_sibcall
15897 #undef TARGET_FUNCTION_ARG
15898 #define TARGET_FUNCTION_ARG s390_function_arg
15899 #undef TARGET_FUNCTION_ARG_ADVANCE
15900 #define TARGET_FUNCTION_ARG_ADVANCE s390_function_arg_advance
15901 #undef TARGET_FUNCTION_ARG_PADDING
15902 #define TARGET_FUNCTION_ARG_PADDING s390_function_arg_padding
15903 #undef TARGET_FUNCTION_VALUE
15904 #define TARGET_FUNCTION_VALUE s390_function_value
15905 #undef TARGET_LIBCALL_VALUE
15906 #define TARGET_LIBCALL_VALUE s390_libcall_value
15907 #undef TARGET_STRICT_ARGUMENT_NAMING
15908 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
15909
15910 #undef TARGET_KEEP_LEAF_WHEN_PROFILED
15911 #define TARGET_KEEP_LEAF_WHEN_PROFILED s390_keep_leaf_when_profiled
15912
15913 #undef TARGET_FIXED_CONDITION_CODE_REGS
15914 #define TARGET_FIXED_CONDITION_CODE_REGS s390_fixed_condition_code_regs
15915
15916 #undef TARGET_CC_MODES_COMPATIBLE
15917 #define TARGET_CC_MODES_COMPATIBLE s390_cc_modes_compatible
15918
15919 #undef TARGET_INVALID_WITHIN_DOLOOP
15920 #define TARGET_INVALID_WITHIN_DOLOOP hook_constcharptr_const_rtx_insn_null
15921
15922 #ifdef HAVE_AS_TLS
15923 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
15924 #define TARGET_ASM_OUTPUT_DWARF_DTPREL s390_output_dwarf_dtprel
15925 #endif
15926
15927 #undef TARGET_DWARF_FRAME_REG_MODE
15928 #define TARGET_DWARF_FRAME_REG_MODE s390_dwarf_frame_reg_mode
15929
15930 #ifdef TARGET_ALTERNATE_LONG_DOUBLE_MANGLING
15931 #undef TARGET_MANGLE_TYPE
15932 #define TARGET_MANGLE_TYPE s390_mangle_type
15933 #endif
15934
15935 #undef TARGET_SCALAR_MODE_SUPPORTED_P
15936 #define TARGET_SCALAR_MODE_SUPPORTED_P s390_scalar_mode_supported_p
15937
15938 #undef TARGET_VECTOR_MODE_SUPPORTED_P
15939 #define TARGET_VECTOR_MODE_SUPPORTED_P s390_vector_mode_supported_p
15940
15941 #undef TARGET_PREFERRED_RELOAD_CLASS
15942 #define TARGET_PREFERRED_RELOAD_CLASS s390_preferred_reload_class
15943
15944 #undef TARGET_SECONDARY_RELOAD
15945 #define TARGET_SECONDARY_RELOAD s390_secondary_reload
15946
15947 #undef TARGET_LIBGCC_CMP_RETURN_MODE
15948 #define TARGET_LIBGCC_CMP_RETURN_MODE s390_libgcc_cmp_return_mode
15949
15950 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
15951 #define TARGET_LIBGCC_SHIFT_COUNT_MODE s390_libgcc_shift_count_mode
15952
15953 #undef TARGET_LEGITIMATE_ADDRESS_P
15954 #define TARGET_LEGITIMATE_ADDRESS_P s390_legitimate_address_p
15955
15956 #undef TARGET_LEGITIMATE_CONSTANT_P
15957 #define TARGET_LEGITIMATE_CONSTANT_P s390_legitimate_constant_p
15958
15959 #undef TARGET_LRA_P
15960 #define TARGET_LRA_P s390_lra_p
15961
15962 #undef TARGET_CAN_ELIMINATE
15963 #define TARGET_CAN_ELIMINATE s390_can_eliminate
15964
15965 #undef TARGET_CONDITIONAL_REGISTER_USAGE
15966 #define TARGET_CONDITIONAL_REGISTER_USAGE s390_conditional_register_usage
15967
15968 #undef TARGET_LOOP_UNROLL_ADJUST
15969 #define TARGET_LOOP_UNROLL_ADJUST s390_loop_unroll_adjust
15970
15971 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
15972 #define TARGET_ASM_TRAMPOLINE_TEMPLATE s390_asm_trampoline_template
15973 #undef TARGET_TRAMPOLINE_INIT
15974 #define TARGET_TRAMPOLINE_INIT s390_trampoline_init
15975
15976 /* PR 79421 */
15977 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
15978 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 1
15979
15980 #undef TARGET_UNWIND_WORD_MODE
15981 #define TARGET_UNWIND_WORD_MODE s390_unwind_word_mode
15982
15983 #undef TARGET_CANONICALIZE_COMPARISON
15984 #define TARGET_CANONICALIZE_COMPARISON s390_canonicalize_comparison
15985
15986 #undef TARGET_HARD_REGNO_SCRATCH_OK
15987 #define TARGET_HARD_REGNO_SCRATCH_OK s390_hard_regno_scratch_ok
15988
15989 #undef TARGET_HARD_REGNO_MODE_OK
15990 #define TARGET_HARD_REGNO_MODE_OK s390_hard_regno_mode_ok
15991 #undef TARGET_MODES_TIEABLE_P
15992 #define TARGET_MODES_TIEABLE_P s390_modes_tieable_p
15993
15994 #undef TARGET_HARD_REGNO_CALL_PART_CLOBBERED
15995 #define TARGET_HARD_REGNO_CALL_PART_CLOBBERED \
15996 s390_hard_regno_call_part_clobbered
15997
15998 #undef TARGET_ATTRIBUTE_TABLE
15999 #define TARGET_ATTRIBUTE_TABLE s390_attribute_table
16000
16001 #undef TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P
16002 #define TARGET_FUNCTION_ATTRIBUTE_INLINABLE_P hook_bool_const_tree_true
16003
16004 #undef TARGET_SET_UP_BY_PROLOGUE
16005 #define TARGET_SET_UP_BY_PROLOGUE s300_set_up_by_prologue
16006
16007 #undef TARGET_EXTRA_LIVE_ON_ENTRY
16008 #define TARGET_EXTRA_LIVE_ON_ENTRY s390_live_on_entry
16009
16010 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
16011 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
16012 s390_use_by_pieces_infrastructure_p
16013
16014 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
16015 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV s390_atomic_assign_expand_fenv
16016
16017 #undef TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN
16018 #define TARGET_INVALID_ARG_FOR_UNPROTOTYPED_FN s390_invalid_arg_for_unprototyped_fn
16019
16020 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
16021 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE s390_preferred_simd_mode
16022
16023 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
16024 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT s390_support_vector_misalignment
16025
16026 #undef TARGET_VECTOR_ALIGNMENT
16027 #define TARGET_VECTOR_ALIGNMENT s390_vector_alignment
16028
16029 #undef TARGET_INVALID_BINARY_OP
16030 #define TARGET_INVALID_BINARY_OP s390_invalid_binary_op
16031
16032 #ifdef HAVE_AS_MACHINE_MACHINEMODE
16033 #undef TARGET_ASM_FILE_START
16034 #define TARGET_ASM_FILE_START s390_asm_file_start
16035 #endif
16036
16037 #undef TARGET_ASM_FILE_END
16038 #define TARGET_ASM_FILE_END s390_asm_file_end
16039
16040 #if S390_USE_TARGET_ATTRIBUTE
16041 #undef TARGET_SET_CURRENT_FUNCTION
16042 #define TARGET_SET_CURRENT_FUNCTION s390_set_current_function
16043
16044 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
16045 #define TARGET_OPTION_VALID_ATTRIBUTE_P s390_valid_target_attribute_p
16046
16047 #undef TARGET_CAN_INLINE_P
16048 #define TARGET_CAN_INLINE_P s390_can_inline_p
16049 #endif
16050
16051 #undef TARGET_OPTION_RESTORE
16052 #define TARGET_OPTION_RESTORE s390_function_specific_restore
16053
16054 struct gcc_target targetm = TARGET_INITIALIZER;
16055
16056 #include "gt-s390.h"