/* The Blackfin code generation auxiliary output file.
   Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Analog Devices.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "tree.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "input.h"
#include "target.h"
#include "target-def.h"
#include "expr.h"
#include "toplev.h"
#include "recog.h"
#include "optabs.h"
#include "ggc.h"
#include "integrate.h"
#include "cgraph.h"
#include "langhooks.h"
#include "bfin-protos.h"
#include "tm-preds.h"
#include "tm-constrs.h"
#include "gt-bfin.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "timevar.h"
#include "df.h"

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
struct GTY(()) machine_function
{
  /* Set if we are notified by the doloop pass that a hardware loop
     was created.  */
  int has_hardware_loops;

  /* Set if we create a memcpy pattern that uses loop registers.  */
  int has_loopreg_clobber;
};

/* RTX for the condition code flag register and the RETS register.  */
extern GTY(()) rtx bfin_cc_rtx;
extern GTY(()) rtx bfin_rets_rtx;
rtx bfin_cc_rtx, bfin_rets_rtx;

int max_arg_registers = 0;

/* Arrays used when emitting register names.  */
const char *short_reg_names[] = SHORT_REGISTER_NAMES;
const char *high_reg_names[] = HIGH_REGISTER_NAMES;
const char *dregs_pair_names[] = DREGS_PAIR_NAMES;
const char *byte_reg_names[] = BYTE_REGISTER_NAMES;

static int arg_regs[] = FUNCTION_ARG_REGISTERS;
static int ret_regs[] = FUNCTION_RETURN_REGISTERS;

/* Nonzero if -mshared-library-id was given.  */
static int bfin_lib_id_given;

/* Nonzero if -fschedule-insns2 was given.  We override it and
   call the scheduler ourselves during reorg.  */
static int bfin_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int bfin_flag_var_tracking;

/* -mcpu support */
bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN;

/* -msi-revision support.  There are two special values:
   -1      -msi-revision=none.
   0xffff  -msi-revision=any.  */
int bfin_si_revision;

/* The workarounds enabled.  */
unsigned int bfin_workarounds = 0;

struct bfin_cpu
{
  const char *name;
  bfin_cpu_t type;
  int si_revision;
  unsigned int workarounds;
};

struct bfin_cpu bfin_cpus[] =
{
  {"bf512", BFIN_CPU_BF512, 0x0000,
   WA_SPECULATIVE_LOADS},

  {"bf514", BFIN_CPU_BF514, 0x0000,
   WA_SPECULATIVE_LOADS},

  {"bf516", BFIN_CPU_BF516, 0x0000,
   WA_SPECULATIVE_LOADS},

  {"bf518", BFIN_CPU_BF518, 0x0000,
   WA_SPECULATIVE_LOADS},

  {"bf522", BFIN_CPU_BF522, 0x0002,
   WA_SPECULATIVE_LOADS},
  {"bf522", BFIN_CPU_BF522, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS},
  {"bf522", BFIN_CPU_BF522, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS},

  {"bf523", BFIN_CPU_BF523, 0x0002,
   WA_SPECULATIVE_LOADS},
  {"bf523", BFIN_CPU_BF523, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS},
  {"bf523", BFIN_CPU_BF523, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS},

  {"bf524", BFIN_CPU_BF524, 0x0002,
   WA_SPECULATIVE_LOADS},
  {"bf524", BFIN_CPU_BF524, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS},
  {"bf524", BFIN_CPU_BF524, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS},

  {"bf525", BFIN_CPU_BF525, 0x0002,
   WA_SPECULATIVE_LOADS},
  {"bf525", BFIN_CPU_BF525, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS},
  {"bf525", BFIN_CPU_BF525, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS},

  {"bf526", BFIN_CPU_BF526, 0x0002,
   WA_SPECULATIVE_LOADS},
  {"bf526", BFIN_CPU_BF526, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS},
  {"bf526", BFIN_CPU_BF526, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS},

  {"bf527", BFIN_CPU_BF527, 0x0002,
   WA_SPECULATIVE_LOADS},
  {"bf527", BFIN_CPU_BF527, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS},
  {"bf527", BFIN_CPU_BF527, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS},

  {"bf531", BFIN_CPU_BF531, 0x0006,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
  {"bf531", BFIN_CPU_BF531, 0x0005,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf531", BFIN_CPU_BF531, 0x0004,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf531", BFIN_CPU_BF531, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf532", BFIN_CPU_BF532, 0x0006,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
  {"bf532", BFIN_CPU_BF532, 0x0005,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf532", BFIN_CPU_BF532, 0x0004,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf532", BFIN_CPU_BF532, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf533", BFIN_CPU_BF533, 0x0006,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
  {"bf533", BFIN_CPU_BF533, 0x0005,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf533", BFIN_CPU_BF533, 0x0004,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf533", BFIN_CPU_BF533, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf534", BFIN_CPU_BF534, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
  {"bf534", BFIN_CPU_BF534, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf534", BFIN_CPU_BF534, 0x0001,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf536", BFIN_CPU_BF536, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
  {"bf536", BFIN_CPU_BF536, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf536", BFIN_CPU_BF536, 0x0001,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf537", BFIN_CPU_BF537, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
  {"bf537", BFIN_CPU_BF537, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf537", BFIN_CPU_BF537, 0x0001,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf538", BFIN_CPU_BF538, 0x0005,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
  {"bf538", BFIN_CPU_BF538, 0x0004,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
  {"bf538", BFIN_CPU_BF538, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf538", BFIN_CPU_BF538, 0x0002,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf539", BFIN_CPU_BF539, 0x0005,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS},
  {"bf539", BFIN_CPU_BF539, 0x0004,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS},
  {"bf539", BFIN_CPU_BF539, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf539", BFIN_CPU_BF539, 0x0002,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {"bf542", BFIN_CPU_BF542, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
  {"bf542", BFIN_CPU_BF542, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
  {"bf542", BFIN_CPU_BF542, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},

  {"bf544", BFIN_CPU_BF544, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
  {"bf544", BFIN_CPU_BF544, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
  {"bf544", BFIN_CPU_BF544, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},

  {"bf547", BFIN_CPU_BF547, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
  {"bf547", BFIN_CPU_BF547, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
  {"bf547", BFIN_CPU_BF547, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},

  {"bf548", BFIN_CPU_BF548, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
  {"bf548", BFIN_CPU_BF548, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
  {"bf548", BFIN_CPU_BF548, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},

  {"bf549", BFIN_CPU_BF549, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS},
  {"bf549", BFIN_CPU_BF549, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS},
  {"bf549", BFIN_CPU_BF549, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS},

  {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS
   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf561", BFIN_CPU_BF561, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},
  {"bf561", BFIN_CPU_BF561, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS},

  {NULL, 0, 0, 0}
};

int splitting_for_sched, splitting_loops;

static void
bfin_globalize_label (FILE *stream, const char *name)
{
  fputs (".global ", stream);
  assemble_name (stream, name);
  fputc (';', stream);
  fputc ('\n', stream);
}
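
/* bfin_globalize_label thus emits, e.g., ".global foo;" for a symbol foo
   (modulo whatever user label prefix assemble_name applies on this
   target).  */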

static void
output_file_start (void)
{
  FILE *file = asm_out_file;
  int i;

  /* Variable tracking should be run after all optimizations which change order
     of insns.  It also needs a valid CFG.  This can't be done in
     override_options, because flag_var_tracking is finalized after
     that.  */
  bfin_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  fprintf (file, ".file \"%s\";\n", input_filename);

  for (i = 0; arg_regs[i] >= 0; i++)
    ;
  max_arg_registers = i;	/* how many arg regs are used  */
}

/* Called early in the compilation to conditionally modify
   fixed_regs/call_used_regs.  */

void
conditional_register_usage (void)
{
  /* Initialize the condition code flag register rtx.  */
  bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC);
  bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS);
}

/* Examine machine-dependent attributes of function type FUNTYPE and return
   its kind.  See the definition of E_FUNKIND.  */

static e_funkind
funkind (const_tree funtype)
{
  tree attrs = TYPE_ATTRIBUTES (funtype);
  if (lookup_attribute ("interrupt_handler", attrs))
    return INTERRUPT_HANDLER;
  else if (lookup_attribute ("exception_handler", attrs))
    return EXCPT_HANDLER;
  else if (lookup_attribute ("nmi_handler", attrs))
    return NMI_HANDLER;
  else
    return SUBROUTINE;
}
\f
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec;
      rtx tmp;

      if (TARGET_ID_SHARED_LIBRARY)
        unspec = UNSPEC_MOVE_PIC;
      else if (GET_CODE (addr) == SYMBOL_REF
               && SYMBOL_REF_FUNCTION_P (addr))
        unspec = UNSPEC_FUNCDESC_GOT17M4;
      else
        unspec = UNSPEC_MOVE_FDPIC;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
      new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

      emit_move_insn (reg, new_rtx);
      if (picreg == pic_offset_table_rtx)
        crtl->uses_pic_offset_table = 1;
      return reg;
    }

  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);
          gcc_assert (GET_CODE (addr) == PLUS);
        }

      if (XEXP (addr, 0) == picreg)
        return orig;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
                                     base == reg ? NULL_RTX : reg,
                                     picreg);

      if (GET_CODE (addr) == CONST_INT)
        {
          gcc_assert (! reload_in_progress && ! reload_completed);
          addr = force_reg (Pmode, addr);
        }

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
        {
          base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
          addr = XEXP (addr, 1);
        }

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}
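
/* A sketch of what legitimize_pic_address produces: under
   -mid-shared-library, a SYMBOL_REF sym becomes a GOT load that prints
   (see print_operand below) roughly as "reg = [picreg + sym@GOT];"; under
   FDPIC, the @GOT17M4 / @FUNCDESC_GOT17M4 relocations are used instead.  */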
\f
/* Stack frame layout.  */

/* For a given REGNO, determine whether it must be saved in the function
   prologue.  IS_INTHANDLER specifies whether we're generating a normal
   prologue or an interrupt/exception one.  */
static bool
must_save_p (bool is_inthandler, unsigned regno)
{
  if (D_REGNO_P (regno))
    {
      bool is_eh_return_reg = false;
      if (crtl->calls_eh_return)
        {
          unsigned j;
          for (j = 0; ; j++)
            {
              unsigned test = EH_RETURN_DATA_REGNO (j);
              if (test == INVALID_REGNUM)
                break;
              if (test == regno)
                is_eh_return_reg = true;
            }
        }

      return (is_eh_return_reg
              || (df_regs_ever_live_p (regno)
                  && !fixed_regs[regno]
                  && (is_inthandler || !call_used_regs[regno])));
    }
  else if (P_REGNO_P (regno))
    {
      return ((df_regs_ever_live_p (regno)
               && !fixed_regs[regno]
               && (is_inthandler || !call_used_regs[regno]))
              || (is_inthandler
                  && (ENABLE_WA_05000283 || ENABLE_WA_05000315)
                  && regno == REG_P5)
              || (!TARGET_FDPIC
                  && regno == PIC_OFFSET_TABLE_REGNUM
                  && (crtl->uses_pic_offset_table
                      || (TARGET_ID_SHARED_LIBRARY
                          && !current_function_is_leaf))));
    }
  else
    return ((is_inthandler || !call_used_regs[regno])
            && (df_regs_ever_live_p (regno)
                || (!leaf_function_p () && call_used_regs[regno])));

}

/* Compute the number of DREGS to save with a push_multiple operation.
   This could include registers that aren't modified in the function,
   since push_multiple only takes a range of registers.
   If IS_INTHANDLER, then everything that is live must be saved, even
   if normally call-clobbered.
   If CONSECUTIVE, return the number of registers we can save in one
   instruction with a push/pop multiple instruction.  */

static int
n_dregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_R7 + 1; i-- != REG_R0;)
    {
      if (must_save_p (is_inthandler, i))
        count++;
      else if (consecutive)
        return count;
    }
  return count;
}
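
/* For example, if R7, R6 and R4 must be saved, n_dregs_to_save returns 3
   when CONSECUTIVE is false but 2 when it is true: the gap at R5 ends the
   run of registers that one push multiple, starting at R7, could cover.  */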

/* Like n_dregs_to_save, but compute number of PREGS to save.  */

static int
n_pregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_P5 + 1; i-- != REG_P0;)
    if (must_save_p (is_inthandler, i))
      count++;
    else if (consecutive)
      return count;
  return count;
}

/* Determine if we are going to save the frame pointer in the prologue.  */

static bool
must_save_fp_p (void)
{
  return df_regs_ever_live_p (REG_FP);
}

/* Determine if we are going to save the RETS register.  */
static bool
must_save_rets_p (void)
{
  return df_regs_ever_live_p (REG_RETS);
}

static bool
stack_frame_needed_p (void)
{
  /* EH return puts a new return address into the frame using an
     address relative to the frame pointer.  */
  if (crtl->calls_eh_return)
    return true;
  return frame_pointer_needed;
}

/* Emit code to save registers in the prologue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler)
{
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int dregno, pregno;
  int total_consec = ndregs_consec + npregs_consec;
  int i, d_to_save;

  if (saveall || is_inthandler)
    {
      rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT));

      RTX_FRAME_RELATED_P (insn) = 1;
      for (dregno = REG_LT0; dregno <= REG_LB1; dregno++)
        if (! current_function_is_leaf
            || cfun->machine->has_hardware_loops
            || cfun->machine->has_loopreg_clobber
            || (ENABLE_WA_05000257
                && (dregno == REG_LC0 || dregno == REG_LC1)))
          {
            insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno));
            RTX_FRAME_RELATED_P (insn) = 1;
          }
    }

  if (total_consec != 0)
    {
      rtx insn;
      rtx val = GEN_INT (-total_consec * 4);
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2));

      XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val),
                                            UNSPEC_PUSH_MULTIPLE);
      XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg,
                                                        gen_rtx_PLUS (Pmode,
                                                                      spreg,
                                                                      val));
      RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1;
      d_to_save = ndregs_consec;
      dregno = REG_R7 + 1 - ndregs_consec;
      pregno = REG_P5 + 1 - npregs_consec;
      for (i = 0; i < total_consec; i++)
        {
          rtx memref = gen_rtx_MEM (word_mode,
                                    gen_rtx_PLUS (Pmode, spreg,
                                                  GEN_INT (- i * 4 - 4)));
          rtx subpat;
          if (d_to_save > 0)
            {
              subpat = gen_rtx_SET (VOIDmode, memref,
                                    gen_rtx_REG (word_mode, dregno++));
              d_to_save--;
            }
          else
            {
              subpat = gen_rtx_SET (VOIDmode, memref,
                                    gen_rtx_REG (word_mode, pregno++));
            }
          XVECEXP (pat, 0, i + 1) = subpat;
          RTX_FRAME_RELATED_P (subpat) = 1;
        }
      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  for (dregno = REG_R0; ndregs != ndregs_consec; dregno++)
    {
      if (must_save_p (is_inthandler, dregno))
        {
          rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno));
          RTX_FRAME_RELATED_P (insn) = 1;
          ndregs--;
        }
    }
  for (pregno = REG_P0; npregs != npregs_consec; pregno++)
    {
      if (must_save_p (is_inthandler, pregno))
        {
          rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno));
          RTX_FRAME_RELATED_P (insn) = 1;
          npregs--;
        }
    }
  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (saveall
        || (is_inthandler
            && (df_regs_ever_live_p (i)
                || (!leaf_function_p () && call_used_regs[i]))))
      {
        rtx insn;
        if (i == REG_A0 || i == REG_A1)
          insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1),
                                 gen_rtx_REG (PDImode, i));
        else
          insn = emit_move_insn (predec, gen_rtx_REG (SImode, i));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}
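
/* An illustration with invented values: if R7:6 and P5:4 must be saved,
   TOTAL_CONSEC is 4 and the PARALLEL built in expand_prologue_reg_save
   becomes a single push-multiple insn, roughly "[--SP] = (R7:6, P5:4)" in
   Blackfin assembly, with the SP -= 16 adjustment expressed in the same
   pattern.  */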

/* Emit code to restore registers in the epilogue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler)
{
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int total_consec = ndregs_consec + npregs_consec;
  int i, regno;
  rtx insn;

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  for (i = REG_CC - 1; i > REG_P7; i--)
    if (saveall
        || (is_inthandler
            && (df_regs_ever_live_p (i)
                || (!leaf_function_p () && call_used_regs[i]))))
      {
        if (i == REG_A0 || i == REG_A1)
          {
            rtx mem = gen_rtx_MEM (PDImode, postinc1);
            MEM_VOLATILE_P (mem) = 1;
            emit_move_insn (gen_rtx_REG (PDImode, i), mem);
          }
        else
          emit_move_insn (gen_rtx_REG (SImode, i), postinc);
      }

  regno = REG_P5 - npregs_consec;
  for (; npregs != npregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
        {
          emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
          npregs--;
        }
    }
  regno = REG_R7 - ndregs_consec;
  for (; ndregs != ndregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
        {
          emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
          ndregs--;
        }
    }

  if (total_consec != 0)
    {
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1));
      XVECEXP (pat, 0, 0)
        = gen_rtx_SET (VOIDmode, spreg,
                       gen_rtx_PLUS (Pmode, spreg,
                                     GEN_INT (total_consec * 4)));

      if (npregs_consec > 0)
        regno = REG_P5 + 1;
      else
        regno = REG_R7 + 1;

      for (i = 0; i < total_consec; i++)
        {
          rtx addr = (i > 0
                      ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4))
                      : spreg);
          rtx memref = gen_rtx_MEM (word_mode, addr);

          regno--;
          XVECEXP (pat, 0, i + 1)
            = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref);

          if (npregs_consec > 0)
            {
              if (--npregs_consec == 0)
                regno = REG_R7 + 1;
            }
        }

      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (saveall || is_inthandler)
    {
      for (regno = REG_LB1; regno >= REG_LT0; regno--)
        if (! current_function_is_leaf
            || cfun->machine->has_hardware_loops
            || cfun->machine->has_loopreg_clobber
            || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1)))
          emit_move_insn (gen_rtx_REG (SImode, regno), postinc);

      emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
    }
}

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prologue to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.

   Blackfin specific :
   - The VDSP C compiler manual (our ABI) says that a variable args function
     should save the R0, R1 and R2 registers in the stack.
   - The caller will always leave space on the stack for the
     arguments that are passed in registers, so we don't have
     to leave any extra space.
   - Now, the va_start pointer can access all arguments from the stack.  */

static void
setup_incoming_varargs (CUMULATIVE_ARGS *cum,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        tree type ATTRIBUTE_UNUSED, int *pretend_size,
                        int no_rtl)
{
  rtx mem;
  int i;

  if (no_rtl)
    return;

  /* The move for named arguments will be generated automatically by the
     compiler.  We need to generate the move rtx for the unnamed arguments
     if they are in the first 3 words.  We assume at least 1 named argument
     exists, so we never generate [ARGP] = R0 here.  */

  for (i = cum->words + 1; i < max_arg_registers; i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (arg_pointer_rtx,
                                        (i * UNITS_PER_WORD)));
      emit_move_insn (mem, gen_rtx_REG (Pmode, i));
    }

  *pretend_size = 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
bfin_frame_pointer_required (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));

  if (fkind != SUBROUTINE)
    return true;

  /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used,
     so we have to override it for non-leaf functions.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! current_function_is_leaf)
    return true;

  return false;
}

/* Return the number of registers pushed during the prologue.  */

static int
n_regs_saved_by_prologue (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  bool is_inthandler = fkind != SUBROUTINE;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE
              || (is_inthandler && !current_function_is_leaf));
  int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false);
  int n = ndregs + npregs;
  int i;

  if (all || stack_frame_needed_p ())
    n += 2;
  else
    {
      if (must_save_fp_p ())
        n++;
      if (must_save_rets_p ())
        n++;
    }

  if (fkind != SUBROUTINE || all)
    {
      /* Increment once for ASTAT.  */
      n++;
      if (! current_function_is_leaf
          || cfun->machine->has_hardware_loops
          || cfun->machine->has_loopreg_clobber)
        {
          n += 6;
        }
    }

  if (fkind != SUBROUTINE)
    {
      /* RETE/X/N.  */
      if (lookup_attribute ("nesting", attrs))
        n++;
    }

  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (all
        || (fkind != SUBROUTINE
            && (df_regs_ever_live_p (i)
                || (!leaf_function_p () && call_used_regs[i]))))
      n += i == REG_A0 || i == REG_A1 ? 2 : 1;

  return n;
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All other eliminations are valid.  */

static bool
bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
}

/* Return the offset between two registers, one to be eliminated, and the other
   its replacement, at the start of a routine.  */

HOST_WIDE_INT
bfin_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset = 0;

  if (from == ARG_POINTER_REGNUM)
    offset = n_regs_saved_by_prologue () * 4;

  if (to == STACK_POINTER_REGNUM)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
        offset += crtl->outgoing_args_size;
      else if (crtl->outgoing_args_size)
        offset += FIXED_STACK_AREA;

      offset += get_frame_size ();
    }

  return offset;
}
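
/* A worked example with invented numbers: with three registers saved by
   the prologue, a 16-byte frame and no outgoing arguments,
   bfin_initial_elimination_offset (ARG_POINTER_REGNUM, STACK_POINTER_REGNUM)
   yields 3 * 4 + 16 = 28 bytes.  */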

/* Emit code to load a constant CONSTANT into register REG; setting
   RTX_FRAME_RELATED_P on all insns we generate if RELATED is true.
   Make sure that the insns we generate need not be split.  */

static void
frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related)
{
  rtx insn;
  rtx cst = GEN_INT (constant);

  if (constant >= -32768 && constant < 65536)
    insn = emit_move_insn (reg, cst);
  else
    {
      /* We don't call split_load_immediate here, since dwarf2out.c can get
         confused about some of the more clever sequences it can generate.  */
      insn = emit_insn (gen_movsi_high (reg, cst));
      if (related)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_movsi_low (reg, reg, cst));
    }
  if (related)
    RTX_FRAME_RELATED_P (insn) = 1;
}
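
/* E.g. a constant such as 0x12345678 lies outside [-32768, 65536) and is
   therefore built with the movsi_high/movsi_low pair, which should come
   out in assembly roughly as "reg.H = 0x1234; reg.L = 0x5678;".  */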

/* Generate efficient code to add a value to a P register.
   Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero.
   EPILOGUE_P is zero if this function is called for prologue,
   otherwise it's nonzero.  And it's less than zero if this is for
   sibcall epilogue.  */

static void
add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p)
{
  if (value == 0)
    return;

  /* Choose whether to use a sequence using a temporary register, or
     a sequence with multiple adds.  We can add a signed 7-bit value
     in one instruction.  */
  if (value > 120 || value < -120)
    {
      rtx tmpreg;
      rtx tmpreg2;
      rtx insn;

      tmpreg2 = NULL_RTX;

      /* For prologue or normal epilogue, P1 can be safely used
         as the temporary register.  For sibcall epilogue, we try to find
         a call used P register, which will be restored in epilogue.
         If we cannot find such a P register, we have to use one I register
         to help us.  */

      if (epilogue_p >= 0)
        tmpreg = gen_rtx_REG (SImode, REG_P1);
      else
        {
          int i;
          for (i = REG_P0; i <= REG_P5; i++)
            if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
                || (!TARGET_FDPIC
                    && i == PIC_OFFSET_TABLE_REGNUM
                    && (crtl->uses_pic_offset_table
                        || (TARGET_ID_SHARED_LIBRARY
                            && ! current_function_is_leaf))))
              break;
          if (i <= REG_P5)
            tmpreg = gen_rtx_REG (SImode, i);
          else
            {
              tmpreg = gen_rtx_REG (SImode, REG_P1);
              tmpreg2 = gen_rtx_REG (SImode, REG_I0);
              emit_move_insn (tmpreg2, tmpreg);
            }
        }

      if (frame)
        frame_related_constant_load (tmpreg, value, TRUE);
      else
        insn = emit_move_insn (tmpreg, GEN_INT (value));

      insn = emit_insn (gen_addsi3 (reg, reg, tmpreg));
      if (frame)
        RTX_FRAME_RELATED_P (insn) = 1;

      if (tmpreg2 != NULL_RTX)
        emit_move_insn (tmpreg, tmpreg2);
    }
  else
    do
      {
        int size = value;
        rtx insn;

        if (size > 60)
          size = 60;
        else if (size < -60)
          /* We could use -62, but that would leave the stack unaligned, so
             it's no good.  */
          size = -60;

        insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
        if (frame)
          RTX_FRAME_RELATED_P (insn) = 1;
        value -= size;
      }
    while (value != 0);
}
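
/* For example, add_to_reg (reg, 100, ...) stays on the multiple-add path
   and emits "reg += 60; reg += 40;", while a value of 200 exceeds the
   +/-120 budget, so it is loaded into a temporary P register and added in
   one insn.  */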

/* Generate a LINK insn for a frame sized FRAME_SIZE.  If this constant
   is too large, generate a sequence of insns that has the same effect.
   SPREG contains (reg:SI REG_SP).  */

static void
emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size)
{
  HOST_WIDE_INT link_size = frame_size;
  rtx insn;
  int i;

  if (link_size > 262140)
    link_size = 262140;

  /* Use a LINK insn with as big a constant as possible, then subtract
     any remaining size from the SP.  */
  insn = emit_insn (gen_link (GEN_INT (-8 - link_size)));
  RTX_FRAME_RELATED_P (insn) = 1;

  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
    {
      rtx set = XVECEXP (PATTERN (insn), 0, i);
      gcc_assert (GET_CODE (set) == SET);
      RTX_FRAME_RELATED_P (set) = 1;
    }

  frame_size -= link_size;

  if (frame_size > 0)
    {
      /* Must use a call-clobbered PREG that isn't the static chain.  */
      rtx tmpreg = gen_rtx_REG (Pmode, REG_P1);

      frame_related_constant_load (tmpreg, -frame_size, TRUE);
      insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}
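
/* Example: for a 300000-byte frame, the LINK insn covers the maximum of
   262140 bytes (its constant also accounts for the 8 bytes of FP and RETS
   that LINK itself pushes), and the remaining 37860 bytes are subtracted
   from SP through P1.  */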

/* Return the number of bytes we must reserve for outgoing arguments
   in the current function's stack frame.  */

static HOST_WIDE_INT
arg_area_size (void)
{
  if (crtl->outgoing_args_size)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
        return crtl->outgoing_args_size;
      else
        return FIXED_STACK_AREA;
    }
  return 0;
}

/* Save RETS and FP, and allocate a stack frame.  ALL is true if the
   function must save all its registers (true only for certain interrupt
   handlers).  */

static void
do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all)
{
  frame_size += arg_area_size ();

  if (all
      || stack_frame_needed_p ()
      || (must_save_rets_p () && must_save_fp_p ()))
    emit_link_insn (spreg, frame_size);
  else
    {
      if (must_save_rets_p ())
        {
          rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
                                            gen_rtx_PRE_DEC (Pmode, spreg)),
                               bfin_rets_rtx);
          rtx insn = emit_insn (pat);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      if (must_save_fp_p ())
        {
          rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
                                            gen_rtx_PRE_DEC (Pmode, spreg)),
                               gen_rtx_REG (Pmode, REG_FP));
          rtx insn = emit_insn (pat);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      add_to_reg (spreg, -frame_size, 1, 0);
    }
}

/* Like do_link, but used for epilogues to deallocate the stack frame.
   EPILOGUE_P is zero if this function is called for prologue,
   otherwise it's nonzero.  And it's less than zero if this is for
   sibcall epilogue.  */

static void
do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p)
{
  frame_size += arg_area_size ();

  if (stack_frame_needed_p ())
    emit_insn (gen_unlink ());
  else
    {
      rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));

      add_to_reg (spreg, frame_size, 0, epilogue_p);
      if (all || must_save_fp_p ())
        {
          rtx fpreg = gen_rtx_REG (Pmode, REG_FP);
          emit_move_insn (fpreg, postinc);
          emit_use (fpreg);
        }
      if (all || must_save_rets_p ())
        {
          emit_move_insn (bfin_rets_rtx, postinc);
          emit_use (bfin_rets_rtx);
        }
    }
}

/* Generate a prologue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler prologues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  rtx insn;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  tree kspisusp = lookup_attribute ("kspisusp", attrs);

  if (kspisusp)
    {
      insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* We need space on the stack in case we need to save the argument
     registers.  */
  if (fkind == EXCPT_HANDLER)
    {
      insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save everything here.  */
  if (!current_function_is_leaf)
    all = true;
  expand_prologue_reg_save (spreg, all, true);

  if (ENABLE_WA_05000283 || ENABLE_WA_05000315)
    {
      rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode));
      rtx p5reg = gen_rtx_REG (Pmode, REG_P5);
      emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx));
      emit_insn (gen_movsi_high (p5reg, chipid));
      emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
      emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
    }

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      insn = emit_move_insn (predec, srcreg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  do_link (spreg, frame_size, all);

  if (fkind == EXCPT_HANDLER)
    {
      rtx r0reg = gen_rtx_REG (SImode, REG_R0);
      rtx r1reg = gen_rtx_REG (SImode, REG_R1);
      rtx r2reg = gen_rtx_REG (SImode, REG_R2);
      rtx insn;

      insn = emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT));
      insn = emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26)));
      insn = emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26)));
      insn = emit_move_insn (r1reg, spreg);
      insn = emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP));
      insn = emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8)));
    }
}

/* Generate an epilogue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler epilogues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all)
{
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  do_unlink (spreg, get_frame_size (), all, 1);

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      emit_move_insn (srcreg, postinc);
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save (and restore) everything here.  */
  if (!current_function_is_leaf)
    all = true;

  expand_epilogue_reg_restore (spreg, all, true);

  /* Deallocate any space we left on the stack in case we needed to save the
     argument registers.  */
  if (fkind == EXCPT_HANDLER)
    emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind])));
}

/* Used while emitting the prologue to generate code to load the correct value
   into the PIC register, which is passed in DEST.  */

static rtx
bfin_load_pic_reg (rtx dest)
{
  struct cgraph_local_info *i = NULL;
  rtx addr, insn;

  i = cgraph_local_info (current_function_decl);

  /* Functions local to the translation unit don't need to reload the
     pic reg, since the caller always passes a usable one.  */
  if (i && i->local)
    return pic_offset_table_rtx;

  if (bfin_lib_id_given)
    addr = plus_constant (pic_offset_table_rtx, -4 - bfin_library_id * 4);
  else
    addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                         gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                         UNSPEC_LIBRARY_OFFSET));
  insn = emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr)));
  return dest;
}

/* Generate RTL for the prologue of the current function.  */

void
bfin_expand_prologue (void)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  rtx pic_reg_loaded = NULL_RTX;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_prologue (spreg, fkind, all);
      return;
    }

  if (crtl->limit_stack
      || (TARGET_STACK_CHECK_L1
          && !DECL_NO_LIMIT_STACK (current_function_decl)))
    {
      HOST_WIDE_INT offset
        = bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
                                           STACK_POINTER_REGNUM);
      rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX;
      rtx p2reg = gen_rtx_REG (Pmode, REG_P2);

      if (!lim)
        {
          emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode));
          emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg));
          lim = p2reg;
        }
      if (GET_CODE (lim) == SYMBOL_REF)
        {
          if (TARGET_ID_SHARED_LIBRARY)
            {
              rtx p1reg = gen_rtx_REG (Pmode, REG_P1);
              rtx val;
              pic_reg_loaded = bfin_load_pic_reg (p2reg);
              val = legitimize_pic_address (stack_limit_rtx, p1reg,
                                            pic_reg_loaded);
              emit_move_insn (p1reg, val);
              frame_related_constant_load (p2reg, offset, FALSE);
              emit_insn (gen_addsi3 (p2reg, p2reg, p1reg));
              lim = p2reg;
            }
          else
            {
              rtx limit = plus_constant (lim, offset);
              emit_move_insn (p2reg, limit);
              lim = p2reg;
            }
        }
      else
        {
          if (lim != p2reg)
            emit_move_insn (p2reg, lim);
          add_to_reg (p2reg, offset, 0, 0);
          lim = p2reg;
        }
      emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim));
      emit_insn (gen_trapifcc ());
    }
  expand_prologue_reg_save (spreg, all, false);

  do_link (spreg, frame_size, false);

  if (TARGET_ID_SHARED_LIBRARY
      && !TARGET_SEP_DATA
      && (crtl->uses_pic_offset_table
          || !current_function_is_leaf))
    bfin_load_pic_reg (pic_offset_table_rtx);
}

/* Generate RTL for the epilogue of the current function.  NEED_RETURN is zero
   if this is for a sibcall.  EH_RETURN is nonzero if we're expanding an
   eh_return pattern.  SIBCALL_P is true if this is a sibcall epilogue,
   false otherwise.  */

void
bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p)
{
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  int e = sibcall_p ? -1 : 1;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_epilogue (spreg, fkind, all);
      return;
    }

  do_unlink (spreg, get_frame_size (), false, e);

  expand_epilogue_reg_restore (spreg, all, false);

  /* Omit the return insn if this is for a sibcall.  */
  if (! need_return)
    return;

  if (eh_return)
    emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS)));
}
\f
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

int
bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
                           unsigned int new_reg)
{
  /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be
     call-clobbered.  */

  if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE
      && !df_regs_ever_live_p (new_reg))
    return 0;

  return 1;
}

/* Return the value of the return address for the frame COUNT steps up
   from the current frame, after the prologue.
   We punt for everything but the current frame by returning const0_rtx.  */

rtx
bfin_return_addr_rtx (int count)
{
  if (count != 0)
    return const0_rtx;

  return get_hard_reg_initial_val (Pmode, REG_RETS);
}

static rtx
bfin_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;

  if (GET_CODE (x) != MEM)
    return orig_x;

  x = XEXP (x, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC
      && GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    return XVECEXP (XEXP (x, 1), 0, 0);

  return orig_x;
}

/* This predicate is used to compute the length of a load/store insn.
   OP is a MEM rtx, we return nonzero if its addressing mode requires a
   32-bit instruction.  */

int
effective_address_32bit_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT offset;

  mode = GET_MODE (op);
  op = XEXP (op, 0);

  if (GET_CODE (op) != PLUS)
    {
      gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC
                  || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC);
      return 0;
    }

  if (GET_CODE (XEXP (op, 1)) == UNSPEC)
    return 1;

  offset = INTVAL (XEXP (op, 1));

  /* All byte loads use a 16-bit offset.  */
  if (GET_MODE_SIZE (mode) == 1)
    return 1;

  if (GET_MODE_SIZE (mode) == 4)
    {
      /* Frame pointer relative loads can use a negative offset, all others
         are restricted to a small positive one.  */
      if (XEXP (op, 0) == frame_pointer_rtx)
        return offset < -128 || offset > 60;
      return offset < 0 || offset > 60;
    }

  /* Must be HImode now.  */
  return offset < 0 || offset > 30;
}
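
/* For instance, an SImode access [FP - 100] still fits the 16-bit form,
   since FP-relative offsets may reach down to -128, whereas [P0 - 4] needs
   the 32-bit form: for other base registers only offsets 0..60 qualify.  */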

/* Returns true if X is a memory reference using an I register.  */
bool
bfin_dsp_memref_p (rtx x)
{
  if (! MEM_P (x))
    return false;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC
      || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC)
    x = XEXP (x, 0);
  return IREG_P (x);
}

/* Return cost of the memory address ADDR.
   All addressing modes are equally cheap on the Blackfin.  */

static int
bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return 1;
}

/* Subroutine of print_operand; used to print a memory reference X to FILE.  */

void
print_address_operand (FILE *file, rtx x)
{
  switch (GET_CODE (x))
    {
    case PLUS:
      output_address (XEXP (x, 0));
      fprintf (file, "+");
      output_address (XEXP (x, 1));
      break;

    case PRE_DEC:
      fprintf (file, "--");
      output_address (XEXP (x, 0));
      break;
    case POST_INC:
      output_address (XEXP (x, 0));
      fprintf (file, "++");
      break;
    case POST_DEC:
      output_address (XEXP (x, 0));
      fprintf (file, "--");
      break;

    default:
      gcc_assert (GET_CODE (x) != MEM);
      print_operand (file, x, 0);
      break;
    }
}
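
/* Samples of the syntax this produces: a PLUS prints as "FP+8", a POST_INC
   of P0 as "P0++", and a PRE_DEC of SP as "--SP"; the surrounding "[...]"
   brackets are added by the MEM case of print_operand below.  */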

/* Adding intp DImode support by Tony
 * -- Q: (low word)
 * -- R: (high word)
 */

void
print_operand (FILE *file, rtx x, char code)
{
  enum machine_mode mode;

  if (code == '!')
    {
      if (GET_MODE (current_output_insn) == SImode)
        fprintf (file, " ||");
      else
        fprintf (file, ";");
      return;
    }

  mode = GET_MODE (x);

  switch (code)
    {
    case 'j':
      switch (GET_CODE (x))
        {
        case EQ:
          fprintf (file, "e");
          break;
        case NE:
          fprintf (file, "ne");
          break;
        case GT:
          fprintf (file, "g");
          break;
        case LT:
          fprintf (file, "l");
          break;
        case GE:
          fprintf (file, "ge");
          break;
        case LE:
          fprintf (file, "le");
          break;
        case GTU:
          fprintf (file, "g");
          break;
        case LTU:
          fprintf (file, "l");
          break;
        case GEU:
          fprintf (file, "ge");
          break;
        case LEU:
          fprintf (file, "le");
          break;
        default:
          output_operand_lossage ("invalid %%j value");
        }
      break;

    case 'J':	/* reverse logic */
      switch (GET_CODE (x))
        {
        case EQ:
          fprintf (file, "ne");
          break;
        case NE:
          fprintf (file, "e");
          break;
        case GT:
          fprintf (file, "le");
          break;
        case LT:
          fprintf (file, "ge");
          break;
        case GE:
          fprintf (file, "l");
          break;
        case LE:
          fprintf (file, "g");
          break;
        case GTU:
          fprintf (file, "le");
          break;
        case LTU:
          fprintf (file, "ge");
          break;
        case GEU:
          fprintf (file, "l");
          break;
        case LEU:
          fprintf (file, "g");
          break;
        default:
          output_operand_lossage ("invalid %%J value");
        }
      break;

    default:
      switch (GET_CODE (x))
        {
        case REG:
          if (code == 'h')
            {
              if (REGNO (x) < 32)
                fprintf (file, "%s", short_reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'd')
            {
              if (REGNO (x) < 32)
                fprintf (file, "%s", high_reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'w')
            {
              if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
                fprintf (file, "%s.w", reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'x')
            {
              if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
                fprintf (file, "%s.x", reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'v')
            {
              if (REGNO (x) == REG_A0)
                fprintf (file, "AV0");
              else if (REGNO (x) == REG_A1)
                fprintf (file, "AV1");
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'D')
            {
              if (D_REGNO_P (REGNO (x)))
                fprintf (file, "%s", dregs_pair_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'H')
            {
              if ((mode == DImode || mode == DFmode) && REG_P (x))
                fprintf (file, "%s", reg_names[REGNO (x) + 1]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'T')
            {
              if (D_REGNO_P (REGNO (x)))
                fprintf (file, "%s", byte_reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else
            fprintf (file, "%s", reg_names[REGNO (x)]);
          break;

        case MEM:
          fputc ('[', file);
          x = XEXP (x, 0);
          print_address_operand (file, x);
          fputc (']', file);
          break;

        case CONST_INT:
          if (code == 'M')
            {
              switch (INTVAL (x))
                {
                case MACFLAG_NONE:
                  break;
                case MACFLAG_FU:
                  fputs ("(FU)", file);
                  break;
                case MACFLAG_T:
                  fputs ("(T)", file);
                  break;
                case MACFLAG_TFU:
                  fputs ("(TFU)", file);
                  break;
                case MACFLAG_W32:
                  fputs ("(W32)", file);
                  break;
                case MACFLAG_IS:
                  fputs ("(IS)", file);
                  break;
                case MACFLAG_IU:
                  fputs ("(IU)", file);
                  break;
                case MACFLAG_IH:
                  fputs ("(IH)", file);
                  break;
                case MACFLAG_M:
                  fputs ("(M)", file);
                  break;
                case MACFLAG_IS_M:
                  fputs ("(IS,M)", file);
                  break;
                case MACFLAG_ISS2:
                  fputs ("(ISS2)", file);
                  break;
                case MACFLAG_S2RND:
                  fputs ("(S2RND)", file);
                  break;
                default:
                  gcc_unreachable ();
                }
              break;
            }
          else if (code == 'b')
            {
              if (INTVAL (x) == 0)
                fputs ("+=", file);
              else if (INTVAL (x) == 1)
                fputs ("-=", file);
              else
                gcc_unreachable ();
              break;
            }
          /* Moves to half registers with d or h modifiers always use unsigned
             constants.  */
          else if (code == 'd')
            x = GEN_INT ((INTVAL (x) >> 16) & 0xffff);
          else if (code == 'h')
            x = GEN_INT (INTVAL (x) & 0xffff);
          else if (code == 'N')
            x = GEN_INT (-INTVAL (x));
          else if (code == 'X')
            x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x)));
          else if (code == 'Y')
            x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x)));
          else if (code == 'Z')
            /* Used for LINK insns.  */
            x = GEN_INT (-8 - INTVAL (x));

          /* fall through */

        case SYMBOL_REF:
          output_addr_const (file, x);
          break;

        case CONST_DOUBLE:
          output_operand_lossage ("invalid const_double operand");
          break;

        case UNSPEC:
          switch (XINT (x, 1))
            {
            case UNSPEC_MOVE_PIC:
              output_addr_const (file, XVECEXP (x, 0, 0));
              fprintf (file, "@GOT");
              break;

            case UNSPEC_MOVE_FDPIC:
              output_addr_const (file, XVECEXP (x, 0, 0));
              fprintf (file, "@GOT17M4");
              break;

            case UNSPEC_FUNCDESC_GOT17M4:
              output_addr_const (file, XVECEXP (x, 0, 0));
              fprintf (file, "@FUNCDESC_GOT17M4");
              break;

            case UNSPEC_LIBRARY_OFFSET:
              fprintf (file, "_current_shared_library_p5_offset_");
              break;

            default:
              gcc_unreachable ();
            }
          break;

        default:
          output_addr_const (file, x);
        }
    }
}
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
   The VDSP C Compiler manual (our ABI) says that the
   first 3 words of arguments are passed in R0, R1 and R2.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
                      rtx libname ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cum;

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */

  cum->nregs = max_arg_registers;
  cum->arg_regs = arg_regs;

  cum->call_cookie = CALL_NORMAL;
  /* Check for a shortcall or longcall attribute.  */
  if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_SHORT;
  else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_LONG;

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
                      int named ATTRIBUTE_UNUSED)
{
  int count, bytes, words;

  bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  cum->words += words;
  cum->nregs -= words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->arg_regs = NULL;
    }
  else
    {
      for (count = 1; count <= words; count++)
        cum->arg_regs++;
    }

  return;
}
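
/* For instance, advancing past one SImode argument moves cum->words from 0
   to 1, drops cum->nregs from 3 to 2 and steps cum->arg_regs forward one
   slot, so the next argument is assigned the next register (R1 under this
   ABI).  */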

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
    This is null for libcalls where that information may
    not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
    the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
    (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
              int named ATTRIBUTE_UNUSED)
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);

  if (mode == VOIDmode)
    /* Compute operand 2 of the call insn.  */
    return GEN_INT (cum->call_cookie);

  if (bytes == -1)
    return NULL_RTX;

  if (cum->nregs)
    return gen_rtx_REG (mode, *(cum->arg_regs));

  return NULL_RTX;
}
1928
1929 /* For an arg passed partly in registers and partly in memory,
1930 this is the number of bytes passed in registers.
1931 For args passed entirely in registers or entirely in memory, zero.
1932
1933    Per the VDSP C Compiler manual, our ABI passes the first 3 words
1934    in registers.  So, if an argument is larger than the registers
1935    still available, it is split between the registers and the
1936    stack. */
1937
1938 static int
1939 bfin_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
1940 			tree type,
1941 bool named ATTRIBUTE_UNUSED)
1942 {
1943 int bytes
1944 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
1945 int bytes_left = cum->nregs * UNITS_PER_WORD;
1946
1947 if (bytes == -1)
1948 return 0;
1949
1950 if (bytes_left == 0)
1951 return 0;
1952 if (bytes > bytes_left)
1953 return bytes_left;
1954 return 0;
1955 }
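
/* Example: if a DImode (8-byte) argument arrives when only one argument
   register is left (cum->nregs == 1, so bytes_left == 4), the function
   above returns 4: one word goes in the register, the rest on the
   stack.  */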
1956
1957 /* Variable sized types are passed by reference. */
1958
1959 static bool
1960 bfin_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
1961 enum machine_mode mode ATTRIBUTE_UNUSED,
1962 const_tree type, bool named ATTRIBUTE_UNUSED)
1963 {
1964 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
1965 }
1966
1967 /* Decide whether a type should be returned in memory (true)
1968 or in a register (false). This is called by the macro
1969 TARGET_RETURN_IN_MEMORY. */
1970
1971 static bool
1972 bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
1973 {
1974 int size = int_size_in_bytes (type);
1975 return size > 2 * UNITS_PER_WORD || size == -1;
1976 }
1977
1978 /* Register in which address to store a structure value
1979 is passed to a function. */
1980 static rtx
1981 bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
1982 int incoming ATTRIBUTE_UNUSED)
1983 {
1984 return gen_rtx_REG (Pmode, REG_P0);
1985 }
1986
1987 /* Return true when register may be used to pass function parameters. */
1988
1989 bool
1990 function_arg_regno_p (int n)
1991 {
1992 int i;
1993 for (i = 0; arg_regs[i] != -1; i++)
1994 if (n == arg_regs[i])
1995 return true;
1996 return false;
1997 }
1998
1999 /* Returns 1 if OP contains a symbol reference.  */
2000
2001 int
2002 symbolic_reference_mentioned_p (rtx op)
2003 {
2004 register const char *fmt;
2005 register int i;
2006
2007 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2008 return 1;
2009
2010 fmt = GET_RTX_FORMAT (GET_CODE (op));
2011 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2012 {
2013 if (fmt[i] == 'E')
2014 {
2015 register int j;
2016
2017 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2018 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2019 return 1;
2020 }
2021
2022 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2023 return 1;
2024 }
2025
2026 return 0;
2027 }
2028
2029 /* Decide whether we can make a sibling call to a function. DECL is the
2030 declaration of the function being targeted by the call and EXP is the
2031 CALL_EXPR representing the call. */
2032
2033 static bool
2034 bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2035 tree exp ATTRIBUTE_UNUSED)
2036 {
2037 struct cgraph_local_info *this_func, *called_func;
2038 e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
2039 if (fkind != SUBROUTINE)
2040 return false;
2041 if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA)
2042 return true;
2043
2044   /* When compiling for ID shared libraries, we can't sibcall a local
2045      function from a non-local function, because the local function thinks
2046      it does not need to reload P5 in the prologue, but the sibcall epilogue
2047      will pop P5, and we end up with the wrong value in P5. */
2048
2049 if (!decl)
2050 /* Not enough information. */
2051 return false;
2052
2053 this_func = cgraph_local_info (current_function_decl);
2054 called_func = cgraph_local_info (decl);
2055 return !called_func->local || this_func->local;
2056 }
2057 \f
2058 /* Emit RTL insns to initialize the variable parts of a trampoline at
2059 TRAMP. FNADDR is an RTX for the address of the function's pure
2060 code. CXT is an RTX for the static chain value for the function. */
2061
2062 void
2063 initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
2064 {
2065 rtx t1 = copy_to_reg (fnaddr);
2066 rtx t2 = copy_to_reg (cxt);
2067 rtx addr;
2068 int i = 0;
2069
2070 if (TARGET_FDPIC)
2071 {
2072 rtx a = memory_address (Pmode, plus_constant (tramp, 8));
2073 addr = memory_address (Pmode, tramp);
2074 emit_move_insn (gen_rtx_MEM (SImode, addr), a);
2075 i = 8;
2076 }
2077
2078 addr = memory_address (Pmode, plus_constant (tramp, i + 2));
2079 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1));
2080 emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16)));
2081 addr = memory_address (Pmode, plus_constant (tramp, i + 6));
2082 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1));
2083
2084 addr = memory_address (Pmode, plus_constant (tramp, i + 10));
2085 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2));
2086 emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16)));
2087 addr = memory_address (Pmode, plus_constant (tramp, i + 14));
2088 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2));
2089 }
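
/* A sketch of the layout produced above: under FDPIC, the word at TRAMP
   is set to TRAMP + 8 (where the actual code begins) and I becomes 8;
   the low/high halfwords of FNADDR are then patched into the code at
   offsets I + 2 and I + 6, and those of CXT at offsets I + 10 and
   I + 14.  */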
2090
2091 /* Emit insns to move operands[1] into operands[0]. */
2092
2093 void
2094 emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED)
2095 {
2096 rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
2097
2098 gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed));
2099 if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
2100 operands[1] = force_reg (SImode, operands[1]);
2101 else
2102 operands[1] = legitimize_pic_address (operands[1], temp,
2103 TARGET_FDPIC ? OUR_FDPIC_REG
2104 : pic_offset_table_rtx);
2105 }
2106
2107 /* Expand a move operation in mode MODE. The operands are in OPERANDS.
2108 Returns true if no further code must be generated, false if the caller
2109 should generate an insn to move OPERANDS[1] to OPERANDS[0]. */
2110
2111 bool
2112 expand_move (rtx *operands, enum machine_mode mode)
2113 {
2114 rtx op = operands[1];
2115 if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC)
2116 && SYMBOLIC_CONST (op))
2117 emit_pic_move (operands, mode);
2118 else if (mode == SImode && GET_CODE (op) == CONST
2119 && GET_CODE (XEXP (op, 0)) == PLUS
2120 && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
2121 && !bfin_legitimate_constant_p (op))
2122 {
2123 rtx dest = operands[0];
2124 rtx op0, op1;
2125 gcc_assert (!reload_in_progress && !reload_completed);
2126 op = XEXP (op, 0);
2127 op0 = force_reg (mode, XEXP (op, 0));
2128 op1 = XEXP (op, 1);
2129 if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode))
2130 op1 = force_reg (mode, op1);
2131 if (GET_CODE (dest) == MEM)
2132 dest = gen_reg_rtx (mode);
2133 emit_insn (gen_addsi3 (dest, op0, op1));
2134 if (dest == operands[0])
2135 return true;
2136 operands[1] = dest;
2137 }
2138   /* Don't generate memory->memory or constant->memory moves; go through
2139      a register instead. */
2140 else if ((reload_in_progress | reload_completed) == 0
2141 && GET_CODE (operands[0]) == MEM
2142 && GET_CODE (operands[1]) != REG)
2143 operands[1] = force_reg (mode, operands[1]);
2144 return false;
2145 }
2146 \f
2147 /* Split one or more DImode RTL references into pairs of SImode
2148 references. The RTL can be REG, offsettable MEM, integer constant, or
2149 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
2150 split and "num" is its length. lo_half and hi_half are output arrays
2151 that parallel "operands". */
2152
2153 void
2154 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2155 {
2156 while (num--)
2157 {
2158 rtx op = operands[num];
2159
2160 	  /* simplify_subreg refuses to split volatile memory references,
2161 	     but we still have to handle them. */
2162 if (GET_CODE (op) == MEM)
2163 {
2164 lo_half[num] = adjust_address (op, SImode, 0);
2165 hi_half[num] = adjust_address (op, SImode, 4);
2166 }
2167 else
2168 {
2169 lo_half[num] = simplify_gen_subreg (SImode, op,
2170 GET_MODE (op) == VOIDmode
2171 ? DImode : GET_MODE (op), 0);
2172 hi_half[num] = simplify_gen_subreg (SImode, op,
2173 GET_MODE (op) == VOIDmode
2174 ? DImode : GET_MODE (op), 4);
2175 }
2176 }
2177 }
2178 \f
2179 bool
2180 bfin_longcall_p (rtx op, int call_cookie)
2181 {
2182 gcc_assert (GET_CODE (op) == SYMBOL_REF);
2183 if (call_cookie & CALL_SHORT)
2184 return 0;
2185 if (call_cookie & CALL_LONG)
2186 return 1;
2187 if (TARGET_LONG_CALLS)
2188 return 1;
2189 return 0;
2190 }
2191
2192 /* Expand a call instruction.  FNADDR is the call target, RETVAL the return value.
2193    COOKIE is a CONST_INT holding the call_cookie prepared by init_cumulative_args.
2194    SIBCALL is nonzero if this is a sibling call.  */
2195
2196 void
2197 bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall)
2198 {
2199 rtx use = NULL, call;
2200 rtx callee = XEXP (fnaddr, 0);
2201 int nelts = 3;
2202 rtx pat;
2203 rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO);
2204 rtx retsreg = gen_rtx_REG (Pmode, REG_RETS);
2205 int n;
2206
2207 /* In an untyped call, we can get NULL for operand 2. */
2208 if (cookie == NULL_RTX)
2209 cookie = const0_rtx;
2210
2211 /* Static functions and indirect calls don't need the pic register. */
2212 if (!TARGET_FDPIC && flag_pic
2213 && GET_CODE (callee) == SYMBOL_REF
2214 && !SYMBOL_REF_LOCAL_P (callee))
2215 use_reg (&use, pic_offset_table_rtx);
2216
2217 if (TARGET_FDPIC)
2218 {
2219 int caller_has_l1_text, callee_has_l1_text;
2220
2221 caller_has_l1_text = callee_has_l1_text = 0;
2222
2223 if (lookup_attribute ("l1_text",
2224 DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
2225 caller_has_l1_text = 1;
2226
2227 if (GET_CODE (callee) == SYMBOL_REF
2228 && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee))
2229 && lookup_attribute
2230 ("l1_text",
2231 DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
2232 callee_has_l1_text = 1;
2233
2234 if (GET_CODE (callee) != SYMBOL_REF
2235 || bfin_longcall_p (callee, INTVAL (cookie))
2236 || (GET_CODE (callee) == SYMBOL_REF
2237 && !SYMBOL_REF_LOCAL_P (callee)
2238 && TARGET_INLINE_PLT)
2239 || caller_has_l1_text != callee_has_l1_text
2240 || (caller_has_l1_text && callee_has_l1_text
2241 && (GET_CODE (callee) != SYMBOL_REF
2242 || !SYMBOL_REF_LOCAL_P (callee))))
2243 {
2244 rtx addr = callee;
2245 if (! address_operand (addr, Pmode))
2246 addr = force_reg (Pmode, addr);
2247
2248 fnaddr = gen_reg_rtx (SImode);
2249 emit_insn (gen_load_funcdescsi (fnaddr, addr));
2250 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
2251
2252 picreg = gen_reg_rtx (SImode);
2253 emit_insn (gen_load_funcdescsi (picreg,
2254 plus_constant (addr, 4)));
2255 }
2256
2257 nelts++;
2258 }
2259 else if ((!register_no_elim_operand (callee, Pmode)
2260 && GET_CODE (callee) != SYMBOL_REF)
2261 || (GET_CODE (callee) == SYMBOL_REF
2262 && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY)
2263 || bfin_longcall_p (callee, INTVAL (cookie)))))
2264 {
2265 callee = copy_to_mode_reg (Pmode, callee);
2266 fnaddr = gen_rtx_MEM (Pmode, callee);
2267 }
2268 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
2269
2270 if (retval)
2271 call = gen_rtx_SET (VOIDmode, retval, call);
2272
2273 pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts));
2274 n = 0;
2275 XVECEXP (pat, 0, n++) = call;
2276 if (TARGET_FDPIC)
2277 XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg);
2278 XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
2279 if (sibcall)
2280 XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode);
2281 else
2282 XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
2283 call = emit_call_insn (pat);
2284 if (use)
2285 CALL_INSN_FUNCTION_USAGE (call) = use;
2286 }
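
/* The PARALLEL built above has the shape (sketch):
     (parallel [(set RETVAL (call FNADDR CALLARG1))	; set only if RETVAL
		(use PICREG)				; FDPIC only
		(use COOKIE)
		(return)])				; or (clobber RETS)  */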
2287 \f
2288 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
2289
2290 int
2291 hard_regno_mode_ok (int regno, enum machine_mode mode)
2292 {
2293   /* Allow only dregs to store a value of mode HImode or QImode.  */
2294 enum reg_class rclass = REGNO_REG_CLASS (regno);
2295
2296 if (mode == CCmode)
2297 return 0;
2298
2299 if (mode == V2HImode)
2300 return D_REGNO_P (regno);
2301 if (rclass == CCREGS)
2302 return mode == BImode;
2303 if (mode == PDImode || mode == V2PDImode)
2304 return regno == REG_A0 || regno == REG_A1;
2305
2306 /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes
2307 up with a bad register class (such as ALL_REGS) for DImode. */
2308 if (mode == DImode)
2309 return regno < REG_M3;
2310
2311 if (mode == SImode
2312 && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno))
2313 return 1;
2314
2315 return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno);
2316 }
2317
2318 /* Implements target hook vector_mode_supported_p. */
2319
2320 static bool
2321 bfin_vector_mode_supported_p (enum machine_mode mode)
2322 {
2323 return mode == V2HImode;
2324 }
2325
2326 /* Return the cost of moving data from a register in class CLASS1 to
2327 one in class CLASS2. A cost of 2 is the default. */
2328
2329 int
2330 bfin_register_move_cost (enum machine_mode mode,
2331 enum reg_class class1, enum reg_class class2)
2332 {
2333 /* These need secondary reloads, so they're more expensive. */
2334 if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS))
2335 || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS)))
2336 return 4;
2337
2338 /* If optimizing for size, always prefer reg-reg over reg-memory moves. */
2339 if (optimize_size)
2340 return 2;
2341
2342 if (GET_MODE_CLASS (mode) == MODE_INT)
2343 {
2344 /* Discourage trying to use the accumulators. */
2345 if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0)
2346 || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1)
2347 || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0)
2348 || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1))
2349 return 20;
2350 }
2351 return 2;
2352 }
2353
2354 /* Return the cost of moving data of mode M between a
2355 register and memory. A value of 2 is the default; this cost is
2356 relative to those in `REGISTER_MOVE_COST'.
2357
2358 ??? In theory L1 memory has single-cycle latency. We should add a switch
2359 that tells the compiler whether we expect to use only L1 memory for the
2360 program; it'll make the costs more accurate. */
2361
2362 int
2363 bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2364 enum reg_class rclass,
2365 int in ATTRIBUTE_UNUSED)
2366 {
2367 /* Make memory accesses slightly more expensive than any register-register
2368 move. Also, penalize non-DP registers, since they need secondary
2369 reloads to load and store. */
2370 if (! reg_class_subset_p (rclass, DPREGS))
2371 return 10;
2372
2373 return 8;
2374 }
2375
2376 /* Inform reload about cases where moving X with a mode MODE to a register in
2377 RCLASS requires an extra scratch register. Return the class needed for the
2378 scratch register. */
2379
2380 static enum reg_class
2381 bfin_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
2382 enum machine_mode mode, secondary_reload_info *sri)
2383 {
2384 /* If we have HImode or QImode, we can only use DREGS as secondary registers;
2385 in most other cases we can also use PREGS. */
2386 enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS;
2387 enum reg_class x_class = NO_REGS;
2388 enum rtx_code code = GET_CODE (x);
2389
2390 if (code == SUBREG)
2391 x = SUBREG_REG (x), code = GET_CODE (x);
2392 if (REG_P (x))
2393 {
2394 int regno = REGNO (x);
2395 if (regno >= FIRST_PSEUDO_REGISTER)
2396 regno = reg_renumber[regno];
2397
2398 if (regno == -1)
2399 code = MEM;
2400 else
2401 x_class = REGNO_REG_CLASS (regno);
2402 }
2403
2404 /* We can be asked to reload (plus (FP) (large_constant)) into a DREG.
2405 This happens as a side effect of register elimination, and we need
2406 a scratch register to do it. */
2407 if (fp_plus_const_operand (x, mode))
2408 {
2409 rtx op2 = XEXP (x, 1);
2410 int large_constant_p = ! satisfies_constraint_Ks7 (op2);
2411
2412 if (rclass == PREGS || rclass == PREGS_CLOBBERED)
2413 return NO_REGS;
2414 /* If destination is a DREG, we can do this without a scratch register
2415 if the constant is valid for an add instruction. */
2416 if ((rclass == DREGS || rclass == DPREGS)
2417 && ! large_constant_p)
2418 return NO_REGS;
2419 /* Reloading to anything other than a DREG? Use a PREG scratch
2420 register. */
2421 sri->icode = CODE_FOR_reload_insi;
2422 return NO_REGS;
2423 }
2424
2425 /* Data can usually be moved freely between registers of most classes.
2426 AREGS are an exception; they can only move to or from another register
2427 in AREGS or one in DREGS. They can also be assigned the constant 0. */
2428 if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS)
2429 return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS
2430 || rclass == ODD_AREGS
2431 ? NO_REGS : DREGS);
2432
2433 if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS)
2434 {
2435 if (code == MEM)
2436 {
2437 sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi;
2438 return NO_REGS;
2439 }
2440
2441 if (x != const0_rtx && x_class != DREGS)
2442 {
2443 return DREGS;
2444 }
2445 else
2446 return NO_REGS;
2447 }
2448
2449 /* CCREGS can only be moved from/to DREGS. */
2450 if (rclass == CCREGS && x_class != DREGS)
2451 return DREGS;
2452 if (x_class == CCREGS && rclass != DREGS)
2453 return DREGS;
2454
2455 /* All registers other than AREGS can load arbitrary constants. The only
2456 case that remains is MEM. */
2457 if (code == MEM)
2458 if (! reg_class_subset_p (rclass, default_class))
2459 return default_class;
2460
2461 return NO_REGS;
2462 }
2463 \f
2464 /* Implement TARGET_HANDLE_OPTION. */
2465
2466 static bool
2467 bfin_handle_option (size_t code, const char *arg, int value)
2468 {
2469 switch (code)
2470 {
2471 case OPT_mshared_library_id_:
2472 if (value > MAX_LIBRARY_ID)
2473 error ("-mshared-library-id=%s is not between 0 and %d",
2474 arg, MAX_LIBRARY_ID);
2475 bfin_lib_id_given = 1;
2476 return true;
2477
2478 case OPT_mcpu_:
2479 {
2480 const char *p, *q;
2481 int i;
2482
2483 i = 0;
2484 while ((p = bfin_cpus[i].name) != NULL)
2485 {
2486 if (strncmp (arg, p, strlen (p)) == 0)
2487 break;
2488 i++;
2489 }
2490
2491 if (p == NULL)
2492 {
2493 error ("-mcpu=%s is not valid", arg);
2494 return false;
2495 }
2496
2497 bfin_cpu_type = bfin_cpus[i].type;
2498
2499 q = arg + strlen (p);
2500
2501 if (*q == '\0')
2502 {
2503 bfin_si_revision = bfin_cpus[i].si_revision;
2504 bfin_workarounds |= bfin_cpus[i].workarounds;
2505 }
2506 else if (strcmp (q, "-none") == 0)
2507 bfin_si_revision = -1;
2508 else if (strcmp (q, "-any") == 0)
2509 {
2510 bfin_si_revision = 0xffff;
2511 while (bfin_cpus[i].type == bfin_cpu_type)
2512 {
2513 bfin_workarounds |= bfin_cpus[i].workarounds;
2514 i++;
2515 }
2516 }
2517 else
2518 {
2519 unsigned int si_major, si_minor;
2520 int rev_len, n;
2521
2522 rev_len = strlen (q);
2523
2524 if (sscanf (q, "-%u.%u%n", &si_major, &si_minor, &n) != 2
2525 || n != rev_len
2526 || si_major > 0xff || si_minor > 0xff)
2527 {
2528 invalid_silicon_revision:
2529 error ("-mcpu=%s has invalid silicon revision", arg);
2530 return false;
2531 }
2532
2533 bfin_si_revision = (si_major << 8) | si_minor;
2534
2535 while (bfin_cpus[i].type == bfin_cpu_type
2536 && bfin_cpus[i].si_revision != bfin_si_revision)
2537 i++;
2538
2539 if (bfin_cpus[i].type != bfin_cpu_type)
2540 goto invalid_silicon_revision;
2541
2542 bfin_workarounds |= bfin_cpus[i].workarounds;
2543 }
2544
2545 return true;
2546 }
2547
2548 default:
2549 return true;
2550 }
2551 }
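
/* Examples of the -mcpu= handling above (assuming a "bf532" entry exists
   in bfin_cpus): -mcpu=bf532 takes the default silicon revision from the
   table; -mcpu=bf532-none sets bfin_si_revision to -1; -mcpu=bf532-any
   sets it to 0xffff and accumulates the workarounds of every bf532
   entry; -mcpu=bf532-0.3 encodes revision 0.3 as (0 << 8) | 3.  */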
2552
2553 static struct machine_function *
2554 bfin_init_machine_status (void)
2555 {
2556 struct machine_function *f;
2557
2558 f = GGC_CNEW (struct machine_function);
2559
2560 return f;
2561 }
2562
2563 /* Implement the macro OVERRIDE_OPTIONS. */
2564
2565 void
2566 override_options (void)
2567 {
2568 /* If processor type is not specified, enable all workarounds. */
2569 if (bfin_cpu_type == BFIN_CPU_UNKNOWN)
2570 {
2571 int i;
2572
2573 for (i = 0; bfin_cpus[i].name != NULL; i++)
2574 bfin_workarounds |= bfin_cpus[i].workarounds;
2575
2576 bfin_si_revision = 0xffff;
2577 }
2578
2579 if (bfin_csync_anomaly == 1)
2580 bfin_workarounds |= WA_SPECULATIVE_SYNCS;
2581 else if (bfin_csync_anomaly == 0)
2582 bfin_workarounds &= ~WA_SPECULATIVE_SYNCS;
2583
2584 if (bfin_specld_anomaly == 1)
2585 bfin_workarounds |= WA_SPECULATIVE_LOADS;
2586 else if (bfin_specld_anomaly == 0)
2587 bfin_workarounds &= ~WA_SPECULATIVE_LOADS;
2588
2589 if (TARGET_OMIT_LEAF_FRAME_POINTER)
2590 flag_omit_frame_pointer = 1;
2591
2592 /* Library identification */
2593 if (bfin_lib_id_given && ! TARGET_ID_SHARED_LIBRARY)
2594 error ("-mshared-library-id= specified without -mid-shared-library");
2595
2596 if (stack_limit_rtx && TARGET_STACK_CHECK_L1)
2597 error ("Can't use multiple stack checking methods together.");
2598
2599 if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC)
2600 error ("ID shared libraries and FD-PIC mode can't be used together.");
2601
2602 /* Don't allow the user to specify -mid-shared-library and -msep-data
2603 together, as it makes little sense from a user's point of view... */
2604 if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY)
2605 error ("cannot specify both -msep-data and -mid-shared-library");
2606 /* ... internally, however, it's nearly the same. */
2607 if (TARGET_SEP_DATA)
2608 target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY;
2609
2610 if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0)
2611 flag_pic = 1;
2612
2613 /* There is no single unaligned SI op for PIC code. Sometimes we
2614 need to use ".4byte" and sometimes we need to use ".picptr".
2615 See bfin_assemble_integer for details. */
2616 if (TARGET_FDPIC)
2617 targetm.asm_out.unaligned_op.si = 0;
2618
2619 /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries,
2620 since we don't support it and it'll just break. */
2621 if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY)
2622 flag_pic = 0;
2623
2624 if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561)
2625 error ("-mmulticore can only be used with BF561");
2626
2627 if (TARGET_COREA && !TARGET_MULTICORE)
2628 error ("-mcorea should be used with -mmulticore");
2629
2630 if (TARGET_COREB && !TARGET_MULTICORE)
2631 error ("-mcoreb should be used with -mmulticore");
2632
2633 if (TARGET_COREA && TARGET_COREB)
2634 error ("-mcorea and -mcoreb can't be used together");
2635
2636 flag_schedule_insns = 0;
2637
2638 /* Passes after sched2 can break the helpful TImode annotations that
2639 haifa-sched puts on every insn. Just do scheduling in reorg. */
2640 bfin_flag_schedule_insns2 = flag_schedule_insns_after_reload;
2641 flag_schedule_insns_after_reload = 0;
2642
2643 init_machine_status = bfin_init_machine_status;
2644 }
2645
2646 /* Return the destination address of BRANCH.
2647 We need to use this instead of get_attr_length, because the
2648 cbranch_with_nops pattern conservatively sets its length to 6, and
2649 we still prefer to use shorter sequences. */
2650
2651 static int
2652 branch_dest (rtx branch)
2653 {
2654 rtx dest;
2655 int dest_uid;
2656 rtx pat = PATTERN (branch);
2657 if (GET_CODE (pat) == PARALLEL)
2658 pat = XVECEXP (pat, 0, 0);
2659 dest = SET_SRC (pat);
2660 if (GET_CODE (dest) == IF_THEN_ELSE)
2661 dest = XEXP (dest, 1);
2662 dest = XEXP (dest, 0);
2663 dest_uid = INSN_UID (dest);
2664 return INSN_ADDRESSES (dest_uid);
2665 }
2666
2667 /* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates
2668 it's a branch that's predicted taken. */
2669
2670 static int
2671 cbranch_predicted_taken_p (rtx insn)
2672 {
2673 rtx x = find_reg_note (insn, REG_BR_PROB, 0);
2674
2675 if (x)
2676 {
2677 int pred_val = INTVAL (XEXP (x, 0));
2678
2679 return pred_val >= REG_BR_PROB_BASE / 2;
2680 }
2681
2682 return 0;
2683 }
2684
2685 /* Templates for use by asm_conditional_branch. */
2686
2687 static const char *ccbranch_templates[][3] = {
2688 { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" },
2689 { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" },
2690 { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" },
2691 { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" },
2692 };
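
/* The row index is ((bp << 1) | BRF-or-BRT), as computed in
   asm_conditional_branch below, and the column is the branch length:
   0 for a short conditional branch, 1 for a medium one using jump.s and
   2 for a long one using jump.l.  In the longer forms the sense of the
   condition is inverted and the conditional branch skips over the
   unconditional jump.  */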
2693
2694 /* Output INSN, which is a conditional branch instruction with operands
2695 OPERANDS.
2696
2697 We deal with the various forms of conditional branches that can be generated
2698 by bfin_reorg to prevent the hardware from doing speculative loads, by
2699 - emitting a sufficient number of nops, if N_NOPS is nonzero, or
2700 - always emitting the branch as predicted taken, if PREDICT_TAKEN is true.
2701 Either of these is only necessary if the branch is short, otherwise the
2702 template we use ends in an unconditional jump which flushes the pipeline
2703 anyway. */
2704
2705 void
2706 asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken)
2707 {
2708 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2709   /* Note: the offset for instructions like "if cc jump; jump.[sl] offset"
2710      is measured from the start of the "if cc" insn rather than from the
2711      jump, so the range for jump.s is (-4094, 4096) instead of (-4096, 4094).
2712   */
2713 int len = (offset >= -1024 && offset <= 1022 ? 0
2714 : offset >= -4094 && offset <= 4096 ? 1
2715 : 2);
2716 int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn);
2717 int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT);
2718 output_asm_insn (ccbranch_templates[idx][len], operands);
2719 gcc_assert (n_nops == 0 || !bp);
2720 if (len == 0)
2721 while (n_nops-- > 0)
2722 output_asm_insn ("nop;", NULL);
2723 }
2724
2725 /* Emit rtl for a comparison operation CMP in mode MODE.  The operands are
2726    taken from CMP itself; return an expression testing the CC result.  */
2727
2728 rtx
2729 bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED)
2730 {
2731 enum rtx_code code1, code2;
2732 rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1);
2733 rtx tem = bfin_cc_rtx;
2734 enum rtx_code code = GET_CODE (cmp);
2735
2736 /* If we have a BImode input, then we already have a compare result, and
2737 do not need to emit another comparison. */
2738 if (GET_MODE (op0) == BImode)
2739 {
2740 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
2741 tem = op0, code2 = code;
2742 }
2743 else
2744 {
2745 switch (code) {
2746 /* bfin has these conditions */
2747 case EQ:
2748 case LT:
2749 case LE:
2750 case LEU:
2751 case LTU:
2752 code1 = code;
2753 code2 = NE;
2754 break;
2755 default:
2756 code1 = reverse_condition (code);
2757 code2 = EQ;
2758 break;
2759 }
2760 emit_insn (gen_rtx_SET (VOIDmode, tem,
2761 gen_rtx_fmt_ee (code1, BImode, op0, op1)));
2762 }
2763
2764 return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode));
2765 }
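
/* For example, a GE comparison, which the hardware lacks, is emitted as
   CC = op0 < op1 (the reversed condition), and the returned expression
   then tests CC == 0; directly supported codes simply test CC != 0.  */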
2766 \f
2767 /* Return nonzero iff C has exactly one bit set when it is interpreted
2768    as a 32-bit constant.  */
2769
2770 int
2771 log2constp (unsigned HOST_WIDE_INT c)
2772 {
2773 c &= 0xFFFFFFFF;
2774 return c != 0 && (c & (c-1)) == 0;
2775 }
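
/* E.g. log2constp (0x4000) and log2constp (0x80000000) are nonzero;
   log2constp (0x5000) and log2constp (0) are zero.  */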
2776
2777 /* Returns the number of consecutive least significant zeros in the binary
2778 representation of *V.
2779 We modify *V to contain the original value arithmetically shifted right by
2780 the number of zeroes. */
2781
2782 static int
2783 shiftr_zero (HOST_WIDE_INT *v)
2784 {
2785 unsigned HOST_WIDE_INT tmp = *v;
2786 unsigned HOST_WIDE_INT sgn;
2787 int n = 0;
2788
2789 if (tmp == 0)
2790 return 0;
2791
2792 sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1));
2793 while ((tmp & 0x1) == 0 && n <= 32)
2794 {
2795 tmp = (tmp >> 1) | sgn;
2796 n++;
2797 }
2798 *v = tmp;
2799 return n;
2800 }
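
/* E.g. for *V == 0x7F00 this returns 8 and leaves *V == 0x7F.  The shift
   is arithmetic, so negative values keep their sign bit.  */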
2801
2802 /* After reload, split the load of an immediate constant. OPERANDS are the
2803 operands of the movsi_insn pattern which we are splitting. We return
2804 nonzero if we emitted a sequence to load the constant, zero if we emitted
2805 nothing because we want to use the splitter's default sequence. */
2806
2807 int
2808 split_load_immediate (rtx operands[])
2809 {
2810 HOST_WIDE_INT val = INTVAL (operands[1]);
2811 HOST_WIDE_INT tmp;
2812 HOST_WIDE_INT shifted = val;
2813 HOST_WIDE_INT shifted_compl = ~val;
2814 int num_zero = shiftr_zero (&shifted);
2815 int num_compl_zero = shiftr_zero (&shifted_compl);
2816 unsigned int regno = REGNO (operands[0]);
2817
2818 /* This case takes care of single-bit set/clear constants, which we could
2819 also implement with BITSET/BITCLR. */
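  /* E.g. VAL == 0x7F000 gives SHIFTED == 0x7F and NUM_ZERO == 12, and is
     loaded (roughly) as "R0 = 127; R0 <<= 12;".  */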
2820 if (num_zero
2821 && shifted >= -32768 && shifted < 65536
2822 && (D_REGNO_P (regno)
2823 || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2)))
2824 {
2825 emit_insn (gen_movsi (operands[0], GEN_INT (shifted)));
2826 emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero)));
2827 return 1;
2828 }
2829
2830 tmp = val & 0xFFFF;
2831 tmp |= -(tmp & 0x8000);
2832
2833   /* If the high word has one bit set or clear, try to use a bit operation.  */
2834 if (D_REGNO_P (regno))
2835 {
2836 if (log2constp (val & 0xFFFF0000))
2837 {
2838 emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF)));
2839 emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000)));
2840 return 1;
2841 }
2842 else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0)
2843 {
2844 emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2845 	  emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF)));
	  return 1;
2846 	}
2847 }
2848
2849 if (D_REGNO_P (regno))
2850 {
2851 if (tmp >= -64 && tmp <= 63)
2852 {
2853 emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2854 emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536)));
2855 return 1;
2856 }
2857
2858 if ((val & 0xFFFF0000) == 0)
2859 {
2860 emit_insn (gen_movsi (operands[0], const0_rtx));
2861 emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2862 return 1;
2863 }
2864
2865 if ((val & 0xFFFF0000) == 0xFFFF0000)
2866 {
2867 emit_insn (gen_movsi (operands[0], constm1_rtx));
2868 emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2869 return 1;
2870 }
2871 }
2872
2873 /* Need DREGs for the remaining case. */
2874 if (regno > REG_R7)
2875 return 0;
2876
2877 if (optimize_size
2878 && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63)
2879 {
2880       /* If optimizing for size, generate a sequence that has more
2881 	 instructions but takes fewer bytes. */
2882 emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl)));
2883 emit_insn (gen_ashlsi3 (operands[0], operands[0],
2884 GEN_INT (num_compl_zero)));
2885 emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
2886 return 1;
2887 }
2888 return 0;
2889 }
2890 \f
2891 /* Return true if VALUE is a legitimate constant offset for a memory
2892    operand of mode MODE.  Return false if not. */
2893
2894 static bool
2895 bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value)
2896 {
2897 unsigned HOST_WIDE_INT v = value > 0 ? value : -value;
2898 int sz = GET_MODE_SIZE (mode);
2899 int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2;
2900 /* The usual offsettable_memref machinery doesn't work so well for this
2901 port, so we deal with the problem here. */
2902 if (value > 0 && sz == 8)
2903 v += 4;
2904 return (v & ~(0x7fff << shift)) == 0;
2905 }
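
/* E.g. for SImode (shift == 2), the offset must be a multiple of 4 with
   magnitude below 0x20000, i.e. the "[ Preg + uimm17m4 ]" form listed
   below.  The extra 4 added to positive offsets of 8-byte accesses
   ensures that the second word is addressable as well.  */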
2906
2907 static bool
2908 bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode,
2909 enum rtx_code outer_code)
2910 {
2911 if (strict)
2912 return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH);
2913 else
2914 return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH);
2915 }
2916
2917 /* Recognize an RTL expression that is a valid memory address for an
2918 instruction. The MODE argument is the machine mode for the MEM expression
2919 that wants to use this address.
2920
2921 Blackfin addressing modes are as follows:
2922
2923 [preg]
2924 [preg + imm16]
2925
2926 B [ Preg + uimm15 ]
2927 W [ Preg + uimm16m2 ]
2928 [ Preg + uimm17m4 ]
2929
2930 [preg++]
2931 [preg--]
2932 [--sp]
2933 */
2934
2935 static bool
2936 bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
2937 {
2938 switch (GET_CODE (x)) {
2939 case REG:
2940 if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM))
2941 return true;
2942 break;
2943 case PLUS:
2944 if (REG_P (XEXP (x, 0))
2945 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS)
2946 && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode)
2947 || (GET_CODE (XEXP (x, 1)) == CONST_INT
2948 && bfin_valid_add (mode, INTVAL (XEXP (x, 1))))))
2949 return true;
2950 break;
2951 case POST_INC:
2952 case POST_DEC:
2953 if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2954 && REG_P (XEXP (x, 0))
2955 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC))
2956 return true;
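       /* Fall through.  */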
2957 case PRE_DEC:
2958 if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2959 && XEXP (x, 0) == stack_pointer_rtx
2960 && REG_P (XEXP (x, 0))
2961 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC))
2962 return true;
2963 break;
2964 default:
2965 break;
2966 }
2967 return false;
2968 }
2969
2970 /* Decide whether we can force certain constants to memory. If we
2971 decide we can't, the caller should be able to cope with it in
2972 another way. */
2973
2974 static bool
2975 bfin_cannot_force_const_mem (rtx x ATTRIBUTE_UNUSED)
2976 {
2977 /* We have only one class of non-legitimate constants, and our movsi
2978 expander knows how to handle them. Dropping these constants into the
2979 data section would only shift the problem - we'd still get relocs
2980 outside the object, in the data section rather than the text section. */
2981 return true;
2982 }
2983
2984 /* Ensure that for any constant of the form symbol + offset, the offset
2985 remains within the object. Any other constants are ok.
2986 This ensures that flat binaries never have to deal with relocations
2987 crossing section boundaries. */
2988
2989 bool
2990 bfin_legitimate_constant_p (rtx x)
2991 {
2992 rtx sym;
2993 HOST_WIDE_INT offset;
2994
2995 if (GET_CODE (x) != CONST)
2996 return true;
2997
2998 x = XEXP (x, 0);
2999 gcc_assert (GET_CODE (x) == PLUS);
3000
3001 sym = XEXP (x, 0);
3002 x = XEXP (x, 1);
3003 if (GET_CODE (sym) != SYMBOL_REF
3004 || GET_CODE (x) != CONST_INT)
3005 return true;
3006 offset = INTVAL (x);
3007
3008 if (SYMBOL_REF_DECL (sym) == 0)
3009 return true;
3010 if (offset < 0
3011 || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym))))
3012 return false;
3013
3014 return true;
3015 }
3016
3017 static bool
3018 bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
3019 {
3020 int cost2 = COSTS_N_INSNS (1);
3021 rtx op0, op1;
3022
3023 switch (code)
3024 {
3025 case CONST_INT:
3026 if (outer_code == SET || outer_code == PLUS)
3027 *total = satisfies_constraint_Ks7 (x) ? 0 : cost2;
3028 else if (outer_code == AND)
3029 *total = log2constp (~INTVAL (x)) ? 0 : cost2;
3030 else if (outer_code == LE || outer_code == LT || outer_code == EQ)
3031 *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2;
3032 else if (outer_code == LEU || outer_code == LTU)
3033 *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2;
3034 else if (outer_code == MULT)
3035 *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2;
3036 else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2))
3037 *total = 0;
3038 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
3039 || outer_code == LSHIFTRT)
3040 *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2;
3041 else if (outer_code == IOR || outer_code == XOR)
3042 *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2;
3043 else
3044 *total = cost2;
3045 return true;
3046
3047 case CONST:
3048 case LABEL_REF:
3049 case SYMBOL_REF:
3050 case CONST_DOUBLE:
3051 *total = COSTS_N_INSNS (2);
3052 return true;
3053
3054 case PLUS:
3055 op0 = XEXP (x, 0);
3056 op1 = XEXP (x, 1);
3057 if (GET_MODE (x) == SImode)
3058 {
3059 if (GET_CODE (op0) == MULT
3060 && GET_CODE (XEXP (op0, 1)) == CONST_INT)
3061 {
3062 HOST_WIDE_INT val = INTVAL (XEXP (op0, 1));
3063 if (val == 2 || val == 4)
3064 {
3065 *total = cost2;
3066 *total += rtx_cost (XEXP (op0, 0), outer_code, speed);
3067 *total += rtx_cost (op1, outer_code, speed);
3068 return true;
3069 }
3070 }
3071 *total = cost2;
3072 if (GET_CODE (op0) != REG
3073 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3074 *total += rtx_cost (op0, SET, speed);
3075 #if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
3076 towards creating too many induction variables. */
3077 if (!reg_or_7bit_operand (op1, SImode))
3078 *total += rtx_cost (op1, SET, speed);
3079 #endif
3080 }
3081 else if (GET_MODE (x) == DImode)
3082 {
3083 *total = 6 * cost2;
3084 if (GET_CODE (op1) != CONST_INT
3085 || !satisfies_constraint_Ks7 (op1))
3086 *total += rtx_cost (op1, PLUS, speed);
3087 if (GET_CODE (op0) != REG
3088 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3089 *total += rtx_cost (op0, PLUS, speed);
3090 }
3091 return true;
3092
3093 case MINUS:
3094 if (GET_MODE (x) == DImode)
3095 *total = 6 * cost2;
3096 else
3097 *total = cost2;
3098 return true;
3099
3100 case ASHIFT:
3101 case ASHIFTRT:
3102 case LSHIFTRT:
3103 if (GET_MODE (x) == DImode)
3104 *total = 6 * cost2;
3105 else
3106 *total = cost2;
3107
3108 op0 = XEXP (x, 0);
3109 op1 = XEXP (x, 1);
3110 if (GET_CODE (op0) != REG
3111 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3112 *total += rtx_cost (op0, code, speed);
3113
3114 return true;
3115
3116 case IOR:
3117 case AND:
3118 case XOR:
3119 op0 = XEXP (x, 0);
3120 op1 = XEXP (x, 1);
3121
3122 /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */
3123 if (code == IOR)
3124 {
3125 if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
3126 || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND)
3127 || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
3128 || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT))
3129 {
3130 *total = cost2;
3131 return true;
3132 }
3133 }
3134
3135 if (GET_CODE (op0) != REG
3136 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3137 *total += rtx_cost (op0, code, speed);
3138
3139 if (GET_MODE (x) == DImode)
3140 {
3141 *total = 2 * cost2;
3142 return true;
3143 }
3144 *total = cost2;
3145 if (GET_MODE (x) != SImode)
3146 return true;
3147
3148 if (code == AND)
3149 {
3150 if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
3151 *total += rtx_cost (XEXP (x, 1), code, speed);
3152 }
3153 else
3154 {
3155 if (! regorlog2_operand (XEXP (x, 1), SImode))
3156 *total += rtx_cost (XEXP (x, 1), code, speed);
3157 }
3158
3159 return true;
3160
3161 case ZERO_EXTRACT:
3162 case SIGN_EXTRACT:
3163 if (outer_code == SET
3164 && XEXP (x, 1) == const1_rtx
3165 && GET_CODE (XEXP (x, 2)) == CONST_INT)
3166 {
3167 *total = 2 * cost2;
3168 return true;
3169 }
3170 /* fall through */
3171
3172 case SIGN_EXTEND:
3173 case ZERO_EXTEND:
3174 *total = cost2;
3175 return true;
3176
3177 case MULT:
3178 {
3179 op0 = XEXP (x, 0);
3180 op1 = XEXP (x, 1);
3181 if (GET_CODE (op0) == GET_CODE (op1)
3182 && (GET_CODE (op0) == ZERO_EXTEND
3183 || GET_CODE (op0) == SIGN_EXTEND))
3184 {
3185 *total = COSTS_N_INSNS (1);
3186 op0 = XEXP (op0, 0);
3187 op1 = XEXP (op1, 0);
3188 }
3189 else if (!speed)
3190 *total = COSTS_N_INSNS (1);
3191 else
3192 *total = COSTS_N_INSNS (3);
3193
3194 if (GET_CODE (op0) != REG
3195 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3196 *total += rtx_cost (op0, MULT, speed);
3197 if (GET_CODE (op1) != REG
3198 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
3199 *total += rtx_cost (op1, MULT, speed);
3200 }
3201 return true;
3202
3203 case UDIV:
3204 case UMOD:
3205 *total = COSTS_N_INSNS (32);
3206 return true;
3207
3208 case VEC_CONCAT:
3209 case VEC_SELECT:
3210 if (outer_code == SET)
3211 *total = cost2;
3212 return true;
3213
3214 default:
3215 return false;
3216 }
3217 }
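
/* For instance, in a SET or PLUS a CONST_INT satisfying the Ks7
   constraint (7-bit signed, e.g. 5) is costed as free above, while a
   larger constant such as 300 costs one extra instruction.  */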
3218 \f
3219 /* Used for communication between {push,pop}_multiple_operation (which
3220 we use not only as a predicate) and the corresponding output functions. */
3221 static int first_preg_to_save, first_dreg_to_save;
3222 static int n_regs_to_save;
3223
3224 int
3225 push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3226 {
3227 int lastdreg = 8, lastpreg = 6;
3228 int i, group;
3229
3230 first_preg_to_save = lastpreg;
3231 first_dreg_to_save = lastdreg;
3232 for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++)
3233 {
3234 rtx t = XVECEXP (op, 0, i);
3235 rtx src, dest;
3236 int regno;
3237
3238 if (GET_CODE (t) != SET)
3239 return 0;
3240
3241 src = SET_SRC (t);
3242 dest = SET_DEST (t);
3243 if (GET_CODE (dest) != MEM || ! REG_P (src))
3244 return 0;
3245 dest = XEXP (dest, 0);
3246 if (GET_CODE (dest) != PLUS
3247 || ! REG_P (XEXP (dest, 0))
3248 || REGNO (XEXP (dest, 0)) != REG_SP
3249 || GET_CODE (XEXP (dest, 1)) != CONST_INT
3250 || INTVAL (XEXP (dest, 1)) != -i * 4)
3251 return 0;
3252
3253 regno = REGNO (src);
3254 if (group == 0)
3255 {
3256 if (D_REGNO_P (regno))
3257 {
3258 group = 1;
3259 first_dreg_to_save = lastdreg = regno - REG_R0;
3260 }
3261 else if (regno >= REG_P0 && regno <= REG_P7)
3262 {
3263 group = 2;
3264 first_preg_to_save = lastpreg = regno - REG_P0;
3265 }
3266 else
3267 return 0;
3268
3269 continue;
3270 }
3271
3272 if (group == 1)
3273 {
3274 if (regno >= REG_P0 && regno <= REG_P7)
3275 {
3276 group = 2;
3277 first_preg_to_save = lastpreg = regno - REG_P0;
3278 }
3279 else if (regno != REG_R0 + lastdreg + 1)
3280 return 0;
3281 else
3282 lastdreg++;
3283 }
3284 else if (group == 2)
3285 {
3286 if (regno != REG_P0 + lastpreg + 1)
3287 return 0;
3288 lastpreg++;
3289 }
3290 }
3291 n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3292 return 1;
3293 }
3294
3295 int
3296 pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3297 {
3298 int lastdreg = 8, lastpreg = 6;
3299 int i, group;
3300
3301 for (i = 1, group = 0; i < XVECLEN (op, 0); i++)
3302 {
3303 rtx t = XVECEXP (op, 0, i);
3304 rtx src, dest;
3305 int regno;
3306
3307 if (GET_CODE (t) != SET)
3308 return 0;
3309
3310 src = SET_SRC (t);
3311 dest = SET_DEST (t);
3312 if (GET_CODE (src) != MEM || ! REG_P (dest))
3313 return 0;
3314 src = XEXP (src, 0);
3315
3316 if (i == 1)
3317 {
3318 if (! REG_P (src) || REGNO (src) != REG_SP)
3319 return 0;
3320 }
3321 else if (GET_CODE (src) != PLUS
3322 || ! REG_P (XEXP (src, 0))
3323 || REGNO (XEXP (src, 0)) != REG_SP
3324 || GET_CODE (XEXP (src, 1)) != CONST_INT
3325 || INTVAL (XEXP (src, 1)) != (i - 1) * 4)
3326 return 0;
3327
3328 regno = REGNO (dest);
3329 if (group == 0)
3330 {
3331 if (regno == REG_R7)
3332 {
3333 group = 1;
3334 lastdreg = 7;
3335 }
3336 else if (regno != REG_P0 + lastpreg - 1)
3337 return 0;
3338 else
3339 lastpreg--;
3340 }
3341 else if (group == 1)
3342 {
3343 if (regno != REG_R0 + lastdreg - 1)
3344 return 0;
3345 else
3346 lastdreg--;
3347 }
3348 }
3349 first_dreg_to_save = lastdreg;
3350 first_preg_to_save = lastpreg;
3351 n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3352 return 1;
3353 }
3354
3355 /* Emit assembly code for one multi-register push described by INSN, with
3356 operands in OPERANDS. */
3357
3358 void
3359 output_push_multiple (rtx insn, rtx *operands)
3360 {
3361 char buf[80];
3362 int ok;
3363
3364 /* Validate the insn again, and compute first_[dp]reg_to_save. */
3365 ok = push_multiple_operation (PATTERN (insn), VOIDmode);
3366 gcc_assert (ok);
3367
3368 if (first_dreg_to_save == 8)
3369 sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save);
3370 else if (first_preg_to_save == 6)
3371 sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save);
3372 else
3373 sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n",
3374 first_dreg_to_save, first_preg_to_save);
3375
3376 output_asm_insn (buf, operands);
3377 }
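
/* E.g. with first_dreg_to_save == 5 and first_preg_to_save == 3, this
   emits "[--sp] = ( r7:5, p5:3 );", pushing R7-R5 and P5-P3 with a
   single instruction.  */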
3378
3379 /* Emit assembly code for one multi-register pop described by INSN, with
3380 operands in OPERANDS. */
3381
3382 void
3383 output_pop_multiple (rtx insn, rtx *operands)
3384 {
3385 char buf[80];
3386 int ok;
3387
3388 /* Validate the insn again, and compute first_[dp]reg_to_save. */
3389 ok = pop_multiple_operation (PATTERN (insn), VOIDmode);
3390 gcc_assert (ok);
3391
3392 if (first_dreg_to_save == 8)
3393 sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save);
3394 else if (first_preg_to_save == 6)
3395 sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save);
3396 else
3397 sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n",
3398 first_dreg_to_save, first_preg_to_save);
3399
3400 output_asm_insn (buf, operands);
3401 }
3402
3403 /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */
3404
3405 static void
3406 single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset)
3407 {
3408 rtx scratch = gen_reg_rtx (mode);
3409 rtx srcmem, dstmem;
3410
3411 srcmem = adjust_address_nv (src, mode, offset);
3412 dstmem = adjust_address_nv (dst, mode, offset);
3413 emit_move_insn (scratch, srcmem);
3414 emit_move_insn (dstmem, scratch);
3415 }
3416
3417 /* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with
3418 alignment ALIGN_EXP. Return true if successful, false if we should fall
3419 back on a different method. */
3420
3421 bool
3422 bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
3423 {
3424 rtx srcreg, destreg, countreg;
3425 HOST_WIDE_INT align = 0;
3426 unsigned HOST_WIDE_INT count = 0;
3427
3428 if (GET_CODE (align_exp) == CONST_INT)
3429 align = INTVAL (align_exp);
3430 if (GET_CODE (count_exp) == CONST_INT)
3431 {
3432 count = INTVAL (count_exp);
3433 #if 0
3434 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
3435 return false;
3436 #endif
3437 }
3438
3439 /* If optimizing for size, only do single copies inline. */
3440 if (optimize_size)
3441 {
3442 if (count == 2 && align < 2)
3443 return false;
3444 if (count == 4 && align < 4)
3445 return false;
3446 if (count != 1 && count != 2 && count != 4)
3447 return false;
3448 }
3449 if (align < 2 && count != 1)
3450 return false;
3451
3452 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
3453 if (destreg != XEXP (dst, 0))
3454 dst = replace_equiv_address_nv (dst, destreg);
3455 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
3456 if (srcreg != XEXP (src, 0))
3457 src = replace_equiv_address_nv (src, srcreg);
3458
3459 if (count != 0 && align >= 2)
3460 {
3461 unsigned HOST_WIDE_INT offset = 0;
3462
3463 if (align >= 4)
3464 {
3465 if ((count & ~3) == 4)
3466 {
3467 single_move_for_movmem (dst, src, SImode, offset);
3468 offset = 4;
3469 }
3470 else if (count & ~3)
3471 {
3472 HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1;
3473 countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3474
3475 emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg));
3476 cfun->machine->has_loopreg_clobber = true;
3477 }
3478 if (count & 2)
3479 {
3480 single_move_for_movmem (dst, src, HImode, offset);
3481 offset += 2;
3482 }
3483 }
3484 else
3485 {
3486 if ((count & ~1) == 2)
3487 {
3488 single_move_for_movmem (dst, src, HImode, offset);
3489 offset = 2;
3490 }
3491 else if (count & ~1)
3492 {
3493 HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1;
3494 countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3495
3496 emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg));
3497 cfun->machine->has_loopreg_clobber = true;
3498 }
3499 }
3500 if (count & 1)
3501 {
3502 single_move_for_movmem (dst, src, QImode, offset);
3503 }
3504 return true;
3505 }
3506 return false;
3507 }
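
/* Worked example: count == 7 with align >= 4 emits one SImode move at
   offset 0 (count & ~3 == 4), an HImode move at offset 4 and a QImode
   move at offset 6; larger word counts use the rep_movsi loop instead.  */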
3508 \f
3509 /* Compute the alignment for a local variable.
3510 TYPE is the data type, and ALIGN is the alignment that
3511 the object would ordinarily have. The value of this macro is used
3512 instead of that alignment to align the object. */
3513
3514 int
3515 bfin_local_alignment (tree type, int align)
3516 {
3517   /* Increasing the alignment of (relatively) big types allows the builtin
3518      memcpy to use 32-bit loads/stores. */
3519 if (TYPE_SIZE (type)
3520 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
3521 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8
3522 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32)
3523 return 32;
3524 return align;
3525 }
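
/* E.g. a local char buf[16] (128 bits), which would ordinarily be
   byte-aligned, is given 32-bit alignment so block copies can move it
   a word at a time.  */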
3526 \f
3527 /* Implement TARGET_SCHED_ISSUE_RATE. */
3528
3529 static int
3530 bfin_issue_rate (void)
3531 {
3532 return 3;
3533 }
3534
3535 static int
3536 bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3537 {
3538 enum attr_type insn_type, dep_insn_type;
3539 int dep_insn_code_number;
3540
3541 /* Anti and output dependencies have zero cost. */
3542 if (REG_NOTE_KIND (link) != 0)
3543 return 0;
3544
3545 dep_insn_code_number = recog_memoized (dep_insn);
3546
3547 /* If we can't recognize the insns, we can't really do anything. */
3548 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
3549 return cost;
3550
3551 insn_type = get_attr_type (insn);
3552 dep_insn_type = get_attr_type (dep_insn);
3553
3554 if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD)
3555 {
3556       rtx pat = PATTERN (dep_insn), dest, src;
3557       if (GET_CODE (pat) == PARALLEL)
3558 	pat = XVECEXP (pat, 0, 0);
3559       dest = SET_DEST (pat);
3560       src = SET_SRC (pat);
3561 if (! ADDRESS_REGNO_P (REGNO (dest))
3562 || ! (MEM_P (src) || D_REGNO_P (REGNO (src))))
3563 return cost;
3564 return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3);
3565 }
3566
3567 return cost;
3568 }
3569 \f
3570 /* This function acts like NEXT_INSN, but is aware of three-insn bundles and
3571 skips all subsequent parallel instructions if INSN is the start of such
3572 a group. */
3573 static rtx
3574 find_next_insn_start (rtx insn)
3575 {
3576 if (GET_MODE (insn) == SImode)
3577 {
3578 while (GET_MODE (insn) != QImode)
3579 insn = NEXT_INSN (insn);
3580 }
3581 return NEXT_INSN (insn);
3582 }
3583
3584 /* This function acts like PREV_INSN, but is aware of three-insn bundles:
3585    if the previous insn ends such a group, return the first insn of
3586    that group. */
3587 static rtx
3588 find_prev_insn_start (rtx insn)
3589 {
3590 insn = PREV_INSN (insn);
3591 gcc_assert (GET_MODE (insn) != SImode);
3592 if (GET_MODE (insn) == QImode)
3593 {
3594 while (GET_MODE (PREV_INSN (insn)) == SImode)
3595 insn = PREV_INSN (insn);
3596 }
3597 return insn;
3598 }
3599 \f
3600 /* Increment the counter for the number of hardware loops in the
3601    current function. */
3602
3603 void
3604 bfin_hardware_loop (void)
3605 {
3606 cfun->machine->has_hardware_loops++;
3607 }
3608
3609 /* Maximum loop nesting depth. */
3610 #define MAX_LOOP_DEPTH 2
3611
3612 /* Maximum size of a loop. */
3613 #define MAX_LOOP_LENGTH 2042
3614
3615 /* Maximum distance of the LSETUP instruction from the loop start. */
3616 #define MAX_LSETUP_DISTANCE 30
3617
3618 /* We need to keep a vector of loops.  */
3619 typedef struct loop_info *loop_info;
3620 DEF_VEC_P (loop_info);
3621 DEF_VEC_ALLOC_P (loop_info,heap);
3622
3623 /* Information about a loop we have found (or are in the process of
3624 finding). */
3625 struct GTY (()) loop_info
3626 {
3627 /* loop number, for dumps */
3628 int loop_no;
3629
3630 /* All edges that jump into and out of the loop. */
3631 VEC(edge,gc) *incoming;
3632
3633 /* We can handle two cases: all incoming edges have the same destination
3634 block, or all incoming edges have the same source block. These two
3635 members are set to the common source or destination we found, or NULL
3636 if different blocks were found. If both are NULL the loop can't be
3637 optimized. */
3638 basic_block incoming_src;
3639 basic_block incoming_dest;
3640
3641 /* First block in the loop. This is the one branched to by the loop_end
3642 insn. */
3643 basic_block head;
3644
3645 /* Last block in the loop (the one with the loop_end insn). */
3646 basic_block tail;
3647
3648 /* The successor block of the loop. This is the one the loop_end insn
3649 falls into. */
3650 basic_block successor;
3651
3652 /* The last instruction in the tail. */
3653 rtx last_insn;
3654
3655 /* The loop_end insn. */
3656 rtx loop_end;
3657
3658 /* The iteration register. */
3659 rtx iter_reg;
3660
3661 /* The new label placed at the beginning of the loop. */
3662 rtx start_label;
3663
3664 /* The new label placed at the end of the loop. */
3665 rtx end_label;
3666
3667 /* The length of the loop. */
3668 int length;
3669
3670 /* The nesting depth of the loop. */
3671 int depth;
3672
3673 /* Nonzero if we can't optimize this loop. */
3674 int bad;
3675
3676 /* True if we have visited this loop. */
3677 int visited;
3678
3679 /* True if this loop body clobbers any of LC0, LT0, or LB0. */
3680 int clobber_loop0;
3681
3682 /* True if this loop body clobbers any of LC1, LT1, or LB1. */
3683 int clobber_loop1;
3684
3685 /* Next loop in the graph. */
3686 struct loop_info *next;
3687
3688 /* Immediate outer loop of this loop. */
3689 struct loop_info *outer;
3690
3691 /* Vector of blocks only within the loop, including those within
3692 inner loops. */
3693 VEC (basic_block,heap) *blocks;
3694
3695 /* Same information in a bitmap. */
3696 bitmap block_bitmap;
3697
3698 /* Vector of inner loops within this loop */
3699 VEC (loop_info,heap) *loops;
3700 };
3701
3702 static void
3703 bfin_dump_loops (loop_info loops)
3704 {
3705 loop_info loop;
3706
3707 for (loop = loops; loop; loop = loop->next)
3708 {
3709 loop_info i;
3710 basic_block b;
3711 unsigned ix;
3712
3713 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
3714 if (loop->bad)
3715 fprintf (dump_file, "(bad) ");
3716 fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth);
3717
3718 fprintf (dump_file, " blocks: [ ");
3719 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
3720 fprintf (dump_file, "%d ", b->index);
3721 fprintf (dump_file, "] ");
3722
3723 fprintf (dump_file, " inner loops: [ ");
3724 for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, i); ix++)
3725 fprintf (dump_file, "%d ", i->loop_no);
3726 fprintf (dump_file, "]\n");
3727 }
3728 fprintf (dump_file, "\n");
3729 }
3730
3731 /* Return true if basic block BB belongs to LOOP (or one of its
3732    inferiors), as recorded in the loop's block bitmap. */
3733
3734 static bool
3735 bfin_bb_in_loop (loop_info loop, basic_block bb)
3736 {
3737 return bitmap_bit_p (loop->block_bitmap, bb->index);
3738 }
3739
3740 /* Scan the blocks of LOOP (and its inferiors) looking for uses of
3741    REG.  Return true if we find any.  Don't count the loop's loop_end
3742 insn if it matches LOOP_END. */
3743
3744 static bool
3745 bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
3746 {
3747 unsigned ix;
3748 basic_block bb;
3749
3750 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
3751 {
3752 rtx insn;
3753
3754 for (insn = BB_HEAD (bb);
3755 insn != NEXT_INSN (BB_END (bb));
3756 insn = NEXT_INSN (insn))
3757 {
3758 if (!INSN_P (insn))
3759 continue;
3760 if (insn == loop_end)
3761 continue;
3762 if (reg_mentioned_p (reg, PATTERN (insn)))
3763 return true;
3764 }
3765 }
3766 return false;
3767 }
3768
3769 /* Estimate the length of INSN conservatively. */
3770
3771 static int
3772 length_for_loop (rtx insn)
3773 {
3774 int length = 0;
3775 if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
3776 {
3777 if (ENABLE_WA_SPECULATIVE_SYNCS)
3778 length = 8;
3779 else if (ENABLE_WA_SPECULATIVE_LOADS)
3780 length = 6;
3781 }
3782 else if (LABEL_P (insn))
3783 {
3784 if (ENABLE_WA_SPECULATIVE_SYNCS)
3785 length = 4;
3786 }
3787
3788 if (INSN_P (insn))
3789 length += get_attr_length (insn);
3790
3791 return length;
3792 }
3793
3794 /* Optimize LOOP. */
3795
3796 static void
3797 bfin_optimize_loop (loop_info loop)
3798 {
3799 basic_block bb;
3800 loop_info inner;
3801 rtx insn, last_insn;
3802 rtx loop_init, start_label, end_label;
3803 rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
3804 rtx iter_reg, scratchreg, scratch_init, scratch_init_insn;
3805 rtx lc_reg, lt_reg, lb_reg;
3806 rtx seq, seq_end;
3807 int length;
3808 unsigned ix;
3809 int inner_depth = 0;
3810
3811 if (loop->visited)
3812 return;
3813
3814 loop->visited = 1;
3815
3816 if (loop->bad)
3817 {
3818 if (dump_file)
3819 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
3820 goto bad_loop;
3821 }
3822
3823 /* Every loop contains in its list of inner loops every loop nested inside
3824 it, even if there are intermediate loops. This works because we're doing
3825 a depth-first search here and never visit a loop more than once. */
3826 for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++)
3827 {
3828 bfin_optimize_loop (inner);
3829
3830 if (!inner->bad && inner_depth < inner->depth)
3831 {
3832 inner_depth = inner->depth;
3833
3834 loop->clobber_loop0 |= inner->clobber_loop0;
3835 loop->clobber_loop1 |= inner->clobber_loop1;
3836 }
3837 }
3838
3839 loop->depth = inner_depth + 1;
3840 if (loop->depth > MAX_LOOP_DEPTH)
3841 {
3842 if (dump_file)
3843 fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
3844 goto bad_loop;
3845 }
3846
3847 /* Get the loop iteration register. */
3848 iter_reg = loop->iter_reg;
3849
3850 if (!REG_P (iter_reg))
3851 {
3852 if (dump_file)
3853 fprintf (dump_file, ";; loop %d iteration count not in a register\n",
3854 loop->loop_no);
3855 goto bad_loop;
3856 }
3857 scratchreg = NULL_RTX;
3858 scratch_init = iter_reg;
3859 scratch_init_insn = NULL_RTX;
3860 if (!PREG_P (iter_reg) && loop->incoming_src)
3861 {
3862 basic_block bb_in = loop->incoming_src;
3863 int i;
3864 for (i = REG_P0; i <= REG_P5; i++)
3865 if ((df_regs_ever_live_p (i)
3866 || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
3867 && call_used_regs[i]))
3868 && !REGNO_REG_SET_P (df_get_live_out (bb_in), i))
3869 {
3870 scratchreg = gen_rtx_REG (SImode, i);
3871 break;
3872 }
3873 for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in);
3874 insn = PREV_INSN (insn))
3875 {
3876 rtx set;
3877 if (NOTE_P (insn) || BARRIER_P (insn))
3878 continue;
3879 set = single_set (insn);
3880 if (set && rtx_equal_p (SET_DEST (set), iter_reg))
3881 {
3882 if (CONSTANT_P (SET_SRC (set)))
3883 {
3884 scratch_init = SET_SRC (set);
3885 scratch_init_insn = insn;
3886 }
3887 break;
3888 }
3889 else if (reg_mentioned_p (iter_reg, PATTERN (insn)))
3890 break;
3891 }
3892 }
3893
3894 if (loop->incoming_src)
3895 {
3896 /* Make sure the predecessor is before the loop start label, as required by
3897 the LSETUP instruction. */
3898 length = 0;
3899 insn = BB_END (loop->incoming_src);
3900 /* If we have to insert the LSETUP before a jump, count that jump in the
3901 length. */
3902 if (VEC_length (edge, loop->incoming) > 1
3903 || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
3904 {
3905 gcc_assert (JUMP_P (insn));
3906 insn = PREV_INSN (insn);
3907 }
3908
3909 for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
3910 length += length_for_loop (insn);
3911
3912 if (!insn)
3913 {
3914 if (dump_file)
3915 fprintf (dump_file, ";; loop %d lsetup not before loop_start\n",
3916 loop->loop_no);
3917 goto bad_loop;
3918 }
3919
3920 /* Account for the pop of a scratch register where necessary. */
3921 if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
3922 && ENABLE_WA_LOAD_LCREGS)
3923 length += 2;
3924
3925 if (length > MAX_LSETUP_DISTANCE)
3926 {
3927 if (dump_file)
3928 fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no);
3929 goto bad_loop;
3930 }
3931 }
3932
3933 /* Check that start_label appears before loop_end, and compute the
3934 offset between them, estimating instruction lengths
3935 conservatively. */
3936 length = 0;
3937 for (insn = loop->start_label;
3938 insn && insn != loop->loop_end;
3939 insn = NEXT_INSN (insn))
3940 length += length_for_loop (insn);
3941
3942 if (!insn)
3943 {
3944 if (dump_file)
3945 fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
3946 loop->loop_no);
3947 goto bad_loop;
3948 }
3949
3950 loop->length = length;
3951 if (loop->length > MAX_LOOP_LENGTH)
3952 {
3953 if (dump_file)
3954 fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
3955 goto bad_loop;
3956 }
3957
3958 /* Scan all the blocks to make sure they don't use iter_reg. */
3959 if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
3960 {
3961 if (dump_file)
3962 fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
3963 goto bad_loop;
3964 }
3965
3966 /* Scan all the insns to see if the loop body clobbers any
3967 hardware loop registers. */
3968
3969 reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
3970 reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
3971 reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
3972 reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
3973 reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
3974 reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
3975
3976 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
3977 {
3978 rtx insn;
3979
3980 for (insn = BB_HEAD (bb);
3981 insn != NEXT_INSN (BB_END (bb));
3982 insn = NEXT_INSN (insn))
3983 {
3984 if (!INSN_P (insn))
3985 continue;
3986
3987 if (reg_set_p (reg_lc0, insn)
3988 || reg_set_p (reg_lt0, insn)
3989 || reg_set_p (reg_lb0, insn))
3990 loop->clobber_loop0 = 1;
3991
3992 if (reg_set_p (reg_lc1, insn)
3993 || reg_set_p (reg_lt1, insn)
3994 || reg_set_p (reg_lb1, insn))
3995 loop->clobber_loop1 = 1;
3996 }
3997 }
3998
3999 if ((loop->clobber_loop0 && loop->clobber_loop1)
4000 || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
4001 {
4002 loop->depth = MAX_LOOP_DEPTH + 1;
4003 if (dump_file)
4004 fprintf (dump_file, ";; loop %d no loop reg available\n",
4005 loop->loop_no);
4006 goto bad_loop;
4007 }
4008
4009 /* There should be an instruction before the loop_end instruction
4010 in the same basic block. That instruction must not be any of:
4011 - JUMP
4012 - CONDITIONAL BRANCH
4013 - CALL
4014 - CSYNC
4015 - SSYNC
4016 - Returns (RTS, RTN, etc.) */
4017
4018 bb = loop->tail;
4019 last_insn = find_prev_insn_start (loop->loop_end);
4020
4021 while (1)
4022 {
4023 for (; last_insn != BB_HEAD (bb);
4024 last_insn = find_prev_insn_start (last_insn))
4025 if (INSN_P (last_insn))
4026 break;
4027
4028 if (last_insn != BB_HEAD (bb))
4029 break;
4030
4031 if (single_pred_p (bb)
4032 && single_pred_edge (bb)->flags & EDGE_FALLTHRU
4033 && single_pred (bb) != ENTRY_BLOCK_PTR)
4034 {
4035 bb = single_pred (bb);
4036 last_insn = BB_END (bb);
4037 continue;
4038 }
4039 else
4040 {
4041 last_insn = NULL_RTX;
4042 break;
4043 }
4044 }
4045
4046 if (!last_insn)
4047 {
4048 if (dump_file)
4049 fprintf (dump_file, ";; loop %d has no last instruction\n",
4050 loop->loop_no);
4051 goto bad_loop;
4052 }
4053
4054 if (JUMP_P (last_insn) && !any_condjump_p (last_insn))
4055 {
4056 if (dump_file)
4057 fprintf (dump_file, ";; loop %d has bad last instruction\n",
4058 loop->loop_no);
4059 goto bad_loop;
4060 }
4061 /* In all other cases, try to replace a bad last insn with a nop. */
4062 else if (JUMP_P (last_insn)
4063 || CALL_P (last_insn)
4064 || get_attr_type (last_insn) == TYPE_SYNC
4065 || get_attr_type (last_insn) == TYPE_CALL
4066 || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI
4067 || recog_memoized (last_insn) == CODE_FOR_return_internal
4068 || GET_CODE (PATTERN (last_insn)) == ASM_INPUT
4069 || asm_noperands (PATTERN (last_insn)) >= 0)
4070 {
4071 if (loop->length + 2 > MAX_LOOP_LENGTH)
4072 {
4073 if (dump_file)
4074 fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
4075 goto bad_loop;
4076 }
4077 if (dump_file)
4078 fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n",
4079 loop->loop_no);
4080
4081 last_insn = emit_insn_after (gen_forced_nop (), last_insn);
4082 }
4083
4084 loop->last_insn = last_insn;
4085
4086 /* The loop is good for replacement. */
4087 start_label = loop->start_label;
4088 end_label = gen_label_rtx ();
4089 iter_reg = loop->iter_reg;
4090
4091 if (loop->depth == 1 && !loop->clobber_loop1)
4092 {
4093 lc_reg = reg_lc1;
4094 lt_reg = reg_lt1;
4095 lb_reg = reg_lb1;
4096 loop->clobber_loop1 = 1;
4097 }
4098 else
4099 {
4100 lc_reg = reg_lc0;
4101 lt_reg = reg_lt0;
4102 lb_reg = reg_lb0;
4103 loop->clobber_loop0 = 1;
4104 }
4105
4106 loop->end_label = end_label;
4107
4108 /* Create a sequence containing the loop setup. */
4109 start_sequence ();
4110
4111 /* LSETUP only accepts P registers. If we have one, we can use it,
4112 otherwise there are several ways of working around the problem.
4113 If we're not affected by anomaly 312, we can load the LC register
4114 from any iteration register, and use LSETUP without initialization.
4115 If we've found a P scratch register that's not live here, we can
4116 instead copy the iter_reg into that and use an initializing LSETUP.
4117 If all else fails, push and pop P0 and use it as a scratch. */
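  /* As a sketch of the last-resort case (register choice illustrative,
     not taken from this function's variables):

	[--SP] = P0;
	P0 = <iteration count>;
	LSETUP (start_label, end_label) LC0 = P0;
	P0 = [SP++];

     The other strategies simply drop the push/pop or the extra move.  */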
4118 if (P_REGNO_P (REGNO (iter_reg)))
4119 {
4120 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
4121 lb_reg, end_label,
4122 lc_reg, iter_reg);
4123 seq_end = emit_insn (loop_init);
4124 }
4125 else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
4126 {
4127 emit_insn (gen_movsi (lc_reg, iter_reg));
4128 loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
4129 lb_reg, end_label,
4130 lc_reg);
4131 seq_end = emit_insn (loop_init);
4132 }
4133 else if (scratchreg != NULL_RTX)
4134 {
4135 emit_insn (gen_movsi (scratchreg, scratch_init));
4136 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
4137 lb_reg, end_label,
4138 lc_reg, scratchreg);
4139 seq_end = emit_insn (loop_init);
4140 if (scratch_init_insn != NULL_RTX)
4141 delete_insn (scratch_init_insn);
4142 }
4143 else
4144 {
4145 rtx p0reg = gen_rtx_REG (SImode, REG_P0);
4146 rtx push = gen_frame_mem (SImode,
4147 gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
4148 rtx pop = gen_frame_mem (SImode,
4149 gen_rtx_POST_INC (SImode, stack_pointer_rtx));
4150 emit_insn (gen_movsi (push, p0reg));
4151 emit_insn (gen_movsi (p0reg, scratch_init));
4152 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
4153 lb_reg, end_label,
4154 lc_reg, p0reg);
4155 emit_insn (loop_init);
4156 seq_end = emit_insn (gen_movsi (p0reg, pop));
4157 if (scratch_init_insn != NULL_RTX)
4158 delete_insn (scratch_init_insn);
4159 }
4160
4161 if (dump_file)
4162 {
4163 fprintf (dump_file, ";; replacing loop %d initializer with\n",
4164 loop->loop_no);
4165 print_rtl_single (dump_file, loop_init);
4166 fprintf (dump_file, ";; replacing loop %d terminator with\n",
4167 loop->loop_no);
4168 print_rtl_single (dump_file, loop->loop_end);
4169 }
4170
4171 /* If the loop isn't entered at the top, also create a jump to the entry
4172 point. */
4173 if (!loop->incoming_src && loop->head != loop->incoming_dest)
4174 {
4175 rtx label = BB_HEAD (loop->incoming_dest);
4176 /* If we're jumping to the final basic block in the loop, and there's
4177 only one cheap instruction before the end (typically an increment of
4178 an induction variable), we can just emit a copy here instead of a
4179 jump. */
4180 if (loop->incoming_dest == loop->tail
4181 && next_real_insn (label) == last_insn
4182 && asm_noperands (last_insn) < 0
4183 && GET_CODE (PATTERN (last_insn)) == SET)
4184 {
4185 seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
4186 }
4187 else
4188 {
4189 emit_jump_insn (gen_jump (label));
4190 seq_end = emit_barrier ();
4191 }
4192 }
4193
4194 seq = get_insns ();
4195 end_sequence ();
4196
4197 if (loop->incoming_src)
4198 {
4199 rtx prev = BB_END (loop->incoming_src);
4200 if (VEC_length (edge, loop->incoming) > 1
4201 || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
4202 {
4203 gcc_assert (JUMP_P (prev));
4204 prev = PREV_INSN (prev);
4205 }
4206 emit_insn_after (seq, prev);
4207 }
4208 else
4209 {
4210 basic_block new_bb;
4211 edge e;
4212 edge_iterator ei;
4213
4214 #ifdef ENABLE_CHECKING
4215 if (loop->head != loop->incoming_dest)
4216 {
4217 /* We aren't entering the loop at the top. Since we've established
4218 that the loop is entered only at one point, this means there
4219 can't be fallthru edges into the head. Any such fallthru edges
4220 would become invalid when we insert the new block, so verify
4221 that this does not in fact happen. */
4222 FOR_EACH_EDGE (e, ei, loop->head->preds)
4223 gcc_assert (!(e->flags & EDGE_FALLTHRU));
4224 }
4225 #endif
4226
4227 emit_insn_before (seq, BB_HEAD (loop->head));
4228 seq = emit_label_before (gen_label_rtx (), seq);
4229
4230 new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb);
4231 FOR_EACH_EDGE (e, ei, loop->incoming)
4232 {
4233 if (!(e->flags & EDGE_FALLTHRU)
4234 || e->dest != loop->head)
4235 redirect_edge_and_branch_force (e, new_bb);
4236 else
4237 redirect_edge_succ (e, new_bb);
4238 }
4239 e = make_edge (new_bb, loop->head, 0);
4240 }
4241
4242 delete_insn (loop->loop_end);
4243 /* Insert the loop end label before the last instruction of the loop. */
4244 emit_label_before (loop->end_label, loop->last_insn);
4245
4246 return;
4247
4248 bad_loop:
4249
4250 if (dump_file)
4251 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
4252
4253 loop->bad = 1;
4254
4255 if (DPREG_P (loop->iter_reg))
4256 {
4257 /* If loop->iter_reg is a DREG or PREG, we can split it here
4258 without a scratch register. */
4259 rtx insn, test;
4260
4261 emit_insn_before (gen_addsi3 (loop->iter_reg,
4262 loop->iter_reg,
4263 constm1_rtx),
4264 loop->loop_end);
4265
4266 test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
4267 insn = emit_jump_insn_before (gen_cbranchsi4 (test,
4268 loop->iter_reg, const0_rtx,
4269 loop->start_label),
4270 loop->loop_end);
4271
4272 JUMP_LABEL (insn) = loop->start_label;
4273 LABEL_NUSES (loop->start_label)++;
4274 delete_insn (loop->loop_end);
4275 }
4276 }
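/* For a loop marked bad, the fallback above replaces the loop_end
   pattern with an explicit count-down and branch; assuming the counter
   lives in P1 (an illustrative choice), the result corresponds to:

	P1 += -1;
	CC = P1 == 0;
	if !CC jump start_label;

   i.e. the plain software-counted loop the doloop pass started from.  */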
4277
4278 /* Called from bfin_reorg_loops when a potential loop end is found. LOOP is
4279 a newly set up structure describing the loop, it is this function's
4280 responsibility to fill most of it. TAIL_BB and TAIL_INSN point to the
4281 loop_end insn and its enclosing basic block. */
4282
4283 static void
4284 bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn)
4285 {
4286 unsigned dwork = 0;
4287 basic_block bb;
4288 VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20);
4289
4290 loop->tail = tail_bb;
4291 loop->head = BRANCH_EDGE (tail_bb)->dest;
4292 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
4293 loop->loop_end = tail_insn;
4294 loop->last_insn = NULL_RTX;
4295 loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1));
4296 loop->depth = loop->length = 0;
4297 loop->visited = 0;
4298 loop->clobber_loop0 = loop->clobber_loop1 = 0;
4299 loop->outer = NULL;
4300 loop->loops = NULL;
4301 loop->incoming = VEC_alloc (edge, gc, 2);
4302 loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
4303 loop->end_label = NULL_RTX;
4304 loop->bad = 0;
4305
4306 VEC_safe_push (basic_block, heap, works, loop->head);
4307
4308 while (VEC_iterate (basic_block, works, dwork++, bb))
4309 {
4310 edge e;
4311 edge_iterator ei;
4312 if (bb == EXIT_BLOCK_PTR)
4313 {
4314 /* We've reached the exit block. The loop must be bad. */
4315 if (dump_file)
4316 fprintf (dump_file,
4317 ";; Loop is bad - reached exit block while scanning\n");
4318 loop->bad = 1;
4319 break;
4320 }
4321
4322 if (bitmap_bit_p (loop->block_bitmap, bb->index))
4323 continue;
4324
4325 /* We've not seen this block before. Add it to the loop's
4326 list and then add each successor to the work list. */
4327
4328 VEC_safe_push (basic_block, heap, loop->blocks, bb);
4329 bitmap_set_bit (loop->block_bitmap, bb->index);
4330
4331 if (bb != tail_bb)
4332 {
4333 FOR_EACH_EDGE (e, ei, bb->succs)
4334 {
4335 basic_block succ = EDGE_SUCC (bb, ei.index)->dest;
4336 if (!REGNO_REG_SET_P (df_get_live_in (succ),
4337 REGNO (loop->iter_reg)))
4338 continue;
4339 if (!VEC_space (basic_block, works, 1))
4340 {
4341 if (dwork)
4342 {
4343 VEC_block_remove (basic_block, works, 0, dwork);
4344 dwork = 0;
4345 }
4346 else
4347 VEC_reserve (basic_block, heap, works, 1);
4348 }
4349 VEC_quick_push (basic_block, works, succ);
4350 }
4351 }
4352 }
4353
4354 /* Find the predecessor, and make sure nothing else jumps into this loop. */
4355 if (!loop->bad)
4356 {
4357 int pass, retry;
4358 for (dwork = 0; VEC_iterate (basic_block, loop->blocks, dwork, bb); dwork++)
4359 {
4360 edge e;
4361 edge_iterator ei;
4362 FOR_EACH_EDGE (e, ei, bb->preds)
4363 {
4364 basic_block pred = e->src;
4365
4366 if (!bfin_bb_in_loop (loop, pred))
4367 {
4368 if (dump_file)
4369 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
4370 loop->loop_no, pred->index,
4371 e->dest->index);
4372 VEC_safe_push (edge, gc, loop->incoming, e);
4373 }
4374 }
4375 }
4376
4377 for (pass = 0, retry = 1; retry && pass < 2; pass++)
4378 {
4379 edge e;
4380 edge_iterator ei;
4381 bool first = true;
4382 retry = 0;
4383
4384 FOR_EACH_EDGE (e, ei, loop->incoming)
4385 {
4386 if (first)
4387 {
4388 loop->incoming_src = e->src;
4389 loop->incoming_dest = e->dest;
4390 first = false;
4391 }
4392 else
4393 {
4394 if (e->dest != loop->incoming_dest)
4395 loop->incoming_dest = NULL;
4396 if (e->src != loop->incoming_src)
4397 loop->incoming_src = NULL;
4398 }
4399 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
4400 {
4401 if (pass == 0)
4402 {
4403 if (dump_file)
4404 fprintf (dump_file,
4405 ";; retrying loop %d with forwarder blocks\n",
4406 loop->loop_no);
4407 retry = 1;
4408 break;
4409 }
4410 loop->bad = 1;
4411 if (dump_file)
4412 fprintf (dump_file,
4413 ";; can't find suitable entry for loop %d\n",
4414 loop->loop_no);
4415 goto out;
4416 }
4417 }
4418 if (retry)
4419 {
4420 retry = 0;
4421 FOR_EACH_EDGE (e, ei, loop->incoming)
4422 {
4423 if (forwarder_block_p (e->src))
4424 {
4425 edge e2;
4426 edge_iterator ei2;
4427
4428 if (dump_file)
4429 fprintf (dump_file,
4430 ";; Adding forwarder block %d to loop %d and retrying\n",
4431 e->src->index, loop->loop_no);
4432 VEC_safe_push (basic_block, heap, loop->blocks, e->src);
4433 bitmap_set_bit (loop->block_bitmap, e->src->index);
4434 FOR_EACH_EDGE (e2, ei2, e->src->preds)
4435 VEC_safe_push (edge, gc, loop->incoming, e2);
4436 VEC_unordered_remove (edge, loop->incoming, ei.index);
4437 retry = 1;
4438 break;
4439 }
4440 }
4441 if (!retry)
4442 {
4443 if (dump_file)
4444 fprintf (dump_file, ";; No forwarder blocks found\n");
4445 loop->bad = 1;
4446 }
4447 }
4448 }
4449 }
4450
4451 out:
4452 VEC_free (basic_block, heap, works);
4453 }
4454
4455 /* Analyze the structure of the loops in the current function. Use STACK
4456 for bitmap allocations. Returns all the valid candidates for hardware
4457 loops found in this function. */
4458 static loop_info
4459 bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file)
4460 {
4461 loop_info loops = NULL;
4462 loop_info loop;
4463 basic_block bb;
4464 bitmap tmp_bitmap;
4465 int nloops = 0;
4466
4467 /* Find all the possible loop tails. This means searching for every
4468 loop_end instruction. For each one found, create a loop_info
4469 structure and add the head block to the work list. */
4470 FOR_EACH_BB (bb)
4471 {
4472 rtx tail = BB_END (bb);
4473
4474 while (GET_CODE (tail) == NOTE)
4475 tail = PREV_INSN (tail);
4476
4477 bb->aux = NULL;
4478
4479 if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end)
4480 {
4481 rtx insn;
4482 /* A possible loop end. */
4483
4484 /* There's a degenerate case we can handle - an empty loop consisting
4485 of only a back branch. Handle that by deleting the branch. */
4486 insn = BB_HEAD (BRANCH_EDGE (bb)->dest);
4487 if (next_real_insn (insn) == tail)
4488 {
4489 if (dump_file)
4490 {
4491 fprintf (dump_file, ";; degenerate loop ending at\n");
4492 print_rtl_single (dump_file, tail);
4493 }
4494 delete_insn_and_edges (tail);
4495 continue;
4496 }
4497
4498 loop = XNEW (struct loop_info);
4499 loop->next = loops;
4500 loops = loop;
4501 loop->loop_no = nloops++;
4502 loop->blocks = VEC_alloc (basic_block, heap, 20);
4503 loop->block_bitmap = BITMAP_ALLOC (stack);
4504 bb->aux = loop;
4505
4506 if (dump_file)
4507 {
4508 fprintf (dump_file, ";; potential loop %d ending at\n",
4509 loop->loop_no);
4510 print_rtl_single (dump_file, tail);
4511 }
4512
4513 bfin_discover_loop (loop, bb, tail);
4514 }
4515 }
4516
4517 tmp_bitmap = BITMAP_ALLOC (stack);
4518 /* Compute loop nestings. */
4519 for (loop = loops; loop; loop = loop->next)
4520 {
4521 loop_info other;
4522 if (loop->bad)
4523 continue;
4524
4525 for (other = loop->next; other; other = other->next)
4526 {
4527 if (other->bad)
4528 continue;
4529
4530 bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap);
4531 if (bitmap_empty_p (tmp_bitmap))
4532 continue;
4533 if (bitmap_equal_p (tmp_bitmap, other->block_bitmap))
4534 {
4535 other->outer = loop;
4536 VEC_safe_push (loop_info, heap, loop->loops, other);
4537 }
4538 else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap))
4539 {
4540 loop->outer = other;
4541 VEC_safe_push (loop_info, heap, other->loops, loop);
4542 }
4543 else
4544 {
4545 if (dump_file)
4546 fprintf (dump_file,
4547 ";; can't find suitable nesting for loops %d and %d\n",
4548 loop->loop_no, other->loop_no);
4549 loop->bad = other->bad = 1;
4550 }
4551 }
4552 }
4553 BITMAP_FREE (tmp_bitmap);
4554
4555 return loops;
4556 }
4557
4558 /* Free up the loop structures in LOOPS. */
4559 static void
4560 free_loops (loop_info loops)
4561 {
4562 while (loops)
4563 {
4564 loop_info loop = loops;
4565 loops = loop->next;
4566 VEC_free (loop_info, heap, loop->loops);
4567 VEC_free (basic_block, heap, loop->blocks);
4568 BITMAP_FREE (loop->block_bitmap);
4569 XDELETE (loop);
4570 }
4571 }
4572
4573 #define BB_AUX_INDEX(BB) ((unsigned)(BB)->aux)
4574
4575 /* The taken-branch edge from the loop end can actually go forward. Since the
4576 Blackfin's LSETUP instruction requires that the loop end be after the loop
4577 start, try to reorder a loop's basic blocks when we find such a case. */
4578 static void
4579 bfin_reorder_loops (loop_info loops, FILE *dump_file)
4580 {
4581 basic_block bb;
4582 loop_info loop;
4583
4584 FOR_EACH_BB (bb)
4585 bb->aux = NULL;
4586 cfg_layout_initialize (0);
4587
4588 for (loop = loops; loop; loop = loop->next)
4589 {
4590 unsigned index;
4591 basic_block bb;
4592 edge e;
4593 edge_iterator ei;
4594
4595 if (loop->bad)
4596 continue;
4597
4598 /* Recreate an index for basic blocks that represents their order. */
4599 for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0;
4600 bb != EXIT_BLOCK_PTR;
4601 bb = bb->next_bb, index++)
4602 bb->aux = (PTR) index;
4603
4604 if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail))
4605 continue;
4606
4607 FOR_EACH_EDGE (e, ei, loop->head->succs)
4608 {
4609 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
4610 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
4611 {
4612 basic_block start_bb = e->dest;
4613 basic_block start_prev_bb = start_bb->prev_bb;
4614
4615 if (dump_file)
4616 fprintf (dump_file, ";; Moving block %d before block %d\n",
4617 loop->head->index, start_bb->index);
4618 loop->head->prev_bb->next_bb = loop->head->next_bb;
4619 loop->head->next_bb->prev_bb = loop->head->prev_bb;
4620
4621 loop->head->prev_bb = start_prev_bb;
4622 loop->head->next_bb = start_bb;
4623 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
4624 break;
4625 }
4626 }
4628 }
4629
4630 FOR_EACH_BB (bb)
4631 {
4632 if (bb->next_bb != EXIT_BLOCK_PTR)
4633 bb->aux = bb->next_bb;
4634 else
4635 bb->aux = NULL;
4636 }
4637 cfg_layout_finalize ();
4638 df_analyze ();
4639 }
4640
4641 /* Run from machine_dependent_reorg, this pass looks for doloop_end insns
4642 and tries to rewrite the RTL of these loops so that proper Blackfin
4643 hardware loops are generated. */
4644
4645 static void
4646 bfin_reorg_loops (FILE *dump_file)
4647 {
4648 loop_info loops = NULL;
4649 loop_info loop;
4650 basic_block bb;
4651 bitmap_obstack stack;
4652
4653 bitmap_obstack_initialize (&stack);
4654
4655 if (dump_file)
4656 fprintf (dump_file, ";; Find loops, first pass\n\n");
4657
4658 loops = bfin_discover_loops (&stack, dump_file);
4659
4660 if (dump_file)
4661 bfin_dump_loops (loops);
4662
4663 bfin_reorder_loops (loops, dump_file);
4664 free_loops (loops);
4665
4666 if (dump_file)
4667 fprintf (dump_file, ";; Find loops, second pass\n\n");
4668
4669 loops = bfin_discover_loops (&stack, dump_file);
4670 if (dump_file)
4671 {
4672 fprintf (dump_file, ";; All loops found:\n\n");
4673 bfin_dump_loops (loops);
4674 }
4675
4676 /* Now apply the optimizations. */
4677 for (loop = loops; loop; loop = loop->next)
4678 bfin_optimize_loop (loop);
4679
4680 if (dump_file)
4681 {
4682 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
4683 bfin_dump_loops (loops);
4684 }
4685
4686 free_loops (loops);
4687
4688 if (dump_file)
4689 print_rtl (dump_file, get_insns ());
4690
4691 FOR_EACH_BB (bb)
4692 bb->aux = NULL;
4693
4694 splitting_loops = 1;
4695 FOR_EACH_BB (bb)
4696 {
4697 rtx insn = BB_END (bb);
4698 if (!JUMP_P (insn))
4699 continue;
4700
4701 try_split (PATTERN (insn), insn, 1);
4702 }
4703 splitting_loops = 0;
4704 }
4705 \f
4706 /* Try to combine the three insns found in SLOT into a multi-issue
4707 bundle. Returns true if we modified the insn chain, false otherwise. */
4708 static bool
4709 gen_one_bundle (rtx slot[3])
4710 {
4711 gcc_assert (slot[1] != NULL_RTX);
4712
4713 /* Don't add extra NOPs if optimizing for size. */
4714 if (optimize_size
4715 && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
4716 return false;
4717
4718 /* Verify that we really can do the multi-issue. */
4719 if (slot[0])
4720 {
4721 rtx t = NEXT_INSN (slot[0]);
4722 while (t != slot[1])
4723 {
4724 if (GET_CODE (t) != NOTE
4725 || NOTE_KIND (t) != NOTE_INSN_DELETED)
4726 return false;
4727 t = NEXT_INSN (t);
4728 }
4729 }
4730 if (slot[2])
4731 {
4732 rtx t = NEXT_INSN (slot[1]);
4733 while (t != slot[2])
4734 {
4735 if (GET_CODE (t) != NOTE
4736 || NOTE_KIND (t) != NOTE_INSN_DELETED)
4737 return false;
4738 t = NEXT_INSN (t);
4739 }
4740 }
4741
4742 if (slot[0] == NULL_RTX)
4743 {
4744 slot[0] = emit_insn_before (gen_mnop (), slot[1]);
4745 df_insn_rescan (slot[0]);
4746 }
4747 if (slot[2] == NULL_RTX)
4748 {
4749 slot[2] = emit_insn_after (gen_forced_nop (), slot[1]);
4750 df_insn_rescan (slot[2]);
4751 }
4752
4753 /* Avoid line number information being printed inside one bundle. */
4754 if (INSN_LOCATOR (slot[1])
4755 && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0]))
4756 INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]);
4757 if (INSN_LOCATOR (slot[2])
4758 && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0]))
4759 INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]);
4760
4761 /* Terminate them with "|| " instead of ";" in the output. */
4762 PUT_MODE (slot[0], SImode);
4763 PUT_MODE (slot[1], SImode);
4764 /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */
4765 PUT_MODE (slot[2], QImode);
4766 return true;
4767 }
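/* A bundle created here is later printed with "||" separators, e.g.
   (an illustrative pairing of one 32-bit DSP insn with two 16-bit
   memory insns; actual operands depend on the schedule):

	R0 = R1.L * R2.L (IS) || R3 = [P0++] || R4 = [I0++];

   The SImode markers request the "||" terminators and the QImode
   marker flags the final slot.  */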
4768
4769 /* Go through all insns, and use the information generated during scheduling
4770 to generate SEQUENCEs to represent bundles of instructions issued
4771 simultaneously. */
4772
4773 static void
4774 bfin_gen_bundles (void)
4775 {
4776 basic_block bb;
4777 FOR_EACH_BB (bb)
4778 {
4779 rtx insn, next;
4780 rtx slot[3];
4781 int n_filled = 0;
4782
4783 slot[0] = slot[1] = slot[2] = NULL_RTX;
4784 for (insn = BB_HEAD (bb);; insn = next)
4785 {
4786 int at_end;
4787 rtx delete_this = NULL_RTX;
4788
4789 if (INSN_P (insn))
4790 {
4791 enum attr_type type = get_attr_type (insn);
4792
4793 if (type == TYPE_STALL)
4794 {
4795 gcc_assert (n_filled == 0);
4796 delete_this = insn;
4797 }
4798 else
4799 {
4800 if (type == TYPE_DSP32)
4801 slot[0] = insn;
4802 else if (slot[1] == NULL_RTX)
4803 slot[1] = insn;
4804 else
4805 slot[2] = insn;
4806 n_filled++;
4807 }
4808 }
4809
4810 next = NEXT_INSN (insn);
4811 while (next && insn != BB_END (bb)
4812 && !(INSN_P (next)
4813 && GET_CODE (PATTERN (next)) != USE
4814 && GET_CODE (PATTERN (next)) != CLOBBER))
4815 {
4816 insn = next;
4817 next = NEXT_INSN (insn);
4818 }
4819
4820 /* BB_END can change due to emitting extra NOPs, so check here. */
4821 at_end = insn == BB_END (bb);
4822 if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
4823 {
4824 if ((n_filled < 2
4825 || !gen_one_bundle (slot))
4826 && slot[0] != NULL_RTX)
4827 {
4828 rtx pat = PATTERN (slot[0]);
4829 if (GET_CODE (pat) == SET
4830 && GET_CODE (SET_SRC (pat)) == UNSPEC
4831 && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
4832 {
4833 SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
4834 INSN_CODE (slot[0]) = -1;
4835 df_insn_rescan (slot[0]);
4836 }
4837 }
4838 n_filled = 0;
4839 slot[0] = slot[1] = slot[2] = NULL_RTX;
4840 }
4841 if (delete_this != NULL_RTX)
4842 delete_insn (delete_this);
4843 if (at_end)
4844 break;
4845 }
4846 }
4847 }
4848
4849 /* Ensure that no var tracking notes are emitted in the middle of a
4850 three-instruction bundle. */
4851
4852 static void
4853 reorder_var_tracking_notes (void)
4854 {
4855 basic_block bb;
4856 FOR_EACH_BB (bb)
4857 {
4858 rtx insn, next;
4859 rtx queue = NULL_RTX;
4860 bool in_bundle = false;
4861
4862 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
4863 {
4864 next = NEXT_INSN (insn);
4865
4866 if (INSN_P (insn))
4867 {
4868 /* Emit queued up notes at the last instruction of a bundle. */
4869 if (GET_MODE (insn) == QImode)
4870 {
4871 while (queue)
4872 {
4873 rtx next_queue = PREV_INSN (queue);
4874 PREV_INSN (NEXT_INSN (insn)) = queue;
4875 NEXT_INSN (queue) = NEXT_INSN (insn);
4876 NEXT_INSN (insn) = queue;
4877 PREV_INSN (queue) = insn;
4878 queue = next_queue;
4879 }
4880 in_bundle = false;
4881 }
4882 else if (GET_MODE (insn) == SImode)
4883 in_bundle = true;
4884 }
4885 else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
4886 {
4887 if (in_bundle)
4888 {
4889 rtx prev = PREV_INSN (insn);
4890 PREV_INSN (next) = prev;
4891 NEXT_INSN (prev) = next;
4892
4893 PREV_INSN (insn) = queue;
4894 queue = insn;
4895 }
4896 }
4897 }
4898 }
4899 }
4900 \f
4901 /* On some silicon revisions, functions shorter than a certain number of cycles
4902 can cause unpredictable behaviour. Work around this by adding NOPs as
4903 needed. */
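/* As an illustration, a function whose body is nothing but "RTS;"
   enters the scan below with the full four-cycle budget intact, so
   four NOPs are emitted ahead of the return.  */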
4904 static void
4905 workaround_rts_anomaly (void)
4906 {
4907 rtx insn, first_insn = NULL_RTX;
4908 int cycles = 4;
4909
4910 if (! ENABLE_WA_RETS)
4911 return;
4912
4913 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4914 {
4915 rtx pat;
4916
4917 if (BARRIER_P (insn))
4918 return;
4919
4920 if (NOTE_P (insn) || LABEL_P (insn))
4921 continue;
4922
4923 if (first_insn == NULL_RTX)
4924 first_insn = insn;
4925 pat = PATTERN (insn);
4926 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4927 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
4928 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
4929 continue;
4930
4931 if (CALL_P (insn))
4932 return;
4933
4934 if (JUMP_P (insn))
4935 {
4936 if (recog_memoized (insn) == CODE_FOR_return_internal)
4937 break;
4938
4939 /* Nothing to worry about for direct jumps. */
4940 if (!any_condjump_p (insn))
4941 return;
4942 if (cycles <= 1)
4943 return;
4944 cycles--;
4945 }
4946 else if (INSN_P (insn))
4947 {
4948 rtx pat = PATTERN (insn);
4949 int this_cycles = 1;
4950
4951 if (GET_CODE (pat) == PARALLEL)
4952 {
4953 if (push_multiple_operation (pat, VOIDmode)
4954 || pop_multiple_operation (pat, VOIDmode))
4955 this_cycles = n_regs_to_save;
4956 }
4957 else
4958 {
4959 enum insn_code icode = recog_memoized (insn);
4960 if (icode == CODE_FOR_link)
4961 this_cycles = 4;
4962 else if (icode == CODE_FOR_unlink)
4963 this_cycles = 3;
4964 else if (icode == CODE_FOR_mulsi3)
4965 this_cycles = 5;
4966 }
4967 if (this_cycles >= cycles)
4968 return;
4969
4970 cycles -= this_cycles;
4971 }
4972 }
4973 while (cycles > 0)
4974 {
4975 emit_insn_before (gen_nop (), first_insn);
4976 cycles--;
4977 }
4978 }
4979
4980 /* Return an insn type for INSN that can be used by the caller for anomaly
4981 workarounds. This differs from plain get_attr_type in that it handles
4982 SEQUENCEs. */
4983
4984 static enum attr_type
4985 type_for_anomaly (rtx insn)
4986 {
4987 rtx pat = PATTERN (insn);
4988 if (GET_CODE (pat) == SEQUENCE)
4989 {
4990 enum attr_type t;
4991 t = get_attr_type (XVECEXP (pat, 0, 1));
4992 if (t == TYPE_MCLD)
4993 return t;
4994 t = get_attr_type (XVECEXP (pat, 0, 2));
4995 if (t == TYPE_MCLD)
4996 return t;
4997 return TYPE_MCST;
4998 }
4999 else
5000 return get_attr_type (insn);
5001 }
5002
5003 /* Return nonzero if INSN contains any loads that may trap. It handles
5004 SEQUENCEs correctly. */
5005
5006 static bool
5007 trapping_loads_p (rtx insn)
5008 {
5009 rtx pat = PATTERN (insn);
5010 if (GET_CODE (pat) == SEQUENCE)
5011 {
5012 enum attr_type t;
5013 t = get_attr_type (XVECEXP (pat, 0, 1));
5014 if (t == TYPE_MCLD
5015 && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 1)))))
5016 return true;
5017 t = get_attr_type (XVECEXP (pat, 0, 2));
5018 if (t == TYPE_MCLD
5019 && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 2)))))
5020 return true;
5021 return false;
5022 }
5023 else
5024 return may_trap_p (SET_SRC (single_set (insn)));
5025 }
5026
5027 /* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of
5028 a three-insn bundle, see if one of them is a load and return that if so.
5029 Return NULL_RTX if the insn does not contain loads. */
5030 static rtx
5031 find_load (rtx insn)
5032 {
5033 if (get_attr_type (insn) == TYPE_MCLD)
5034 return insn;
5035 if (GET_MODE (insn) != SImode)
5036 return NULL_RTX;
5037 do {
5038 insn = NEXT_INSN (insn);
5039 if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode)
5040 && get_attr_type (insn) == TYPE_MCLD)
5041 return insn;
5042 } while (GET_MODE (insn) != QImode);
5043 return NULL_RTX;
5044 }
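/* (The walk above relies on the mode markers set by gen_one_bundle:
   SImode means "more of this bundle follows" and QImode marks the
   final slot, so the scan stops at the bundle boundary.)  */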
5045
5046 /* Determine whether PAT is an indirect call pattern. */
5047 static bool
5048 indirect_call_p (rtx pat)
5049 {
5050 if (GET_CODE (pat) == PARALLEL)
5051 pat = XVECEXP (pat, 0, 0);
5052 if (GET_CODE (pat) == SET)
5053 pat = SET_SRC (pat);
5054 gcc_assert (GET_CODE (pat) == CALL);
5055 pat = XEXP (pat, 0);
5056 gcc_assert (GET_CODE (pat) == MEM);
5057 pat = XEXP (pat, 0);
5058
5059 return REG_P (pat);
5060 }
5061
5062 static void
5063 workaround_speculation (void)
5064 {
5065 rtx insn, next;
5066 rtx last_condjump = NULL_RTX;
5067 int cycles_since_jump = INT_MAX;
5068 int delay_added = 0;
5069
5070 if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
5071 && ! ENABLE_WA_INDIRECT_CALLS)
5072 return;
5073
5074 /* First pass: find predicted-false branches; if something after them
5075 needs nops, insert them or change the branch to predict true. */
5076 for (insn = get_insns (); insn; insn = next)
5077 {
5078 rtx pat;
5079 int delay_needed = 0;
5080
5081 next = find_next_insn_start (insn);
5082
5083 if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
5084 continue;
5085
5086 pat = PATTERN (insn);
5087 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5088 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5089 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5090 continue;
5091
5092 if (JUMP_P (insn))
5093 {
5094 if (any_condjump_p (insn)
5095 && ! cbranch_predicted_taken_p (insn))
5096 {
5097 last_condjump = insn;
5098 delay_added = 0;
5099 cycles_since_jump = 0;
5100 }
5101 else
5102 cycles_since_jump = INT_MAX;
5103 }
5104 else if (CALL_P (insn))
5105 {
5106 if (cycles_since_jump < INT_MAX)
5107 cycles_since_jump++;
5108 if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS)
5109 {
5110 delay_needed = 3;
5111 }
5112 }
5113 else if (INSN_P (insn))
5114 {
5115 rtx load_insn = find_load (insn);
5116 enum attr_type type = type_for_anomaly (insn);
5117
5118 if (cycles_since_jump < INT_MAX)
5119 cycles_since_jump++;
5120
5121 if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
5122 {
5123 if (trapping_loads_p (load_insn))
5124 delay_needed = 4;
5125 }
5126 else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
5127 delay_needed = 3;
5128 }
5129
5130 if (delay_needed > cycles_since_jump
5131 && (delay_needed - cycles_since_jump) > delay_added)
5132 {
5133 rtx pat1;
5134 int num_clobbers;
5135 rtx *op = recog_data.operand;
5136
5137 delay_needed -= cycles_since_jump;
5138
5139 extract_insn (last_condjump);
5140 if (optimize_size)
5141 {
5142 pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
5143 op[3]);
5144 cycles_since_jump = INT_MAX;
5145 }
5146 else
5147 {
5148 /* Do not adjust cycles_since_jump in this case, so that
5149 we'll increase the number of NOPs for a subsequent insn
5150 if necessary. */
5151 pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
5152 GEN_INT (delay_needed));
5153 delay_added = delay_needed;
5154 }
5155 PATTERN (last_condjump) = pat1;
5156 INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
5157 }
5158 if (CALL_P (insn))
5159 {
5160 cycles_since_jump = INT_MAX;
5161 delay_added = 0;
5162 }
5163 }
5164
5165 /* Second pass: for predicted-true branches, see if anything at the
5166 branch destination needs extra nops. */
5167 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5168 {
5169 int cycles_since_jump;
5170 if (JUMP_P (insn)
5171 && any_condjump_p (insn)
5172 && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
5173 || cbranch_predicted_taken_p (insn)))
5174 {
5175 rtx target = JUMP_LABEL (insn);
5176 rtx label = target;
5177 rtx next_tgt;
5178
5179 cycles_since_jump = 0;
5180 for (; target && cycles_since_jump < 3; target = next_tgt)
5181 {
5182 rtx pat;
5183
5184 next_tgt = find_next_insn_start (target);
5185
5186 if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
5187 continue;
5188
5189 pat = PATTERN (target);
5190 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5191 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5192 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5193 continue;
5194
5195 if (INSN_P (target))
5196 {
5197 rtx load_insn = find_load (target);
5198 enum attr_type type = type_for_anomaly (target);
5199 int delay_needed = 0;
5200 if (cycles_since_jump < INT_MAX)
5201 cycles_since_jump++;
5202
5203 if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
5204 {
5205 if (trapping_loads_p (load_insn))
5206 delay_needed = 2;
5207 }
5208 else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
5209 delay_needed = 2;
5210
5211 if (delay_needed > cycles_since_jump)
5212 {
5213 rtx prev = prev_real_insn (label);
5214 delay_needed -= cycles_since_jump;
5215 if (dump_file)
5216 fprintf (dump_file, "Adding %d nops after %d\n",
5217 delay_needed, INSN_UID (label));
5218 if (JUMP_P (prev)
5219 && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops)
5220 {
5221 rtx x;
5222 HOST_WIDE_INT v;
5223
5224 if (dump_file)
5225 fprintf (dump_file,
5226 "Reducing nops on insn %d.\n",
5227 INSN_UID (prev));
5228 x = PATTERN (prev);
5229 x = XVECEXP (x, 0, 1);
5230 v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed;
5231 XVECEXP (x, 0, 0) = GEN_INT (v);
5232 }
5233 while (delay_needed-- > 0)
5234 emit_insn_after (gen_nop (), label);
5235 break;
5236 }
5237 }
5238 }
5239 }
5240 }
5241 }
5242
5243 /* Called just before the final scheduling pass. If we need to insert NOPs
5244 later on to work around speculative loads, insert special placeholder
5245 insns that cause loads to be delayed for as many cycles as necessary
5246 (and possible). This reduces the number of NOPs we need to add.
5247 The dummy insns we generate are later removed by bfin_gen_bundles. */
5248 static void
5249 add_sched_insns_for_speculation (void)
5250 {
5251 rtx insn;
5252
5253 if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
5254 && ! ENABLE_WA_INDIRECT_CALLS)
5255 return;
5256
5257 /* First pass: find predicted-false branches and emit a stall
5258 placeholder insn after each one. */
5259 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5260 {
5261 rtx pat;
5262
5263 if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
5264 continue;
5265
5266 pat = PATTERN (insn);
5267 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5268 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5269 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5270 continue;
5271
5272 if (JUMP_P (insn))
5273 {
5274 if (any_condjump_p (insn)
5275 && !cbranch_predicted_taken_p (insn))
5276 {
5277 rtx n = next_real_insn (insn);
5278 emit_insn_before (gen_stall (GEN_INT (3)), n);
5279 }
5280 }
5281 }
5282
5283 /* Second pass: for predicted-true branches, emit a stall placeholder
5284 at the branch destination, unless one is already present. */
5285 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5286 {
5287 if (JUMP_P (insn)
5288 && any_condjump_p (insn)
5289 && (cbranch_predicted_taken_p (insn)))
5290 {
5291 rtx target = JUMP_LABEL (insn);
5292 rtx next = next_real_insn (target);
5293
5294 if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
5295 && get_attr_type (next) == TYPE_STALL)
5296 continue;
5297 emit_insn_before (gen_stall (GEN_INT (1)), next);
5298 }
5299 }
5300 }
5301
5302 /* We use the machine specific reorg pass for emitting CSYNC instructions
5303 after conditional branches as needed.
5304
5305 The Blackfin is unusual in that a code sequence like
5306 if cc jump label
5307 r0 = [p0]
5308 may speculatively perform the load even if the condition isn't true. This
5309 happens for a branch that is predicted not taken, because the pipeline
5310 isn't flushed or stalled, so the early stages of the following instructions,
5311 which perform the memory reference, are allowed to execute before the
5312 jump condition is evaluated.
5313 Therefore, we must insert additional instructions in all places where this
5314 could lead to incorrect behavior. The manual recommends CSYNC, while
5315 VDSP seems to use NOPs (even though its corresponding compiler option is
5316 named CSYNC).
5317
5318 When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
5319 When optimizing for size, we turn the branch into a predicted taken one.
5320 This may be slower due to mispredicts, but saves code size. */
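/* As a sketch, a predicted-false branch immediately followed by a load
   that may trap,

	if cc jump label;
	r0 = [p0];

   is rewritten by workaround_speculation into a cbranch_with_nops
   pattern that emits three trailing NOPs - three rather than four,
   since the load itself already accounts for one cycle of delay.  */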
5321
5322 static void
5323 bfin_reorg (void)
5324 {
5325 /* We are freeing block_for_insn in the toplev to keep compatibility
5326 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5327 compute_bb_for_insn ();
5328
5329 if (bfin_flag_schedule_insns2)
5330 {
5331 splitting_for_sched = 1;
5332 split_all_insns ();
5333 splitting_for_sched = 0;
5334
5335 add_sched_insns_for_speculation ();
5336
5337 timevar_push (TV_SCHED2);
5338 schedule_insns ();
5339 timevar_pop (TV_SCHED2);
5340
5341 /* Examine the schedule and insert nops as necessary for 64-bit parallel
5342 instructions. */
5343 bfin_gen_bundles ();
5344 }
5345
5346 df_analyze ();
5347
5348 /* Doloop optimization */
5349 if (cfun->machine->has_hardware_loops)
5350 bfin_reorg_loops (dump_file);
5351
5352 workaround_speculation ();
5353
5354 if (bfin_flag_var_tracking)
5355 {
5356 timevar_push (TV_VAR_TRACKING);
5357 variable_tracking_main ();
5358 reorder_var_tracking_notes ();
5359 timevar_pop (TV_VAR_TRACKING);
5360 }
5361
5362 df_finish_pass (false);
5363
5364 workaround_rts_anomaly ();
5365 }
5366 \f
5367 /* Handle interrupt_handler, exception_handler and nmi_handler function
5368 attributes; arguments as in struct attribute_spec.handler. */
5369
5370 static tree
5371 handle_int_attribute (tree *node, tree name,
5372 tree args ATTRIBUTE_UNUSED,
5373 int flags ATTRIBUTE_UNUSED,
5374 bool *no_add_attrs)
5375 {
5376 tree x = *node;
5377 if (TREE_CODE (x) == FUNCTION_DECL)
5378 x = TREE_TYPE (x);
5379
5380 if (TREE_CODE (x) != FUNCTION_TYPE)
5381 {
5382 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5383 name);
5384 *no_add_attrs = true;
5385 }
5386 else if (funkind (x) != SUBROUTINE)
5387 error ("multiple function type attributes specified");
5388
5389 return NULL_TREE;
5390 }
5391
5392 /* Return 0 if the attributes for two types are incompatible, 1 if they
5393 are compatible, and 2 if they are nearly compatible (which causes a
5394 warning to be generated). */
5395
5396 static int
5397 bfin_comp_type_attributes (const_tree type1, const_tree type2)
5398 {
5399 e_funkind kind1, kind2;
5400
5401 if (TREE_CODE (type1) != FUNCTION_TYPE)
5402 return 1;
5403
5404 kind1 = funkind (type1);
5405 kind2 = funkind (type2);
5406
5407 if (kind1 != kind2)
5408 return 0;
5409
5410 /* Check for mismatched modifiers. */
5411 if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1))
5412 != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2)))
5413 return 0;
5414
5415 if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1))
5416 != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2)))
5417 return 0;
5418
5419 if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1))
5420 != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2)))
5421 return 0;
5422
5423 if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1))
5424 != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2)))
5425 return 0;
5426
5427 return 1;
5428 }
5429
5430 /* Handle a "longcall" or "shortcall" attribute; arguments as in
5431 struct attribute_spec.handler. */
5432
5433 static tree
5434 bfin_handle_longcall_attribute (tree *node, tree name,
5435 tree args ATTRIBUTE_UNUSED,
5436 int flags ATTRIBUTE_UNUSED,
5437 bool *no_add_attrs)
5438 {
5439 if (TREE_CODE (*node) != FUNCTION_TYPE
5440 && TREE_CODE (*node) != FIELD_DECL
5441 && TREE_CODE (*node) != TYPE_DECL)
5442 {
5443 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5444 name);
5445 *no_add_attrs = true;
5446 }
5447
5448 if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0
5449 && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node)))
5450 || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0
5451 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node))))
5452 {
5453 warning (OPT_Wattributes,
5454 "can't apply both longcall and shortcall attributes to the same function");
5455 *no_add_attrs = true;
5456 }
5457
5458 return NULL_TREE;
5459 }
5460
5461 /* Handle a "l1_text" attribute; arguments as in
5462 struct attribute_spec.handler. */
5463
5464 static tree
5465 bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args),
5466 int ARG_UNUSED (flags), bool *no_add_attrs)
5467 {
5468 tree decl = *node;
5469
5470 if (TREE_CODE (decl) != FUNCTION_DECL)
5471 {
5472 error ("%qE attribute only applies to functions",
5473 name);
5474 *no_add_attrs = true;
5475 }
5476
5477 /* The decl may have already been given a section attribute
5478 from a previous declaration. Ensure they match. */
5479 else if (DECL_SECTION_NAME (decl) != NULL_TREE
5480 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5481 ".l1.text") != 0)
5482 {
5483 error ("section of %q+D conflicts with previous declaration",
5484 decl);
5485 *no_add_attrs = true;
5486 }
5487 else
5488 DECL_SECTION_NAME (decl) = build_string (9, ".l1.text");
5489
5490 return NULL_TREE;
5491 }
5492
5493 /* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute;
5494 arguments as in struct attribute_spec.handler. */
5495
5496 static tree
5497 bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args),
5498 int ARG_UNUSED (flags), bool *no_add_attrs)
5499 {
5500 tree decl = *node;
5501
5502 if (TREE_CODE (decl) != VAR_DECL)
5503 {
5504 error ("%qE attribute only applies to variables",
5505 name);
5506 *no_add_attrs = true;
5507 }
5508 else if (current_function_decl != NULL_TREE
5509 && !TREE_STATIC (decl))
5510 {
5511 error ("%qE attribute cannot be specified for local variables",
5512 name);
5513 *no_add_attrs = true;
5514 }
5515 else
5516 {
5517 const char *section_name;
5518
5519 if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0)
5520 section_name = ".l1.data";
5521 else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0)
5522 section_name = ".l1.data.A";
5523 else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0)
5524 section_name = ".l1.data.B";
5525 else
5526 gcc_unreachable ();
5527
5528 /* The decl may have already been given a section attribute
5529 from a previous declaration. Ensure they match. */
5530 if (DECL_SECTION_NAME (decl) != NULL_TREE
5531 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5532 section_name) != 0)
5533 {
5534 error ("section of %q+D conflicts with previous declaration",
5535 decl);
5536 *no_add_attrs = true;
5537 }
5538 else
5539 DECL_SECTION_NAME (decl)
5540 = build_string (strlen (section_name) + 1, section_name);
5541 }
5542
5543 return NULL_TREE;
5544 }
5545
5546 /* Table of valid machine attributes. */
5547 static const struct attribute_spec bfin_attribute_table[] =
5548 {
5549 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
5550 { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute },
5551 { "exception_handler", 0, 0, false, true, true, handle_int_attribute },
5552 { "nmi_handler", 0, 0, false, true, true, handle_int_attribute },
5553 { "nesting", 0, 0, false, true, true, NULL },
5554 { "kspisusp", 0, 0, false, true, true, NULL },
5555 { "saveall", 0, 0, false, true, true, NULL },
5556 { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute },
5557 { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute },
5558 { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute },
5559 { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
5560 { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
5561 { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
5562 { NULL, 0, 0, false, false, false, NULL }
5563 };
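/* Illustrative uses of the attributes above (user code, not part of
   this file; all names are made up):

     void __attribute__ ((interrupt_handler, nesting)) timer_isr (void);
     void fast_path (void) __attribute__ ((l1_text));
     int coeffs[64] __attribute__ ((l1_data_A));  */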
5564 \f
5565 /* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to
5566 tell the assembler to generate pointers to function descriptors in
5567 some cases. */
5568
5569 static bool
5570 bfin_assemble_integer (rtx value, unsigned int size, int aligned_p)
5571 {
5572 if (TARGET_FDPIC && size == UNITS_PER_WORD)
5573 {
5574 if (GET_CODE (value) == SYMBOL_REF
5575 && SYMBOL_REF_FUNCTION_P (value))
5576 {
5577 fputs ("\t.picptr\tfuncdesc(", asm_out_file);
5578 output_addr_const (asm_out_file, value);
5579 fputs (")\n", asm_out_file);
5580 return true;
5581 }
5582 if (!aligned_p)
5583 {
5584 /* We've set the unaligned SI op to NULL, so we always have to
5585 handle the unaligned case here. */
5586 assemble_integer_with_op ("\t.4byte\t", value);
5587 return true;
5588 }
5589 }
5590 return default_assemble_integer (value, size, aligned_p);
5591 }
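/* For instance, with -mfdpic a static initializer such as
   "void (*fp) (void) = foo;" is emitted through this hook as

	.picptr	funcdesc(_foo)

   instead of a plain ".4byte" word (symbol name illustrative).  */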
5592 \f
5593 /* Output the assembler code for a thunk function. THUNK_DECL is the
5594 declaration for the thunk function itself, FUNCTION is the decl for
5595 the target function. DELTA is an immediate constant offset to be
5596 added to THIS. If VCALL_OFFSET is nonzero, the word at
5597 *(*this + vcall_offset) should be added to THIS. */
5598
5599 static void
5600 bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
5601 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
5602 HOST_WIDE_INT vcall_offset, tree function)
5603 {
5604 rtx xops[3];
5605 /* The this parameter is passed as the first argument. */
5606 rtx this_rtx = gen_rtx_REG (Pmode, REG_R0);
5607
5608 /* Adjust the this parameter by a fixed constant; the add-immediate insn only accepts signed 7-bit values, hence the case split below. */
5609 if (delta)
5610 {
5611 xops[1] = this_rtx;
5612 if (delta >= -64 && delta <= 63)
5613 {
5614 xops[0] = GEN_INT (delta);
5615 output_asm_insn ("%1 += %0;", xops);
5616 }
5617 else if (delta >= -128 && delta < -64)
5618 {
5619 xops[0] = GEN_INT (delta + 64);
5620 output_asm_insn ("%1 += -64; %1 += %0;", xops);
5621 }
5622 else if (delta > 63 && delta <= 126)
5623 {
5624 xops[0] = GEN_INT (delta - 63);
5625 output_asm_insn ("%1 += 63; %1 += %0;", xops);
5626 }
5627 else
5628 {
5629 xops[0] = GEN_INT (delta);
5630 output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops);
5631 }
5632 }
5633
5634 /* Adjust the this parameter by a value stored in the vtable. */
5635 if (vcall_offset)
5636 {
5637 rtx p2tmp = gen_rtx_REG (Pmode, REG_P2);
5638 rtx tmp = gen_rtx_REG (Pmode, REG_R3);
5639
5640 xops[1] = tmp;
5641 xops[2] = p2tmp;
5642 output_asm_insn ("%2 = r0; %2 = [%2];", xops);
5643
5644 /* Adjust the this parameter. */
5645 xops[0] = gen_rtx_MEM (Pmode, plus_constant (p2tmp, vcall_offset));
5646 if (!memory_operand (xops[0], Pmode))
5647 {
5648 rtx tmp2 = gen_rtx_REG (Pmode, REG_P1);
5649 xops[0] = GEN_INT (vcall_offset);
5650 xops[1] = tmp2;
5651 output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops);
5652 xops[0] = gen_rtx_MEM (Pmode, p2tmp);
5653 }
5654 xops[2] = this_rtx;
5655 output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops);
5656 }
5657
5658 xops[0] = XEXP (DECL_RTL (function), 0);
5659 if (1 || !flag_pic || (*targetm.binds_local_p) (function))
5660 output_asm_insn ("jump.l\t%P0", xops);
5661 }
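/* E.g. a thunk with DELTA == 4 and VCALL_OFFSET == 0 comes out as
   (target symbol illustrative):

	R0 += 4;
	jump.l	_foo;  */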
5662 \f
5663 /* Codes for all the Blackfin builtins. */
5664 enum bfin_builtins
5665 {
5666 BFIN_BUILTIN_CSYNC,
5667 BFIN_BUILTIN_SSYNC,
5668 BFIN_BUILTIN_ONES,
5669 BFIN_BUILTIN_COMPOSE_2X16,
5670 BFIN_BUILTIN_EXTRACTLO,
5671 BFIN_BUILTIN_EXTRACTHI,
5672
5673 BFIN_BUILTIN_SSADD_2X16,
5674 BFIN_BUILTIN_SSSUB_2X16,
5675 BFIN_BUILTIN_SSADDSUB_2X16,
5676 BFIN_BUILTIN_SSSUBADD_2X16,
5677 BFIN_BUILTIN_MULT_2X16,
5678 BFIN_BUILTIN_MULTR_2X16,
5679 BFIN_BUILTIN_NEG_2X16,
5680 BFIN_BUILTIN_ABS_2X16,
5681 BFIN_BUILTIN_MIN_2X16,
5682 BFIN_BUILTIN_MAX_2X16,
5683
5684 BFIN_BUILTIN_SSADD_1X16,
5685 BFIN_BUILTIN_SSSUB_1X16,
5686 BFIN_BUILTIN_MULT_1X16,
5687 BFIN_BUILTIN_MULTR_1X16,
5688 BFIN_BUILTIN_NORM_1X16,
5689 BFIN_BUILTIN_NEG_1X16,
5690 BFIN_BUILTIN_ABS_1X16,
5691 BFIN_BUILTIN_MIN_1X16,
5692 BFIN_BUILTIN_MAX_1X16,
5693
5694 BFIN_BUILTIN_SUM_2X16,
5695 BFIN_BUILTIN_DIFFHL_2X16,
5696 BFIN_BUILTIN_DIFFLH_2X16,
5697
5698 BFIN_BUILTIN_SSADD_1X32,
5699 BFIN_BUILTIN_SSSUB_1X32,
5700 BFIN_BUILTIN_NORM_1X32,
5701 BFIN_BUILTIN_ROUND_1X32,
5702 BFIN_BUILTIN_NEG_1X32,
5703 BFIN_BUILTIN_ABS_1X32,
5704 BFIN_BUILTIN_MIN_1X32,
5705 BFIN_BUILTIN_MAX_1X32,
5706 BFIN_BUILTIN_MULT_1X32,
5707 BFIN_BUILTIN_MULT_1X32X32,
5708 BFIN_BUILTIN_MULT_1X32X32NS,
5709
5710 BFIN_BUILTIN_MULHISILL,
5711 BFIN_BUILTIN_MULHISILH,
5712 BFIN_BUILTIN_MULHISIHL,
5713 BFIN_BUILTIN_MULHISIHH,
5714
5715 BFIN_BUILTIN_LSHIFT_1X16,
5716 BFIN_BUILTIN_LSHIFT_2X16,
5717 BFIN_BUILTIN_SSASHIFT_1X16,
5718 BFIN_BUILTIN_SSASHIFT_2X16,
5719 BFIN_BUILTIN_SSASHIFT_1X32,
5720
5721 BFIN_BUILTIN_CPLX_MUL_16,
5722 BFIN_BUILTIN_CPLX_MAC_16,
5723 BFIN_BUILTIN_CPLX_MSU_16,
5724
5725 BFIN_BUILTIN_CPLX_MUL_16_S40,
5726 BFIN_BUILTIN_CPLX_MAC_16_S40,
5727 BFIN_BUILTIN_CPLX_MSU_16_S40,
5728
5729 BFIN_BUILTIN_CPLX_SQU,
5730
5731 BFIN_BUILTIN_LOADBYTES,
5732
5733 BFIN_BUILTIN_MAX
5734 };
5735
5736 #define def_builtin(NAME, TYPE, CODE) \
5737 do { \
5738 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
5739 NULL, NULL_TREE); \
5740 } while (0)
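/* Illustrative use of the builtins defined below (user code; assumes
   the usual vector typedef, e.g. from a target header):

     typedef short v2hi __attribute__ ((vector_size (4)));
     v2hi pair = __builtin_bfin_compose_2x16 (hi, lo);
     short top = __builtin_bfin_extract_hi (pair);  */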
5741
5742 /* Set up all builtin functions for this target. */
5743 static void
5744 bfin_init_builtins (void)
5745 {
5746 tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
5747 tree void_ftype_void
5748 = build_function_type (void_type_node, void_list_node);
5749 tree short_ftype_short
5750 = build_function_type_list (short_integer_type_node, short_integer_type_node,
5751 NULL_TREE);
5752 tree short_ftype_int_int
5753 = build_function_type_list (short_integer_type_node, integer_type_node,
5754 integer_type_node, NULL_TREE);
5755 tree int_ftype_int_int
5756 = build_function_type_list (integer_type_node, integer_type_node,
5757 integer_type_node, NULL_TREE);
5758 tree int_ftype_int
5759 = build_function_type_list (integer_type_node, integer_type_node,
5760 NULL_TREE);
5761 tree short_ftype_int
5762 = build_function_type_list (short_integer_type_node, integer_type_node,
5763 NULL_TREE);
5764 tree int_ftype_v2hi_v2hi
5765 = build_function_type_list (integer_type_node, V2HI_type_node,
5766 V2HI_type_node, NULL_TREE);
5767 tree v2hi_ftype_v2hi_v2hi
5768 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5769 V2HI_type_node, NULL_TREE);
5770 tree v2hi_ftype_v2hi_v2hi_v2hi
5771 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5772 V2HI_type_node, V2HI_type_node, NULL_TREE);
5773 tree v2hi_ftype_int_int
5774 = build_function_type_list (V2HI_type_node, integer_type_node,
5775 integer_type_node, NULL_TREE);
5776 tree v2hi_ftype_v2hi_int
5777 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5778 integer_type_node, NULL_TREE);
5779 tree int_ftype_short_short
5780 = build_function_type_list (integer_type_node, short_integer_type_node,
5781 short_integer_type_node, NULL_TREE);
5782 tree v2hi_ftype_v2hi
5783 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
5784 tree short_ftype_v2hi
5785 = build_function_type_list (short_integer_type_node, V2HI_type_node,
5786 NULL_TREE);
5787 tree int_ftype_pint
5788 = build_function_type_list (integer_type_node,
5789 build_pointer_type (integer_type_node),
5790 NULL_TREE);
5791
5792 /* Add the Blackfin builtin functions, some with fairly complicated types. */
5793 def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
5794 def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
5795
5796 def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES);
5797
5798 def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int,
5799 BFIN_BUILTIN_COMPOSE_2X16);
5800 def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi,
5801 BFIN_BUILTIN_EXTRACTHI);
5802 def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi,
5803 BFIN_BUILTIN_EXTRACTLO);
5804
5805 def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi,
5806 BFIN_BUILTIN_MIN_2X16);
5807 def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi,
5808 BFIN_BUILTIN_MAX_2X16);
5809
5810 def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi,
5811 BFIN_BUILTIN_SSADD_2X16);
5812 def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi,
5813 BFIN_BUILTIN_SSSUB_2X16);
5814 def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi,
5815 BFIN_BUILTIN_SSADDSUB_2X16);
5816 def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi,
5817 BFIN_BUILTIN_SSSUBADD_2X16);
5818 def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi,
5819 BFIN_BUILTIN_MULT_2X16);
5820 def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi,
5821 BFIN_BUILTIN_MULTR_2X16);
5822 def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi,
5823 BFIN_BUILTIN_NEG_2X16);
5824 def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi,
5825 BFIN_BUILTIN_ABS_2X16);
5826
5827 def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int,
5828 BFIN_BUILTIN_MIN_1X16);
5829 def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int,
5830 BFIN_BUILTIN_MAX_1X16);
5831
5832 def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int,
5833 BFIN_BUILTIN_SSADD_1X16);
5834 def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int,
5835 BFIN_BUILTIN_SSSUB_1X16);
5836 def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int,
5837 BFIN_BUILTIN_MULT_1X16);
5838 def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int,
5839 BFIN_BUILTIN_MULTR_1X16);
5840 def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short,
5841 BFIN_BUILTIN_NEG_1X16);
5842 def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short,
5843 BFIN_BUILTIN_ABS_1X16);
5844 def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int,
5845 BFIN_BUILTIN_NORM_1X16);
5846
5847 def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi,
5848 BFIN_BUILTIN_SUM_2X16);
5849 def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi,
5850 BFIN_BUILTIN_DIFFHL_2X16);
5851 def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi,
5852 BFIN_BUILTIN_DIFFLH_2X16);
5853
5854 def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi,
5855 BFIN_BUILTIN_MULHISILL);
5856 def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi,
5857 BFIN_BUILTIN_MULHISIHL);
5858 def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi,
5859 BFIN_BUILTIN_MULHISILH);
5860 def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi,
5861 BFIN_BUILTIN_MULHISIHH);
5862
5863 def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int,
5864 BFIN_BUILTIN_MIN_1X32);
5865 def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int,
5866 BFIN_BUILTIN_MAX_1X32);
5867
5868 def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int,
5869 BFIN_BUILTIN_SSADD_1X32);
5870 def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int,
5871 BFIN_BUILTIN_SSSUB_1X32);
5872 def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int,
5873 BFIN_BUILTIN_NEG_1X32);
5874 def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int,
5875 BFIN_BUILTIN_ABS_1X32);
5876 def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int,
5877 BFIN_BUILTIN_NORM_1X32);
5878 def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int,
5879 BFIN_BUILTIN_ROUND_1X32);
5880 def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short,
5881 BFIN_BUILTIN_MULT_1X32);
5882 def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int,
5883 BFIN_BUILTIN_MULT_1X32X32);
5884 def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int,
5885 BFIN_BUILTIN_MULT_1X32X32NS);
5886
5887 /* Shifts. */
5888 def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int,
5889 BFIN_BUILTIN_SSASHIFT_1X16);
5890 def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int,
5891 BFIN_BUILTIN_SSASHIFT_2X16);
5892 def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int,
5893 BFIN_BUILTIN_LSHIFT_1X16);
5894 def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int,
5895 BFIN_BUILTIN_LSHIFT_2X16);
5896 def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int,
5897 BFIN_BUILTIN_SSASHIFT_1X32);
5898
5899 /* Complex numbers. */
5900 def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi,
5901 BFIN_BUILTIN_SSADD_2X16);
5902 def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi,
5903 BFIN_BUILTIN_SSSUB_2X16);
5904 def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi,
5905 BFIN_BUILTIN_CPLX_MUL_16);
5906 def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi,
5907 BFIN_BUILTIN_CPLX_MAC_16);
5908 def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi,
5909 BFIN_BUILTIN_CPLX_MSU_16);
5910 def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi,
5911 BFIN_BUILTIN_CPLX_MUL_16_S40);
5912 def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5913 BFIN_BUILTIN_CPLX_MAC_16_S40);
5914 def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5915 BFIN_BUILTIN_CPLX_MSU_16_S40);
5916 def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
5917 BFIN_BUILTIN_CPLX_SQU);
5918
5919 /* "Unaligned" load. */
5920 def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint,
5921 BFIN_BUILTIN_LOADBYTES);
5923 }
5924
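/* Illustrative only (not part of the compiler itself): with the
   declarations above in effect, Blackfin user code can write, e.g.,

     short s = __builtin_bfin_add_fr1x16 (a, b);
     int p = __builtin_bfin_mult_fr1x32x32 (x, y);

   for a saturating 1.15 addition and a 1.31 fractional multiply; each
   such call is expanded through bfin_expand_builtin below.  */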
5925
5926 struct builtin_description
5927 {
5928 const enum insn_code icode;
5929 const char *const name;
5930 const enum bfin_builtins code;
5931 int macflag;
5932 };
5933
5934 static const struct builtin_description bdesc_2arg[] =
5935 {
5936 { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 },
5937
5938 { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 },
5939 { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 },
5940 { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 },
5941 { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 },
5942 { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 },
5943
5944 { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 },
5945 { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 },
5946 { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 },
5947 { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 },
5948
5949 { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 },
5950 { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 },
5951 { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 },
5952 { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 },
5953
5954 { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 },
5955 { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 },
5956 { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 },
5957 { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 },
5958 { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 },
5959 { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 },
5960
5961 { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE },
5962 { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T },
5963 { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE },
5964 { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T },
5965 { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE },
5966
5967 { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 },
5968 { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 },
5969 { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 },
5970 { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 }
5971
5972 };
5973
5974 static const struct builtin_description bdesc_1arg[] =
5975 {
5976 { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 },
5977
5978 { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
5979
5980 { CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
5981 { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 },
5982 { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 },
5983
5984 { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 },
5985 { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 },
5986 { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 },
5987 { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 },
5988
5989 { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 },
5990 { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 },
5991 { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 },
5992 { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 }
5993 };
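
/* The two tables above drive the generic paths at the end of
   bfin_expand_builtin: each entry maps a builtin code to the insn
   pattern that implements it and, in the two-operand table, to the
   MACFLAG_xxx value passed as an extra operand (-1 when the pattern
   takes none).  For example, "__builtin_bfin_add_fr1x16" is routed
   straight to CODE_FOR_ssaddhi3.  */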
5994
5995 /* Errors in the source file can cause expand_expr to return const0_rtx
5996 where we expect a vector.  To avoid crashing, substitute a zeroed
5997 register of the required vector mode.  */
5998 static rtx
5999 safe_vector_operand (rtx x, enum machine_mode mode)
6000 {
6001 if (x != const0_rtx)
6002 return x;
6003 x = gen_reg_rtx (SImode);
6004
6005 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6006 return gen_lowpart (mode, x);
6007 }
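
/* For example, if a user passes an erroneous expression where a
   fract2x16 value is expected, the expander may see const0_rtx in
   place of a V2HImode operand; the function above then substitutes
   gen_lowpart (V2HImode, reg) of a freshly zeroed SImode register.  */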
6008
6009 /* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1
6010 if this is a normal binary op, or one of the MACFLAG_xxx constants. */
6011
6012 static rtx
6013 bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6014 int macflag)
6015 {
6016 rtx pat;
6017 tree arg0 = CALL_EXPR_ARG (exp, 0);
6018 tree arg1 = CALL_EXPR_ARG (exp, 1);
6019 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6020 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6021 enum machine_mode op0mode = GET_MODE (op0);
6022 enum machine_mode op1mode = GET_MODE (op1);
6023 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6024 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
6025 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
6026
6027 if (VECTOR_MODE_P (mode0))
6028 op0 = safe_vector_operand (op0, mode0);
6029 if (VECTOR_MODE_P (mode1))
6030 op1 = safe_vector_operand (op1, mode1);
6031
6032 if (! target
6033 || GET_MODE (target) != tmode
6034 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6035 target = gen_reg_rtx (tmode);
6036
6037 if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode)
6038 {
6039 op0mode = HImode;
6040 op0 = gen_lowpart (HImode, op0);
6041 }
6042 if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode)
6043 {
6044 op1mode = HImode;
6045 op1 = gen_lowpart (HImode, op1);
6046 }
6047 /* Abort if the input operands do not already have the modes the
6048 insn pattern expects.  */
6049 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6050 && (op1mode == mode1 || op1mode == VOIDmode));
6051
6052 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6053 op0 = copy_to_mode_reg (mode0, op0);
6054 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
6055 op1 = copy_to_mode_reg (mode1, op1);
6056
6057 if (macflag == -1)
6058 pat = GEN_FCN (icode) (target, op0, op1);
6059 else
6060 pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag));
6061 if (! pat)
6062 return 0;
6063
6064 emit_insn (pat);
6065 return target;
6066 }
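
/* For instance, the bdesc_2arg entry
   { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16",
     BFIN_BUILTIN_MULT_1X16, MACFLAG_T }
   reaches this function with MACFLAG == MACFLAG_T, so the generated
   insn gets GEN_INT (MACFLAG_T) as a fourth operand, while plain
   entries such as the one for ssaddhi3 pass MACFLAG == -1 and emit a
   three-operand insn.  */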
6067
6068 /* Subroutine of bfin_expand_builtin to take care of unop insns. */
6069
6070 static rtx
6071 bfin_expand_unop_builtin (enum insn_code icode, tree exp,
6072 rtx target)
6073 {
6074 rtx pat;
6075 tree arg0 = CALL_EXPR_ARG (exp, 0);
6076 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6077 enum machine_mode op0mode = GET_MODE (op0);
6078 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6079 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
6080
6081 if (! target
6082 || GET_MODE (target) != tmode
6083 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6084 target = gen_reg_rtx (tmode);
6085
6086 if (VECTOR_MODE_P (mode0))
6087 op0 = safe_vector_operand (op0, mode0);
6088
6089 if (op0mode == SImode && mode0 == HImode)
6090 {
6091 op0mode = HImode;
6092 op0 = gen_lowpart (HImode, op0);
6093 }
6094 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6095
6096 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6097 op0 = copy_to_mode_reg (mode0, op0);
6098
6099 pat = GEN_FCN (icode) (target, op0);
6100 if (! pat)
6101 return 0;
6102 emit_insn (pat);
6103 return target;
6104 }
6105
6106 /* Expand an expression EXP that calls a built-in function,
6107 with result going to TARGET if that's convenient
6108 (and in mode MODE if that's convenient).
6109 SUBTARGET may be used as the target for computing one of EXP's operands.
6110 IGNORE is nonzero if the value is to be ignored. */
6111
6112 static rtx
6113 bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6114 rtx subtarget ATTRIBUTE_UNUSED,
6115 enum machine_mode mode ATTRIBUTE_UNUSED,
6116 int ignore ATTRIBUTE_UNUSED)
6117 {
6118 size_t i;
6119 enum insn_code icode;
6120 const struct builtin_description *d;
6121 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6122 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6123 tree arg0, arg1, arg2;
6124 rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg;
6125 enum machine_mode tmode, mode0;
6126
6127 switch (fcode)
6128 {
6129 case BFIN_BUILTIN_CSYNC:
6130 emit_insn (gen_csync ());
6131 return 0;
6132 case BFIN_BUILTIN_SSYNC:
6133 emit_insn (gen_ssync ());
6134 return 0;
6135
6136 case BFIN_BUILTIN_DIFFHL_2X16:
6137 case BFIN_BUILTIN_DIFFLH_2X16:
6138 case BFIN_BUILTIN_SUM_2X16:
6139 arg0 = CALL_EXPR_ARG (exp, 0);
6140 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6141 icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3
6142 : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3
6143 : CODE_FOR_ssaddhilov2hi3);
6144 tmode = insn_data[icode].operand[0].mode;
6145 mode0 = insn_data[icode].operand[1].mode;
6146
6147 if (! target
6148 || GET_MODE (target) != tmode
6149 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6150 target = gen_reg_rtx (tmode);
6151
6152 if (VECTOR_MODE_P (mode0))
6153 op0 = safe_vector_operand (op0, mode0);
6154
6155 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6156 op0 = copy_to_mode_reg (mode0, op0);
6157
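/* The selected patterns take two vector operands and combine one half
   of each, so passing OP0 twice makes them act on the high and low
   halves of the single builtin argument.  */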
6158 pat = GEN_FCN (icode) (target, op0, op0);
6159 if (! pat)
6160 return 0;
6161 emit_insn (pat);
6162 return target;
6163
6164 case BFIN_BUILTIN_MULT_1X32X32:
6165 case BFIN_BUILTIN_MULT_1X32X32NS:
6166 arg0 = CALL_EXPR_ARG (exp, 0);
6167 arg1 = CALL_EXPR_ARG (exp, 1);
6168 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6169 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6170 if (! target
6171 || !register_operand (target, SImode))
6172 target = gen_reg_rtx (SImode);
6173
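/* Build the 32x32 -> 32 fractional product from 16-bit partial
   products accumulated in A0/A1: begin with the unsigned low*low term
   in A1, shifted right by 16 to line it up with the cross terms that
   are accumulated next.  */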
6174 a1reg = gen_rtx_REG (PDImode, REG_A1);
6175 a0reg = gen_rtx_REG (PDImode, REG_A0);
6176 tmp1 = gen_lowpart (V2HImode, op0);
6177 tmp2 = gen_lowpart (V2HImode, op1);
6178 emit_insn (gen_flag_macinit1hi (a1reg,
6179 gen_lowpart (HImode, op0),
6180 gen_lowpart (HImode, op1),
6181 GEN_INT (MACFLAG_FU)));
6182 emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
6183
6184 if (fcode == BFIN_BUILTIN_MULT_1X32X32)
6185 emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2,
6186 const1_rtx, const1_rtx,
6187 const1_rtx, const0_rtx, a1reg,
6188 const0_rtx, GEN_INT (MACFLAG_NONE),
6189 GEN_INT (MACFLAG_M)));
6190 else
6191 {
6192 /* For saturating multiplication, there's exactly one special case
6193 to be handled: multiplying the smallest negative value with
6194 itself. Due to shift correction in fractional multiplies, this
6195 can overflow. Iff this happens, OP2 will contain 1, which, when
6196 added in 32 bits to the smallest negative, wraps to the largest
6197 positive, which is the result we want. */
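/* Concretely: OP0 == OP1 == 0x80000000 encodes -1.0 in 1.31 format,
   and (-1.0) * (-1.0) == +1.0 is just out of range.  Per the above,
   the correction value 1 added after the accumulator sum wraps the
   overflowed result around to 0x7fffffff, the saturated answer.  */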
6198 op2 = gen_reg_rtx (V2HImode);
6199 emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx));
6200 emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC),
6201 gen_lowpart (SImode, op2)));
6202 emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2,
6203 const1_rtx, const1_rtx,
6204 const1_rtx, const0_rtx, a1reg,
6205 const0_rtx, GEN_INT (MACFLAG_NONE),
6206 GEN_INT (MACFLAG_M)));
6207 op2 = gen_reg_rtx (SImode);
6208 emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC)));
6209 }
6210 emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1,
6211 const1_rtx, const0_rtx,
6212 a1reg, const0_rtx, GEN_INT (MACFLAG_M)));
6213 emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15)));
6214 emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg));
6215 if (fcode == BFIN_BUILTIN_MULT_1X32X32NS)
6216 emit_insn (gen_addsi3 (target, target, op2));
6217 return target;
6218
6219 case BFIN_BUILTIN_CPLX_MUL_16:
6220 case BFIN_BUILTIN_CPLX_MUL_16_S40:
6221 arg0 = CALL_EXPR_ARG (exp, 0);
6222 arg1 = CALL_EXPR_ARG (exp, 1);
6223 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6224 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6225 accvec = gen_reg_rtx (V2PDImode);
6226
6227 if (! target
6228 || GET_MODE (target) != V2HImode
6229 || ! register_operand (target, V2HImode))
6230 target = gen_reg_rtx (V2HImode);
6231 if (! register_operand (op0, GET_MODE (op0)))
6232 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
6233 if (! register_operand (op1, GET_MODE (op1)))
6234 op1 = copy_to_mode_reg (GET_MODE (op1), op1);
6235
6236 if (fcode == BFIN_BUILTIN_CPLX_MUL_16)
6237 emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
6238 const0_rtx, const0_rtx,
6239 const1_rtx, GEN_INT (MACFLAG_W32)));
6240 else
6241 emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
6242 const0_rtx, const0_rtx,
6243 const1_rtx, GEN_INT (MACFLAG_NONE)));
6244 emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx,
6245 const1_rtx, const1_rtx,
6246 const0_rtx, accvec, const1_rtx, const0_rtx,
6247 GEN_INT (MACFLAG_NONE), accvec));
6248
6249 return target;
6250
6251 case BFIN_BUILTIN_CPLX_MAC_16:
6252 case BFIN_BUILTIN_CPLX_MSU_16:
6253 case BFIN_BUILTIN_CPLX_MAC_16_S40:
6254 case BFIN_BUILTIN_CPLX_MSU_16_S40:
6255 arg0 = CALL_EXPR_ARG (exp, 0);
6256 arg1 = CALL_EXPR_ARG (exp, 1);
6257 arg2 = CALL_EXPR_ARG (exp, 2);
6258 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6259 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6260 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6261 accvec = gen_reg_rtx (V2PDImode);
6262
6263 if (! target
6264 || GET_MODE (target) != V2HImode
6265 || ! register_operand (target, V2HImode))
6266 target = gen_reg_rtx (V2HImode);
6267 if (! register_operand (op1, GET_MODE (op1)))
6268 op1 = copy_to_mode_reg (GET_MODE (op1), op1);
6269 if (! register_operand (op2, GET_MODE (op2)))
6270 op2 = copy_to_mode_reg (GET_MODE (op2), op2);
6271
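/* Load the accumulator pair from the packed input OP0: tmp1 gets
   OP0's low 16 bits shifted into the high half of a word, tmp2 gets
   OP0 with its low 16 bits cleared, so A0/A1 start out holding the
   two 1.15 lanes of the running complex sum in the right position.  */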
6272 tmp1 = gen_reg_rtx (SImode);
6273 tmp2 = gen_reg_rtx (SImode);
6274 emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16)));
6275 emit_move_insn (tmp2, gen_lowpart (SImode, op0));
6276 emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx));
6277 emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2));
6278 if (fcode == BFIN_BUILTIN_CPLX_MAC_16
6279 || fcode == BFIN_BUILTIN_CPLX_MSU_16)
6280 emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
6281 const0_rtx, const0_rtx,
6282 const1_rtx, accvec, const0_rtx,
6283 const0_rtx,
6284 GEN_INT (MACFLAG_W32)));
6285 else
6286 emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
6287 const0_rtx, const0_rtx,
6288 const1_rtx, accvec, const0_rtx,
6289 const0_rtx,
6290 GEN_INT (MACFLAG_NONE)));
6291 if (fcode == BFIN_BUILTIN_CPLX_MAC_16
6292 || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40)
6293 {
6294 tmp1 = const1_rtx;
6295 tmp2 = const0_rtx;
6296 }
6297 else
6298 {
6299 tmp1 = const0_rtx;
6300 tmp2 = const1_rtx;
6301 }
6302 emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx,
6303 const1_rtx, const1_rtx,
6304 const0_rtx, accvec, tmp1, tmp2,
6305 GEN_INT (MACFLAG_NONE), accvec));
6306
6307 return target;
6308
6309 case BFIN_BUILTIN_CPLX_SQU:
6310 arg0 = CALL_EXPR_ARG (exp, 0);
6311 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6312 accvec = gen_reg_rtx (V2PDImode);
6313 icode = CODE_FOR_flag_mulv2hi;
6314 tmp1 = gen_reg_rtx (V2HImode);
6315 tmp2 = gen_reg_rtx (V2HImode);
6316
6317 if (! target
6318 || GET_MODE (target) != V2HImode
6319 || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
6320 target = gen_reg_rtx (V2HImode);
6321 if (! register_operand (op0, GET_MODE (op0)))
6322 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
6323
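/* Square the packed complex value OP0, with real part a in one 16-bit
   half and imaginary part b in the other:
   (a + b*i)^2 == (a*a - b*b) + (2*a*b)*i.  tmp1 receives the
   elementwise products {a*a, b*b}, tmp2's low half receives a*b, and
   the saturating add/subtract below place 2*a*b and a*a - b*b in the
   high and low halves of the result.  */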
6324 emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));
6325
6326 emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0,
6327 const0_rtx, const1_rtx,
6328 GEN_INT (MACFLAG_NONE)));
6329
6330 emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx,
6331 const0_rtx));
6332 emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1,
6333 const0_rtx, const1_rtx));
6334
6335 return target;
6336
6337 default:
6338 break;
6339 }
6340
6341 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6342 if (d->code == fcode)
6343 return bfin_expand_binop_builtin (d->icode, exp, target,
6344 d->macflag);
6345
6346 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6347 if (d->code == fcode)
6348 return bfin_expand_unop_builtin (d->icode, exp, target);
6349
6350 gcc_unreachable ();
6351 }
6352 \f
6353 #undef TARGET_INIT_BUILTINS
6354 #define TARGET_INIT_BUILTINS bfin_init_builtins
6355
6356 #undef TARGET_EXPAND_BUILTIN
6357 #define TARGET_EXPAND_BUILTIN bfin_expand_builtin
6358
6359 #undef TARGET_ASM_GLOBALIZE_LABEL
6360 #define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label
6361
6362 #undef TARGET_ASM_FILE_START
6363 #define TARGET_ASM_FILE_START output_file_start
6364
6365 #undef TARGET_ATTRIBUTE_TABLE
6366 #define TARGET_ATTRIBUTE_TABLE bfin_attribute_table
6367
6368 #undef TARGET_COMP_TYPE_ATTRIBUTES
6369 #define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes
6370
6371 #undef TARGET_RTX_COSTS
6372 #define TARGET_RTX_COSTS bfin_rtx_costs
6373
6374 #undef TARGET_ADDRESS_COST
6375 #define TARGET_ADDRESS_COST bfin_address_cost
6376
6377 #undef TARGET_ASM_INTEGER
6378 #define TARGET_ASM_INTEGER bfin_assemble_integer
6379
6380 #undef TARGET_MACHINE_DEPENDENT_REORG
6381 #define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg
6382
6383 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6384 #define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall
6385
6386 #undef TARGET_ASM_OUTPUT_MI_THUNK
6387 #define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk
6388 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6389 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
6390
6391 #undef TARGET_SCHED_ADJUST_COST
6392 #define TARGET_SCHED_ADJUST_COST bfin_adjust_cost
6393
6394 #undef TARGET_SCHED_ISSUE_RATE
6395 #define TARGET_SCHED_ISSUE_RATE bfin_issue_rate
6396
6397 #undef TARGET_PROMOTE_FUNCTION_MODE
6398 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
6399
6400 #undef TARGET_ARG_PARTIAL_BYTES
6401 #define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes
6402
6403 #undef TARGET_PASS_BY_REFERENCE
6404 #define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference
6405
6406 #undef TARGET_SETUP_INCOMING_VARARGS
6407 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
6408
6409 #undef TARGET_STRUCT_VALUE_RTX
6410 #define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx
6411
6412 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6413 #define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p
6414
6415 #undef TARGET_HANDLE_OPTION
6416 #define TARGET_HANDLE_OPTION bfin_handle_option
6417
6418 #undef TARGET_DEFAULT_TARGET_FLAGS
6419 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
6420
6421 #undef TARGET_SECONDARY_RELOAD
6422 #define TARGET_SECONDARY_RELOAD bfin_secondary_reload
6423
6424 #undef TARGET_DELEGITIMIZE_ADDRESS
6425 #define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address
6426
6427 #undef TARGET_CANNOT_FORCE_CONST_MEM
6428 #define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem
6429
6430 #undef TARGET_RETURN_IN_MEMORY
6431 #define TARGET_RETURN_IN_MEMORY bfin_return_in_memory
6432
6433 #undef TARGET_LEGITIMATE_ADDRESS_P
6434 #define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p
6435
6436 #undef TARGET_FRAME_POINTER_REQUIRED
6437 #define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required
6438
6439 #undef TARGET_CAN_ELIMINATE
6440 #define TARGET_CAN_ELIMINATE bfin_can_eliminate
6441
6442 struct gcc_target targetm = TARGET_INITIALIZER;