/* The Blackfin code generation auxiliary output file.
   Copyright (C) 2005, 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
   Contributed by Analog Devices.

   This file is part of GCC.

   GCC is free software; you can redistribute it and/or modify it
   under the terms of the GNU General Public License as published
   by the Free Software Foundation; either version 3, or (at your
   option) any later version.

   GCC is distributed in the hope that it will be useful, but WITHOUT
   ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
   or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public
   License for more details.

   You should have received a copy of the GNU General Public License
   along with GCC; see the file COPYING3.  If not see
   <http://www.gnu.org/licenses/>.  */

#include "config.h"
#include "system.h"
#include "coretypes.h"
#include "tm.h"
#include "rtl.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "insn-codes.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "tree.h"
#include "flags.h"
#include "except.h"
#include "function.h"
#include "input.h"
#include "target.h"
#include "target-def.h"
#include "expr.h"
#include "toplev.h"
#include "recog.h"
#include "optabs.h"
#include "ggc.h"
#include "integrate.h"
#include "cgraph.h"
#include "langhooks.h"
#include "bfin-protos.h"
#include "tm-preds.h"
#include "tm-constrs.h"
#include "gt-bfin.h"
#include "basic-block.h"
#include "cfglayout.h"
#include "timevar.h"
#include "df.h"

/* A C structure for machine-specific, per-function data.
   This is added to the cfun structure.  */
struct GTY(()) machine_function
{
  /* Set if we are notified by the doloop pass that a hardware loop
     was created.  */
  int has_hardware_loops;

  /* Set if we create a memcpy pattern that uses loop registers.  */
  int has_loopreg_clobber;
};

/* RTX for the condition code flag register and the RETS register.  */
extern GTY(()) rtx bfin_cc_rtx;
extern GTY(()) rtx bfin_rets_rtx;
rtx bfin_cc_rtx, bfin_rets_rtx;

int max_arg_registers = 0;

/* Arrays used when emitting register names.  */
const char *short_reg_names[] = SHORT_REGISTER_NAMES;
const char *high_reg_names[] = HIGH_REGISTER_NAMES;
const char *dregs_pair_names[] = DREGS_PAIR_NAMES;
const char *byte_reg_names[] = BYTE_REGISTER_NAMES;

static int arg_regs[] = FUNCTION_ARG_REGISTERS;
static int ret_regs[] = FUNCTION_RETURN_REGISTERS;

/* Nonzero if -mshared-library-id was given.  */
static int bfin_lib_id_given;

/* Nonzero if -fschedule-insns2 was given.  We override it and
   call the scheduler ourselves during reorg.  */
static int bfin_flag_schedule_insns2;

/* Determines whether we run variable tracking in machine dependent
   reorganization.  */
static int bfin_flag_var_tracking;

/* -mcpu support.  */
bfin_cpu_t bfin_cpu_type = BFIN_CPU_UNKNOWN;

/* -msi-revision support.  There are two special values:
   -1      -msi-revision=none.
   0xffff  -msi-revision=any.  */
int bfin_si_revision;

/* The workarounds enabled.  */
unsigned int bfin_workarounds = 0;

struct bfin_cpu
{
  const char *name;
  bfin_cpu_t type;
  int si_revision;
  unsigned int workarounds;
};

struct bfin_cpu bfin_cpus[] =
{
  {"bf512", BFIN_CPU_BF512, 0x0000,
   WA_SPECULATIVE_LOADS | WA_05000074},

  {"bf514", BFIN_CPU_BF514, 0x0000,
   WA_SPECULATIVE_LOADS | WA_05000074},

  {"bf516", BFIN_CPU_BF516, 0x0000,
   WA_SPECULATIVE_LOADS | WA_05000074},

  {"bf518", BFIN_CPU_BF518, 0x0000,
   WA_SPECULATIVE_LOADS | WA_05000074},

  {"bf522", BFIN_CPU_BF522, 0x0002,
   WA_SPECULATIVE_LOADS | WA_05000074},
  {"bf522", BFIN_CPU_BF522, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
  {"bf522", BFIN_CPU_BF522, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},

  {"bf523", BFIN_CPU_BF523, 0x0002,
   WA_SPECULATIVE_LOADS | WA_05000074},
  {"bf523", BFIN_CPU_BF523, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
  {"bf523", BFIN_CPU_BF523, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},

  {"bf524", BFIN_CPU_BF524, 0x0002,
   WA_SPECULATIVE_LOADS | WA_05000074},
  {"bf524", BFIN_CPU_BF524, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
  {"bf524", BFIN_CPU_BF524, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},

  {"bf525", BFIN_CPU_BF525, 0x0002,
   WA_SPECULATIVE_LOADS | WA_05000074},
  {"bf525", BFIN_CPU_BF525, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
  {"bf525", BFIN_CPU_BF525, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},

  {"bf526", BFIN_CPU_BF526, 0x0002,
   WA_SPECULATIVE_LOADS | WA_05000074},
  {"bf526", BFIN_CPU_BF526, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
  {"bf526", BFIN_CPU_BF526, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},

  {"bf527", BFIN_CPU_BF527, 0x0002,
   WA_SPECULATIVE_LOADS | WA_05000074},
  {"bf527", BFIN_CPU_BF527, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},
  {"bf527", BFIN_CPU_BF527, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000074},

  {"bf531", BFIN_CPU_BF531, 0x0006,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
  {"bf531", BFIN_CPU_BF531, 0x0005,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
   | WA_LOAD_LCREGS | WA_05000074},
  {"bf531", BFIN_CPU_BF531, 0x0004,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf531", BFIN_CPU_BF531, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf532", BFIN_CPU_BF532, 0x0006,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
  {"bf532", BFIN_CPU_BF532, 0x0005,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
   | WA_LOAD_LCREGS | WA_05000074},
  {"bf532", BFIN_CPU_BF532, 0x0004,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf532", BFIN_CPU_BF532, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf533", BFIN_CPU_BF533, 0x0006,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
  {"bf533", BFIN_CPU_BF533, 0x0005,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_05000283 | WA_05000315
   | WA_LOAD_LCREGS | WA_05000074},
  {"bf533", BFIN_CPU_BF533, 0x0004,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf533", BFIN_CPU_BF533, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf534", BFIN_CPU_BF534, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
  {"bf534", BFIN_CPU_BF534, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf534", BFIN_CPU_BF534, 0x0001,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf536", BFIN_CPU_BF536, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
  {"bf536", BFIN_CPU_BF536, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf536", BFIN_CPU_BF536, 0x0001,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf537", BFIN_CPU_BF537, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
  {"bf537", BFIN_CPU_BF537, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf537", BFIN_CPU_BF537, 0x0001,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf538", BFIN_CPU_BF538, 0x0005,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
  {"bf538", BFIN_CPU_BF538, 0x0004,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
  {"bf538", BFIN_CPU_BF538, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
  {"bf538", BFIN_CPU_BF538, 0x0002,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf539", BFIN_CPU_BF539, 0x0005,
   WA_SPECULATIVE_LOADS | WA_LOAD_LCREGS | WA_05000074},
  {"bf539", BFIN_CPU_BF539, 0x0004,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_LOAD_LCREGS | WA_05000074},
  {"bf539", BFIN_CPU_BF539, 0x0003,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
  {"bf539", BFIN_CPU_BF539, 0x0002,
   WA_SPECULATIVE_LOADS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf542m", BFIN_CPU_BF542M, 0x0003,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},

  {"bf542", BFIN_CPU_BF542, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf542", BFIN_CPU_BF542, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf542", BFIN_CPU_BF542, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf544m", BFIN_CPU_BF544M, 0x0003,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},

  {"bf544", BFIN_CPU_BF544, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf544", BFIN_CPU_BF544, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf544", BFIN_CPU_BF544, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf547m", BFIN_CPU_BF547M, 0x0003,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},

  {"bf547", BFIN_CPU_BF547, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf547", BFIN_CPU_BF547, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf547", BFIN_CPU_BF547, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf548m", BFIN_CPU_BF548M, 0x0003,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},

  {"bf548", BFIN_CPU_BF548, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf548", BFIN_CPU_BF548, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf548", BFIN_CPU_BF548, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf549m", BFIN_CPU_BF549M, 0x0003,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},

  {"bf549", BFIN_CPU_BF549, 0x0002,
   WA_SPECULATIVE_LOADS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf549", BFIN_CPU_BF549, 0x0001,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_05000074},
  {"bf549", BFIN_CPU_BF549, 0x0000,
   WA_SPECULATIVE_LOADS | WA_RETS | WA_INDIRECT_CALLS | WA_LOAD_LCREGS
   | WA_05000074},

  {"bf561", BFIN_CPU_BF561, 0x0005, WA_RETS
   | WA_05000283 | WA_05000315 | WA_LOAD_LCREGS | WA_05000074},
  {"bf561", BFIN_CPU_BF561, 0x0003,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},
  {"bf561", BFIN_CPU_BF561, 0x0002,
   WA_SPECULATIVE_LOADS | WA_SPECULATIVE_SYNCS | WA_RETS
   | WA_05000283 | WA_05000257 | WA_05000315 | WA_LOAD_LCREGS
   | WA_05000074},

  {NULL, 0, 0, 0}
};

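/* Illustrative sketch, not part of the build: one way the table above
   can be consulted to find the workaround set for a given -mcpu name
   and silicon revision.  The real option handling elsewhere in the
   compiler is more elaborate (it also parses "bf532-0.5"-style
   suffixes and the -msi-revision=any/none special values); the
   function name here is made up.  */
#if 0
static unsigned int
example_lookup_workarounds (const char *name, int si_revision)
{
  const struct bfin_cpu *p;

  /* Entries for one CPU appear in order of descending SI_REVISION, so
     the first matching entry whose revision is no newer than the one
     requested supplies the workarounds to enable.  */
  for (p = bfin_cpus; p->name != NULL; p++)
    if (strcmp (p->name, name) == 0 && p->si_revision <= si_revision)
      return p->workarounds;
  return 0;
}
#endif
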
int splitting_for_sched, splitting_loops;

static void
bfin_globalize_label (FILE *stream, const char *name)
{
  fputs (".global ", stream);
  assemble_name (stream, name);
  fputc (';', stream);
  fputc ('\n', stream);
}

static void
output_file_start (void)
{
  FILE *file = asm_out_file;
  int i;

  /* Variable tracking should be run after all optimizations which change
     the order of insns.  It also needs a valid CFG.  This can't be done
     in override_options, because flag_var_tracking is finalized after
     that.  */
  bfin_flag_var_tracking = flag_var_tracking;
  flag_var_tracking = 0;

  fprintf (file, ".file \"%s\";\n", input_filename);

  for (i = 0; arg_regs[i] >= 0; i++)
    ;
  max_arg_registers = i;  /* How many arg regs are used.  */
}

/* Called early in the compilation to conditionally modify
   fixed_regs/call_used_regs.  */

void
conditional_register_usage (void)
{
  /* Initialize the condition code flag register rtx.  */
  bfin_cc_rtx = gen_rtx_REG (BImode, REG_CC);
  bfin_rets_rtx = gen_rtx_REG (Pmode, REG_RETS);
}

/* Examine machine-dependent attributes of function type FUNTYPE and return
   its kind.  See the definition of E_FUNKIND.  */

static e_funkind
funkind (const_tree funtype)
{
  tree attrs = TYPE_ATTRIBUTES (funtype);
  if (lookup_attribute ("interrupt_handler", attrs))
    return INTERRUPT_HANDLER;
  else if (lookup_attribute ("exception_handler", attrs))
    return EXCPT_HANDLER;
  else if (lookup_attribute ("nmi_handler", attrs))
    return NMI_HANDLER;
  else
    return SUBROUTINE;
}
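/* For reference, the attributes tested by funkind above are applied in
   user code like this (hypothetical declarations, not part of this
   file):

     void isr (void) __attribute__ ((interrupt_handler));
     void exc (void) __attribute__ ((exception_handler));
     void nmi (void) __attribute__ ((nmi_handler));

   Any function without one of these attributes is a plain SUBROUTINE.  */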
\f
/* Legitimize PIC addresses.  If the address is already position-independent,
   we return ORIG.  Newly generated position-independent addresses go into a
   reg.  This is REG if nonzero, otherwise we allocate register(s) as
   necessary.  PICREG is the register holding the pointer to the PIC offset
   table.  */

static rtx
legitimize_pic_address (rtx orig, rtx reg, rtx picreg)
{
  rtx addr = orig;
  rtx new_rtx = orig;

  if (GET_CODE (addr) == SYMBOL_REF || GET_CODE (addr) == LABEL_REF)
    {
      int unspec;
      rtx tmp;

      if (TARGET_ID_SHARED_LIBRARY)
        unspec = UNSPEC_MOVE_PIC;
      else if (GET_CODE (addr) == SYMBOL_REF
               && SYMBOL_REF_FUNCTION_P (addr))
        unspec = UNSPEC_FUNCDESC_GOT17M4;
      else
        unspec = UNSPEC_MOVE_FDPIC;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), unspec);
      new_rtx = gen_const_mem (Pmode, gen_rtx_PLUS (Pmode, picreg, tmp));

      emit_move_insn (reg, new_rtx);
      if (picreg == pic_offset_table_rtx)
        crtl->uses_pic_offset_table = 1;
      return reg;
    }

  else if (GET_CODE (addr) == CONST || GET_CODE (addr) == PLUS)
    {
      rtx base;

      if (GET_CODE (addr) == CONST)
        {
          addr = XEXP (addr, 0);
          gcc_assert (GET_CODE (addr) == PLUS);
        }

      if (XEXP (addr, 0) == picreg)
        return orig;

      if (reg == 0)
        {
          gcc_assert (can_create_pseudo_p ());
          reg = gen_reg_rtx (Pmode);
        }

      base = legitimize_pic_address (XEXP (addr, 0), reg, picreg);
      addr = legitimize_pic_address (XEXP (addr, 1),
                                     base == reg ? NULL_RTX : reg,
                                     picreg);

      if (GET_CODE (addr) == CONST_INT)
        {
          gcc_assert (! reload_in_progress && ! reload_completed);
          addr = force_reg (Pmode, addr);
        }

      if (GET_CODE (addr) == PLUS && CONSTANT_P (XEXP (addr, 1)))
        {
          base = gen_rtx_PLUS (Pmode, base, XEXP (addr, 0));
          addr = XEXP (addr, 1);
        }

      return gen_rtx_PLUS (Pmode, base, addr);
    }

  return new_rtx;
}
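/* As an example of the transformation above (illustrative only): under
   -mid-shared-library, a reference to a global symbol

     (symbol_ref "foo")

   is rewritten as a GOT load relative to the PIC register,

     (mem (plus (reg picreg) (unspec [(symbol_ref "foo")] UNSPEC_MOVE_PIC)))

   and the loaded value is left in REG (or in a fresh pseudo when REG
   is zero).  */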
\f
/* Stack frame layout.  */

/* For a given REGNO, determine whether it must be saved in the function
   prologue.  IS_INTHANDLER specifies whether we're generating a normal
   prologue or an interrupt/exception one.  */
static bool
must_save_p (bool is_inthandler, unsigned regno)
{
  if (D_REGNO_P (regno))
    {
      bool is_eh_return_reg = false;
      if (crtl->calls_eh_return)
        {
          unsigned j;
          for (j = 0; ; j++)
            {
              unsigned test = EH_RETURN_DATA_REGNO (j);
              if (test == INVALID_REGNUM)
                break;
              if (test == regno)
                is_eh_return_reg = true;
            }
        }

      return (is_eh_return_reg
              || (df_regs_ever_live_p (regno)
                  && !fixed_regs[regno]
                  && (is_inthandler || !call_used_regs[regno])));
    }
  else if (P_REGNO_P (regno))
    {
      return ((df_regs_ever_live_p (regno)
               && !fixed_regs[regno]
               && (is_inthandler || !call_used_regs[regno]))
              || (is_inthandler
                  && (ENABLE_WA_05000283 || ENABLE_WA_05000315)
                  && regno == REG_P5)
              || (!TARGET_FDPIC
                  && regno == PIC_OFFSET_TABLE_REGNUM
                  && (crtl->uses_pic_offset_table
                      || (TARGET_ID_SHARED_LIBRARY
                          && !current_function_is_leaf))));
    }
  else
    return ((is_inthandler || !call_used_regs[regno])
            && (df_regs_ever_live_p (regno)
                || (!leaf_function_p () && call_used_regs[regno])));
}

/* Compute the number of DREGS to save with a push_multiple operation.
   This could include registers that aren't modified in the function,
   since push_multiple only takes a range of registers.
   If IS_INTHANDLER, then everything that is live must be saved, even
   if normally call-clobbered.
   If CONSECUTIVE, return the number of registers we can save in one
   instruction with a push/pop multiple instruction.  */

static int
n_dregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_R7 + 1; i-- != REG_R0;)
    {
      if (must_save_p (is_inthandler, i))
        count++;
      else if (consecutive)
        return count;
    }
  return count;
}

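/* Minimal standalone sketch (not part of the build) of the
   "consecutive" counting idea used by n_dregs_to_save above and
   n_pregs_to_save below, with a plain bitmask standing in for
   must_save_p; bit N set means register N must be saved.  */
#if 0
static int
count_consecutive_from_top (unsigned int save_mask, int top)
{
  int count = 0;
  int i;

  for (i = top; i >= 0; i--)
    if (save_mask & (1u << i))
      count++;
    else
      break;  /* Stop at the first gap, as the code above does.  */
  /* E.g. a mask of 0xf0 with TOP == 7 yields 4: R7..R4 can be covered
     by one push-multiple, and any lower registers are pushed
     individually.  */
  return count;
}
#endif
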
/* Like n_dregs_to_save, but compute the number of PREGS to save.  */

static int
n_pregs_to_save (bool is_inthandler, bool consecutive)
{
  int count = 0;
  unsigned i;

  for (i = REG_P5 + 1; i-- != REG_P0;)
    if (must_save_p (is_inthandler, i))
      count++;
    else if (consecutive)
      return count;
  return count;
}

/* Determine if we are going to save the frame pointer in the prologue.  */

static bool
must_save_fp_p (void)
{
  return df_regs_ever_live_p (REG_FP);
}

/* Determine if we are going to save the RETS register.  */
static bool
must_save_rets_p (void)
{
  return df_regs_ever_live_p (REG_RETS);
}

static bool
stack_frame_needed_p (void)
{
  /* EH return puts a new return address into the frame using an
     address relative to the frame pointer.  */
  if (crtl->calls_eh_return)
    return true;
  return frame_pointer_needed;
}

/* Emit code to save registers in the prologue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_prologue_reg_save (rtx spreg, int saveall, bool is_inthandler)
{
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int dregno, pregno;
  int total_consec = ndregs_consec + npregs_consec;
  int i, d_to_save;

  if (saveall || is_inthandler)
    {
      rtx insn = emit_move_insn (predec, gen_rtx_REG (SImode, REG_ASTAT));

      RTX_FRAME_RELATED_P (insn) = 1;
      for (dregno = REG_LT0; dregno <= REG_LB1; dregno++)
        if (! current_function_is_leaf
            || cfun->machine->has_hardware_loops
            || cfun->machine->has_loopreg_clobber
            || (ENABLE_WA_05000257
                && (dregno == REG_LC0 || dregno == REG_LC1)))
          {
            insn = emit_move_insn (predec, gen_rtx_REG (SImode, dregno));
            RTX_FRAME_RELATED_P (insn) = 1;
          }
    }

  if (total_consec != 0)
    {
      rtx insn;
      rtx val = GEN_INT (-total_consec * 4);
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 2));

      XVECEXP (pat, 0, 0) = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, val),
                                            UNSPEC_PUSH_MULTIPLE);
      XVECEXP (pat, 0, total_consec + 1) = gen_rtx_SET (VOIDmode, spreg,
                                                        gen_rtx_PLUS (Pmode,
                                                                      spreg,
                                                                      val));
      RTX_FRAME_RELATED_P (XVECEXP (pat, 0, total_consec + 1)) = 1;
      d_to_save = ndregs_consec;
      dregno = REG_R7 + 1 - ndregs_consec;
      pregno = REG_P5 + 1 - npregs_consec;
      for (i = 0; i < total_consec; i++)
        {
          rtx memref = gen_rtx_MEM (word_mode,
                                    gen_rtx_PLUS (Pmode, spreg,
                                                  GEN_INT (- i * 4 - 4)));
          rtx subpat;
          if (d_to_save > 0)
            {
              subpat = gen_rtx_SET (VOIDmode, memref,
                                    gen_rtx_REG (word_mode, dregno++));
              d_to_save--;
            }
          else
            {
              subpat = gen_rtx_SET (VOIDmode, memref,
                                    gen_rtx_REG (word_mode, pregno++));
            }
          XVECEXP (pat, 0, i + 1) = subpat;
          RTX_FRAME_RELATED_P (subpat) = 1;
        }
      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  for (dregno = REG_R0; ndregs != ndregs_consec; dregno++)
    {
      if (must_save_p (is_inthandler, dregno))
        {
          rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, dregno));
          RTX_FRAME_RELATED_P (insn) = 1;
          ndregs--;
        }
    }
  for (pregno = REG_P0; npregs != npregs_consec; pregno++)
    {
      if (must_save_p (is_inthandler, pregno))
        {
          rtx insn = emit_move_insn (predec, gen_rtx_REG (word_mode, pregno));
          RTX_FRAME_RELATED_P (insn) = 1;
          npregs--;
        }
    }
  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (saveall
        || (is_inthandler
            && (df_regs_ever_live_p (i)
                || (!leaf_function_p () && call_used_regs[i]))))
      {
        rtx insn;
        if (i == REG_A0 || i == REG_A1)
          insn = emit_move_insn (gen_rtx_MEM (PDImode, predec1),
                                 gen_rtx_REG (PDImode, i));
        else
          insn = emit_move_insn (predec, gen_rtx_REG (SImode, i));
        RTX_FRAME_RELATED_P (insn) = 1;
      }
}

/* Emit code to restore registers in the epilogue.  SAVEALL is nonzero if we
   must save all registers; this is used for interrupt handlers.
   SPREG contains (reg:SI REG_SP).  IS_INTHANDLER is true if we're doing
   this for an interrupt (or exception) handler.  */

static void
expand_epilogue_reg_restore (rtx spreg, bool saveall, bool is_inthandler)
{
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  int ndregs = saveall ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = saveall ? 6 : n_pregs_to_save (is_inthandler, false);
  int ndregs_consec = saveall ? 8 : n_dregs_to_save (is_inthandler, true);
  int npregs_consec = saveall ? 6 : n_pregs_to_save (is_inthandler, true);
  int total_consec = ndregs_consec + npregs_consec;
  int i, regno;
  rtx insn;

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  for (i = REG_CC - 1; i > REG_P7; i--)
    if (saveall
        || (is_inthandler
            && (df_regs_ever_live_p (i)
                || (!leaf_function_p () && call_used_regs[i]))))
      {
        if (i == REG_A0 || i == REG_A1)
          {
            rtx mem = gen_rtx_MEM (PDImode, postinc1);
            MEM_VOLATILE_P (mem) = 1;
            emit_move_insn (gen_rtx_REG (PDImode, i), mem);
          }
        else
          emit_move_insn (gen_rtx_REG (SImode, i), postinc);
      }

  regno = REG_P5 - npregs_consec;
  for (; npregs != npregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
        {
          emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
          npregs--;
        }
    }
  regno = REG_R7 - ndregs_consec;
  for (; ndregs != ndregs_consec; regno--)
    {
      if (must_save_p (is_inthandler, regno))
        {
          emit_move_insn (gen_rtx_REG (word_mode, regno), postinc);
          ndregs--;
        }
    }

  if (total_consec != 0)
    {
      rtx pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (total_consec + 1));
      XVECEXP (pat, 0, 0)
        = gen_rtx_SET (VOIDmode, spreg,
                       gen_rtx_PLUS (Pmode, spreg,
                                     GEN_INT (total_consec * 4)));

      if (npregs_consec > 0)
        regno = REG_P5 + 1;
      else
        regno = REG_R7 + 1;

      for (i = 0; i < total_consec; i++)
        {
          rtx addr = (i > 0
                      ? gen_rtx_PLUS (Pmode, spreg, GEN_INT (i * 4))
                      : spreg);
          rtx memref = gen_rtx_MEM (word_mode, addr);

          regno--;
          XVECEXP (pat, 0, i + 1)
            = gen_rtx_SET (VOIDmode, gen_rtx_REG (word_mode, regno), memref);

          if (npregs_consec > 0)
            {
              if (--npregs_consec == 0)
                regno = REG_R7 + 1;
            }
        }

      insn = emit_insn (pat);
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (saveall || is_inthandler)
    {
      for (regno = REG_LB1; regno >= REG_LT0; regno--)
        if (! current_function_is_leaf
            || cfun->machine->has_hardware_loops
            || cfun->machine->has_loopreg_clobber
            || (ENABLE_WA_05000257 && (regno == REG_LC0 || regno == REG_LC1)))
          emit_move_insn (gen_rtx_REG (SImode, regno), postinc);

      emit_move_insn (gen_rtx_REG (SImode, REG_ASTAT), postinc);
    }
}

/* Perform any actions needed for a function that is receiving a
   variable number of arguments.

   CUM is as above.

   MODE and TYPE are the mode and type of the current parameter.

   PRETEND_SIZE is a variable that should be set to the amount of stack
   that must be pushed by the prolog to pretend that our caller pushed
   it.

   Normally, this macro will push all remaining incoming registers on the
   stack and set PRETEND_SIZE to the length of the registers pushed.

   Blackfin specific:
   - The VDSP C compiler manual (our ABI) says that a variable-args
     function should save the R0, R1 and R2 registers in the stack.
   - The caller will always leave space on the stack for the
     arguments that are passed in registers, so we don't have
     to leave any extra space.
   - Now the va_start pointer can access all arguments from the stack.  */

static void
setup_incoming_varargs (CUMULATIVE_ARGS *cum,
                        enum machine_mode mode ATTRIBUTE_UNUSED,
                        tree type ATTRIBUTE_UNUSED, int *pretend_size,
                        int no_rtl)
{
  rtx mem;
  int i;

  if (no_rtl)
    return;

  /* The move for named arguments will be generated automatically by the
     compiler.  We need to generate the move rtx for the unnamed arguments
     if they are in the first 3 words.  We assume at least 1 named argument
     exists, so we never generate [ARGP] = R0 here.  */

  for (i = cum->words + 1; i < max_arg_registers; i++)
    {
      mem = gen_rtx_MEM (Pmode,
                         plus_constant (arg_pointer_rtx,
                                        (i * UNITS_PER_WORD)));
      emit_move_insn (mem, gen_rtx_REG (Pmode, i));
    }

  *pretend_size = 0;
}

/* Value should be nonzero if functions must have frame pointers.
   Zero means the frame pointer need not be set up (and parms may
   be accessed via the stack pointer) in functions that seem suitable.  */

static bool
bfin_frame_pointer_required (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));

  if (fkind != SUBROUTINE)
    return true;

  /* We turn on -fomit-frame-pointer if -momit-leaf-frame-pointer is used,
     so we have to override it for non-leaf functions.  */
  if (TARGET_OMIT_LEAF_FRAME_POINTER && ! current_function_is_leaf)
    return true;

  return false;
}

/* Return the number of registers pushed during the prologue.  */

static int
n_regs_saved_by_prologue (void)
{
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  bool is_inthandler = fkind != SUBROUTINE;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = (lookup_attribute ("saveall", attrs) != NULL_TREE
              || (is_inthandler && !current_function_is_leaf));
  int ndregs = all ? 8 : n_dregs_to_save (is_inthandler, false);
  int npregs = all ? 6 : n_pregs_to_save (is_inthandler, false);
  int n = ndregs + npregs;
  int i;

  if (all || stack_frame_needed_p ())
    n += 2;
  else
    {
      if (must_save_fp_p ())
        n++;
      if (must_save_rets_p ())
        n++;
    }

  if (fkind != SUBROUTINE || all)
    {
      /* Increment once for ASTAT.  */
      n++;
      if (! current_function_is_leaf
          || cfun->machine->has_hardware_loops
          || cfun->machine->has_loopreg_clobber)
        {
          n += 6;
        }
    }

  if (fkind != SUBROUTINE)
    {
      /* RETE/X/N.  */
      if (lookup_attribute ("nesting", attrs))
        n++;
    }

  for (i = REG_P7 + 1; i < REG_CC; i++)
    if (all
        || (fkind != SUBROUTINE
            && (df_regs_ever_live_p (i)
                || (!leaf_function_p () && call_used_regs[i]))))
      n += i == REG_A0 || i == REG_A1 ? 2 : 1;

  return n;
}

/* Given FROM and TO register numbers, say whether this elimination is
   allowed.  Frame pointer elimination is automatically handled.

   All other eliminations are valid.  */

static bool
bfin_can_eliminate (const int from ATTRIBUTE_UNUSED, const int to)
{
  return (to == STACK_POINTER_REGNUM ? ! frame_pointer_needed : true);
}

/* Return the offset between two registers, one to be eliminated, and the
   other its replacement, at the start of a routine.  */

HOST_WIDE_INT
bfin_initial_elimination_offset (int from, int to)
{
  HOST_WIDE_INT offset = 0;

  if (from == ARG_POINTER_REGNUM)
    offset = n_regs_saved_by_prologue () * 4;

  if (to == STACK_POINTER_REGNUM)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
        offset += crtl->outgoing_args_size;
      else if (crtl->outgoing_args_size)
        offset += FIXED_STACK_AREA;

      offset += get_frame_size ();
    }

  return offset;
}

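/* Worked example (illustrative): in a function whose prologue saves
   three registers, with 24 bytes of locals and at most 8 bytes of
   outgoing arguments, eliminating ARG_POINTER_REGNUM in favor of
   STACK_POINTER_REGNUM yields

     3 * 4                register save area
     + FIXED_STACK_AREA   outgoing args, rounded up to the fixed area
     + 24                 frame size

   assuming 8 bytes is below FIXED_STACK_AREA, so the fixed area size
   is what gets reserved.  */
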
/* Emit code to load a constant CONSTANT into register REG; setting
   RTX_FRAME_RELATED_P on all insns we generate if RELATED is true.
   Make sure that the insns we generate need not be split.  */

static void
frame_related_constant_load (rtx reg, HOST_WIDE_INT constant, bool related)
{
  rtx insn;
  rtx cst = GEN_INT (constant);

  if (constant >= -32768 && constant < 65536)
    insn = emit_move_insn (reg, cst);
  else
    {
      /* We don't call split_load_immediate here, since dwarf2out.c can get
         confused about some of the more clever sequences it can generate.  */
      insn = emit_insn (gen_movsi_high (reg, cst));
      if (related)
        RTX_FRAME_RELATED_P (insn) = 1;
      insn = emit_insn (gen_movsi_low (reg, reg, cst));
    }
  if (related)
    RTX_FRAME_RELATED_P (insn) = 1;
}

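/* Example of the split above (illustrative): a constant such as
   0x12345678 is outside the single-insn range [-32768, 65536), so it
   is loaded with the pair

     P1.H = 0x1234;     // gen_movsi_high: upper 16 bits
     P1.L = 0x5678;     // gen_movsi_low: lower 16 bits

   while a value like 40000 fits the range and is loaded with a single
   move.  */
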
/* Generate efficient code to add a value to a P register.
   Set RTX_FRAME_RELATED_P on the generated insns if FRAME is nonzero.
   EPILOGUE_P is zero if this function is called for prologue,
   otherwise it's nonzero.  And it's less than zero if this is for
   sibcall epilogue.  */

static void
add_to_reg (rtx reg, HOST_WIDE_INT value, int frame, int epilogue_p)
{
  if (value == 0)
    return;

  /* Choose whether to use a sequence using a temporary register, or
     a sequence with multiple adds.  We can add a signed 7-bit value
     in one instruction.  */
  if (value > 120 || value < -120)
    {
      rtx tmpreg;
      rtx tmpreg2;
      rtx insn;

      tmpreg2 = NULL_RTX;

      /* For prologue or normal epilogue, P1 can be safely used
         as the temporary register.  For sibcall epilogue, we try to find
         a call used P register, which will be restored in epilogue.
         If we cannot find such a P register, we have to use one I register
         to help us.  */

      if (epilogue_p >= 0)
        tmpreg = gen_rtx_REG (SImode, REG_P1);
      else
        {
          int i;
          for (i = REG_P0; i <= REG_P5; i++)
            if ((df_regs_ever_live_p (i) && ! call_used_regs[i])
                || (!TARGET_FDPIC
                    && i == PIC_OFFSET_TABLE_REGNUM
                    && (crtl->uses_pic_offset_table
                        || (TARGET_ID_SHARED_LIBRARY
                            && ! current_function_is_leaf))))
              break;
          if (i <= REG_P5)
            tmpreg = gen_rtx_REG (SImode, i);
          else
            {
              tmpreg = gen_rtx_REG (SImode, REG_P1);
              tmpreg2 = gen_rtx_REG (SImode, REG_I0);
              emit_move_insn (tmpreg2, tmpreg);
            }
        }

      if (frame)
        frame_related_constant_load (tmpreg, value, TRUE);
      else
        insn = emit_move_insn (tmpreg, GEN_INT (value));

      insn = emit_insn (gen_addsi3 (reg, reg, tmpreg));
      if (frame)
        RTX_FRAME_RELATED_P (insn) = 1;

      if (tmpreg2 != NULL_RTX)
        emit_move_insn (tmpreg, tmpreg2);
    }
  else
    do
      {
        int size = value;
        rtx insn;

        if (size > 60)
          size = 60;
        else if (size < -60)
          /* We could use -62, but that would leave the stack unaligned, so
             it's no good.  */
          size = -60;

        insn = emit_insn (gen_addsi3 (reg, reg, GEN_INT (size)));
        if (frame)
          RTX_FRAME_RELATED_P (insn) = 1;
        value -= size;
      }
    while (value != 0);
}

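/* Example of the multiple-add path above (illustrative): adding -150
   to a P register without using a temporary is emitted as

     SP += -60;
     SP += -60;
     SP += -30;

   with each step capped at +/-60 so that the stack stays 4-byte
   aligned throughout (the immediate range would allow a little more,
   but 60 is the largest multiple of 4 that is safe here).  */
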
/* Generate a LINK insn for a frame sized FRAME_SIZE.  If this constant
   is too large, generate a sequence of insns that has the same effect.
   SPREG contains (reg:SI REG_SP).  */

static void
emit_link_insn (rtx spreg, HOST_WIDE_INT frame_size)
{
  HOST_WIDE_INT link_size = frame_size;
  rtx insn;
  int i;

  if (link_size > 262140)
    link_size = 262140;

  /* Use a LINK insn with as big a constant as possible, then subtract
     any remaining size from the SP.  */
  insn = emit_insn (gen_link (GEN_INT (-8 - link_size)));
  RTX_FRAME_RELATED_P (insn) = 1;

  for (i = 0; i < XVECLEN (PATTERN (insn), 0); i++)
    {
      rtx set = XVECEXP (PATTERN (insn), 0, i);
      gcc_assert (GET_CODE (set) == SET);
      RTX_FRAME_RELATED_P (set) = 1;
    }

  frame_size -= link_size;

  if (frame_size > 0)
    {
      /* Must use a call-clobbered PREG that isn't the static chain.  */
      rtx tmpreg = gen_rtx_REG (Pmode, REG_P1);

      frame_related_constant_load (tmpreg, -frame_size, TRUE);
      insn = emit_insn (gen_addsi3 (spreg, spreg, tmpreg));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
}

/* Return the number of bytes we must reserve for outgoing arguments
   in the current function's stack frame.  */

static HOST_WIDE_INT
arg_area_size (void)
{
  if (crtl->outgoing_args_size)
    {
      if (crtl->outgoing_args_size >= FIXED_STACK_AREA)
        return crtl->outgoing_args_size;
      else
        return FIXED_STACK_AREA;
    }
  return 0;
}

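/* Example (illustrative, assuming a FIXED_STACK_AREA of 12 bytes): a
   function with 8 bytes of outgoing arguments reserves 12; one with
   16 bytes of outgoing arguments reserves 16; a function that calls
   nothing reserves 0.  */
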
/* Save RETS and FP, and allocate a stack frame.  ALL is true if the
   function must save all its registers (true only for certain interrupt
   handlers).  */

static void
do_link (rtx spreg, HOST_WIDE_INT frame_size, bool all)
{
  frame_size += arg_area_size ();

  if (all
      || stack_frame_needed_p ()
      || (must_save_rets_p () && must_save_fp_p ()))
    emit_link_insn (spreg, frame_size);
  else
    {
      if (must_save_rets_p ())
        {
          rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
                                            gen_rtx_PRE_DEC (Pmode, spreg)),
                               bfin_rets_rtx);
          rtx insn = emit_insn (pat);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      if (must_save_fp_p ())
        {
          rtx pat = gen_movsi (gen_rtx_MEM (Pmode,
                                            gen_rtx_PRE_DEC (Pmode, spreg)),
                               gen_rtx_REG (Pmode, REG_FP));
          rtx insn = emit_insn (pat);
          RTX_FRAME_RELATED_P (insn) = 1;
        }
      add_to_reg (spreg, -frame_size, 1, 0);
    }
}

/* Like do_link, but used for epilogues to deallocate the stack frame.
   EPILOGUE_P is zero if this function is called for prologue,
   otherwise it's nonzero.  And it's less than zero if this is for
   sibcall epilogue.  */

static void
do_unlink (rtx spreg, HOST_WIDE_INT frame_size, bool all, int epilogue_p)
{
  frame_size += arg_area_size ();

  if (stack_frame_needed_p ())
    emit_insn (gen_unlink ());
  else
    {
      rtx postinc = gen_rtx_MEM (Pmode, gen_rtx_POST_INC (Pmode, spreg));

      add_to_reg (spreg, frame_size, 0, epilogue_p);
      if (all || must_save_fp_p ())
        {
          rtx fpreg = gen_rtx_REG (Pmode, REG_FP);
          emit_move_insn (fpreg, postinc);
          emit_use (fpreg);
        }
      if (all || must_save_rets_p ())
        {
          emit_move_insn (bfin_rets_rtx, postinc);
          emit_use (bfin_rets_rtx);
        }
    }
}

/* Generate a prologue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler prologues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_prologue (rtx spreg, e_funkind fkind, bool all)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx predec1 = gen_rtx_PRE_DEC (SImode, spreg);
  rtx predec = gen_rtx_MEM (SImode, predec1);
  rtx insn;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  tree kspisusp = lookup_attribute ("kspisusp", attrs);

  if (kspisusp)
    {
      insn = emit_move_insn (spreg, gen_rtx_REG (Pmode, REG_USP));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* We need space on the stack in case we need to save the argument
     registers.  */
  if (fkind == EXCPT_HANDLER)
    {
      insn = emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (-12)));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save everything here.  */
  if (!current_function_is_leaf)
    all = true;
  expand_prologue_reg_save (spreg, all, true);

  if (ENABLE_WA_05000283 || ENABLE_WA_05000315)
    {
      rtx chipid = GEN_INT (trunc_int_for_mode (0xFFC00014, SImode));
      rtx p5reg = gen_rtx_REG (Pmode, REG_P5);
      emit_insn (gen_movbi (bfin_cc_rtx, const1_rtx));
      emit_insn (gen_movsi_high (p5reg, chipid));
      emit_insn (gen_movsi_low (p5reg, p5reg, chipid));
      emit_insn (gen_dummy_load (p5reg, bfin_cc_rtx));
    }

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      insn = emit_move_insn (predec, srcreg);
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  do_link (spreg, frame_size, all);

  if (fkind == EXCPT_HANDLER)
    {
      rtx r0reg = gen_rtx_REG (SImode, REG_R0);
      rtx r1reg = gen_rtx_REG (SImode, REG_R1);
      rtx r2reg = gen_rtx_REG (SImode, REG_R2);
      rtx insn;

      insn = emit_move_insn (r0reg, gen_rtx_REG (SImode, REG_SEQSTAT));
      insn = emit_insn (gen_ashrsi3 (r0reg, r0reg, GEN_INT (26)));
      insn = emit_insn (gen_ashlsi3 (r0reg, r0reg, GEN_INT (26)));
      insn = emit_move_insn (r1reg, spreg);
      insn = emit_move_insn (r2reg, gen_rtx_REG (Pmode, REG_FP));
      insn = emit_insn (gen_addsi3 (r2reg, r2reg, GEN_INT (8)));
    }
}

/* Generate an epilogue suitable for a function of kind FKIND.  This is
   called for interrupt and exception handler epilogues.
   SPREG contains (reg:SI REG_SP).  */

static void
expand_interrupt_handler_epilogue (rtx spreg, e_funkind fkind, bool all)
{
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  rtx postinc1 = gen_rtx_POST_INC (SImode, spreg);
  rtx postinc = gen_rtx_MEM (SImode, postinc1);

  /* A slightly crude technique to stop flow from trying to delete "dead"
     insns.  */
  MEM_VOLATILE_P (postinc) = 1;

  do_unlink (spreg, get_frame_size (), all, 1);

  if (lookup_attribute ("nesting", attrs))
    {
      rtx srcreg = gen_rtx_REG (Pmode, ret_regs[fkind]);
      emit_move_insn (srcreg, postinc);
    }

  /* If we're calling other functions, they won't save their call-clobbered
     registers, so we must save (and restore) everything here.  */
  if (!current_function_is_leaf)
    all = true;

  expand_epilogue_reg_restore (spreg, all, true);

  /* Deallocate any space we left on the stack in case we needed to save the
     argument registers.  */
  if (fkind == EXCPT_HANDLER)
    emit_insn (gen_addsi3 (spreg, spreg, GEN_INT (12)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, ret_regs[fkind])));
}

/* Used while emitting the prologue to generate code to load the correct value
   into the PIC register, which is passed in DEST.  */

static rtx
bfin_load_pic_reg (rtx dest)
{
  struct cgraph_local_info *i = NULL;
  rtx addr, insn;

  i = cgraph_local_info (current_function_decl);

  /* Functions local to the translation unit don't need to reload the
     pic reg, since the caller always passes a usable one.  */
  if (i && i->local)
    return pic_offset_table_rtx;

  if (bfin_lib_id_given)
    addr = plus_constant (pic_offset_table_rtx, -4 - bfin_library_id * 4);
  else
    addr = gen_rtx_PLUS (Pmode, pic_offset_table_rtx,
                         gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx),
                                         UNSPEC_LIBRARY_OFFSET));
  insn = emit_insn (gen_movsi (dest, gen_rtx_MEM (Pmode, addr)));
  return dest;
}

/* Generate RTL for the prologue of the current function.  */

void
bfin_expand_prologue (void)
{
  HOST_WIDE_INT frame_size = get_frame_size ();
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  rtx pic_reg_loaded = NULL_RTX;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_prologue (spreg, fkind, all);
      return;
    }

  if (crtl->limit_stack
      || (TARGET_STACK_CHECK_L1
          && !DECL_NO_LIMIT_STACK (current_function_decl)))
    {
      HOST_WIDE_INT offset
        = bfin_initial_elimination_offset (ARG_POINTER_REGNUM,
                                           STACK_POINTER_REGNUM);
      rtx lim = crtl->limit_stack ? stack_limit_rtx : NULL_RTX;
      rtx p2reg = gen_rtx_REG (Pmode, REG_P2);

      if (!lim)
        {
          emit_move_insn (p2reg, gen_int_mode (0xFFB00000, SImode));
          emit_move_insn (p2reg, gen_rtx_MEM (Pmode, p2reg));
          lim = p2reg;
        }
      if (GET_CODE (lim) == SYMBOL_REF)
        {
          if (TARGET_ID_SHARED_LIBRARY)
            {
              rtx p1reg = gen_rtx_REG (Pmode, REG_P1);
              rtx val;
              pic_reg_loaded = bfin_load_pic_reg (p2reg);
              val = legitimize_pic_address (stack_limit_rtx, p1reg,
                                            pic_reg_loaded);
              emit_move_insn (p1reg, val);
              frame_related_constant_load (p2reg, offset, FALSE);
              emit_insn (gen_addsi3 (p2reg, p2reg, p1reg));
              lim = p2reg;
            }
          else
            {
              rtx limit = plus_constant (lim, offset);
              emit_move_insn (p2reg, limit);
              lim = p2reg;
            }
        }
      else
        {
          if (lim != p2reg)
            emit_move_insn (p2reg, lim);
          add_to_reg (p2reg, offset, 0, 0);
          lim = p2reg;
        }
      emit_insn (gen_compare_lt (bfin_cc_rtx, spreg, lim));
      emit_insn (gen_trapifcc ());
    }
  expand_prologue_reg_save (spreg, all, false);

  do_link (spreg, frame_size, false);

  if (TARGET_ID_SHARED_LIBRARY
      && !TARGET_SEP_DATA
      && (crtl->uses_pic_offset_table
          || !current_function_is_leaf))
    bfin_load_pic_reg (pic_offset_table_rtx);
}

/* Generate RTL for the epilogue of the current function.  NEED_RETURN is zero
   if this is for a sibcall.  EH_RETURN is nonzero if we're expanding an
   eh_return pattern.  SIBCALL_P is true if this is a sibcall epilogue,
   false otherwise.  */

void
bfin_expand_epilogue (int need_return, int eh_return, bool sibcall_p)
{
  rtx spreg = gen_rtx_REG (Pmode, REG_SP);
  e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
  int e = sibcall_p ? -1 : 1;
  tree attrs = TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl));
  bool all = lookup_attribute ("saveall", attrs) != NULL_TREE;

  if (fkind != SUBROUTINE)
    {
      expand_interrupt_handler_epilogue (spreg, fkind, all);
      return;
    }

  do_unlink (spreg, get_frame_size (), false, e);

  expand_epilogue_reg_restore (spreg, all, false);

  /* Omit the return insn if this is for a sibcall.  */
  if (! need_return)
    return;

  if (eh_return)
    emit_insn (gen_addsi3 (spreg, spreg, gen_rtx_REG (Pmode, REG_P2)));

  emit_jump_insn (gen_return_internal (gen_rtx_REG (Pmode, REG_RETS)));
}
\f
/* Return nonzero if register OLD_REG can be renamed to register NEW_REG.  */

int
bfin_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
                           unsigned int new_reg)
{
  /* Interrupt functions can only use registers that have already been
     saved by the prologue, even if they would normally be
     call-clobbered.  */

  if (funkind (TREE_TYPE (current_function_decl)) != SUBROUTINE
      && !df_regs_ever_live_p (new_reg))
    return 0;

  return 1;
}

/* Return the value of the return address for the frame COUNT steps up
   from the current frame, after the prologue.
   We punt for everything but the current frame by returning const0_rtx.  */

rtx
bfin_return_addr_rtx (int count)
{
  if (count != 0)
    return const0_rtx;

  return get_hard_reg_initial_val (Pmode, REG_RETS);
}

static rtx
bfin_delegitimize_address (rtx orig_x)
{
  rtx x = orig_x;

  if (GET_CODE (x) != MEM)
    return orig_x;

  x = XEXP (x, 0);
  if (GET_CODE (x) == PLUS
      && GET_CODE (XEXP (x, 1)) == UNSPEC
      && XINT (XEXP (x, 1), 1) == UNSPEC_MOVE_PIC
      && GET_CODE (XEXP (x, 0)) == REG
      && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
    return XVECEXP (XEXP (x, 1), 0, 0);

  return orig_x;
}

/* This predicate is used to compute the length of a load/store insn.
   OP is a MEM rtx; we return nonzero if its addressing mode requires a
   32-bit instruction.  */

int
effective_address_32bit_p (rtx op, enum machine_mode mode)
{
  HOST_WIDE_INT offset;

  mode = GET_MODE (op);
  op = XEXP (op, 0);

  if (GET_CODE (op) != PLUS)
    {
      gcc_assert (REG_P (op) || GET_CODE (op) == POST_INC
                  || GET_CODE (op) == PRE_DEC || GET_CODE (op) == POST_DEC);
      return 0;
    }

  if (GET_CODE (XEXP (op, 1)) == UNSPEC)
    return 1;

  offset = INTVAL (XEXP (op, 1));

  /* All byte loads use a 16-bit offset.  */
  if (GET_MODE_SIZE (mode) == 1)
    return 1;

  if (GET_MODE_SIZE (mode) == 4)
    {
      /* Frame pointer relative loads can use a negative offset, all others
         are restricted to a small positive one.  */
      if (XEXP (op, 0) == frame_pointer_rtx)
        return offset < -128 || offset > 60;
      return offset < 0 || offset > 60;
    }

  /* Must be HImode now.  */
  return offset < 0 || offset > 30;
}

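/* Examples for the predicate above (illustrative): for a 4-byte
   access, [P0 + 40] fits the short form (offset within [0, 60]),
   while [P0 + 64] and [P0 - 4] need the 32-bit form.  FP-relative
   accesses also allow small negative offsets, so [FP - 64] is short
   but [FP - 132] is not (offset below -128).  For 2-byte accesses the
   short range is [0, 30], and byte loads always count as 32-bit
   insns since they use a 16-bit offset.  */
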
/* Returns true if X is a memory reference using an I register.  */
bool
bfin_dsp_memref_p (rtx x)
{
  if (! MEM_P (x))
    return false;
  x = XEXP (x, 0);
  if (GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_INC
      || GET_CODE (x) == POST_DEC || GET_CODE (x) == PRE_DEC)
    x = XEXP (x, 0);
  return IREG_P (x);
}

/* Return cost of the memory address ADDR.
   All addressing modes are equally cheap on the Blackfin.  */

static int
bfin_address_cost (rtx addr ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
{
  return 1;
}

/* Subroutine of print_operand; used to print a memory reference X to FILE.  */

void
print_address_operand (FILE *file, rtx x)
{
  switch (GET_CODE (x))
    {
    case PLUS:
      output_address (XEXP (x, 0));
      fprintf (file, "+");
      output_address (XEXP (x, 1));
      break;

    case PRE_DEC:
      fprintf (file, "--");
      output_address (XEXP (x, 0));
      break;
    case POST_INC:
      output_address (XEXP (x, 0));
      fprintf (file, "++");
      break;
    case POST_DEC:
      output_address (XEXP (x, 0));
      fprintf (file, "--");
      break;

    default:
      gcc_assert (GET_CODE (x) != MEM);
      print_operand (file, x, 0);
      break;
    }
}

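/* Example outputs for the cases above (illustrative): an address of
   the form (plus (reg P2) (const_int 8)) prints as "P2+8",
   (post_inc (reg I0)) as "I0++", (pre_dec (reg SP)) as "--SP", and
   (post_dec (reg P1)) as "P1--".  */
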
/* Adding intp DImode support by Tony
 * -- Q: (low word)
 * -- R: (high word)
 */

void
print_operand (FILE *file, rtx x, char code)
{
  enum machine_mode mode;

  if (code == '!')
    {
      if (GET_MODE (current_output_insn) == SImode)
        fprintf (file, " ||");
      else
        fprintf (file, ";");
      return;
    }

  mode = GET_MODE (x);

  switch (code)
    {
    case 'j':
      switch (GET_CODE (x))
        {
        case EQ:
          fprintf (file, "e");
          break;
        case NE:
          fprintf (file, "ne");
          break;
        case GT:
          fprintf (file, "g");
          break;
        case LT:
          fprintf (file, "l");
          break;
        case GE:
          fprintf (file, "ge");
          break;
        case LE:
          fprintf (file, "le");
          break;
        case GTU:
          fprintf (file, "g");
          break;
        case LTU:
          fprintf (file, "l");
          break;
        case GEU:
          fprintf (file, "ge");
          break;
        case LEU:
          fprintf (file, "le");
          break;
        default:
          output_operand_lossage ("invalid %%j value");
        }
      break;

    case 'J':  /* Reverse logic.  */
      switch (GET_CODE (x))
        {
        case EQ:
          fprintf (file, "ne");
          break;
        case NE:
          fprintf (file, "e");
          break;
        case GT:
          fprintf (file, "le");
          break;
        case LT:
          fprintf (file, "ge");
          break;
        case GE:
          fprintf (file, "l");
          break;
        case LE:
          fprintf (file, "g");
          break;
        case GTU:
          fprintf (file, "le");
          break;
        case LTU:
          fprintf (file, "ge");
          break;
        case GEU:
          fprintf (file, "l");
          break;
        case LEU:
          fprintf (file, "g");
          break;
        default:
          output_operand_lossage ("invalid %%J value");
        }
      break;

    default:
      switch (GET_CODE (x))
        {
        case REG:
          if (code == 'h')
            {
              if (REGNO (x) < 32)
                fprintf (file, "%s", short_reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'd')
            {
              if (REGNO (x) < 32)
                fprintf (file, "%s", high_reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'w')
            {
              if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
                fprintf (file, "%s.w", reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'x')
            {
              if (REGNO (x) == REG_A0 || REGNO (x) == REG_A1)
                fprintf (file, "%s.x", reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'v')
            {
              if (REGNO (x) == REG_A0)
                fprintf (file, "AV0");
              else if (REGNO (x) == REG_A1)
                fprintf (file, "AV1");
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'D')
            {
              if (D_REGNO_P (REGNO (x)))
                fprintf (file, "%s", dregs_pair_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'H')
            {
              if ((mode == DImode || mode == DFmode) && REG_P (x))
                fprintf (file, "%s", reg_names[REGNO (x) + 1]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else if (code == 'T')
            {
              if (D_REGNO_P (REGNO (x)))
                fprintf (file, "%s", byte_reg_names[REGNO (x)]);
              else
                output_operand_lossage ("invalid operand for code '%c'", code);
            }
          else
            fprintf (file, "%s", reg_names[REGNO (x)]);
          break;

        case MEM:
          fputc ('[', file);
          x = XEXP (x, 0);
          print_address_operand (file, x);
          fputc (']', file);
          break;

        case CONST_INT:
          if (code == 'M')
            {
              switch (INTVAL (x))
                {
                case MACFLAG_NONE:
                  break;
                case MACFLAG_FU:
                  fputs ("(FU)", file);
                  break;
                case MACFLAG_T:
                  fputs ("(T)", file);
                  break;
                case MACFLAG_TFU:
                  fputs ("(TFU)", file);
                  break;
                case MACFLAG_W32:
                  fputs ("(W32)", file);
                  break;
                case MACFLAG_IS:
                  fputs ("(IS)", file);
                  break;
                case MACFLAG_IU:
                  fputs ("(IU)", file);
                  break;
                case MACFLAG_IH:
                  fputs ("(IH)", file);
                  break;
                case MACFLAG_M:
                  fputs ("(M)", file);
                  break;
                case MACFLAG_IS_M:
                  fputs ("(IS,M)", file);
                  break;
                case MACFLAG_ISS2:
                  fputs ("(ISS2)", file);
                  break;
                case MACFLAG_S2RND:
                  fputs ("(S2RND)", file);
                  break;
                default:
                  gcc_unreachable ();
                }
              break;
            }
          else if (code == 'b')
            {
              if (INTVAL (x) == 0)
                fputs ("+=", file);
              else if (INTVAL (x) == 1)
                fputs ("-=", file);
              else
                gcc_unreachable ();
              break;
            }
          /* Moves to half registers with d or h modifiers always use unsigned
             constants.  */
          else if (code == 'd')
            x = GEN_INT ((INTVAL (x) >> 16) & 0xffff);
          else if (code == 'h')
            x = GEN_INT (INTVAL (x) & 0xffff);
          else if (code == 'N')
            x = GEN_INT (-INTVAL (x));
          else if (code == 'X')
            x = GEN_INT (exact_log2 (0xffffffff & INTVAL (x)));
          else if (code == 'Y')
            x = GEN_INT (exact_log2 (0xffffffff & ~INTVAL (x)));
          else if (code == 'Z')
            /* Used for LINK insns.  */
            x = GEN_INT (-8 - INTVAL (x));

          /* Fall through.  */

        case SYMBOL_REF:
          output_addr_const (file, x);
          break;

        case CONST_DOUBLE:
          output_operand_lossage ("invalid const_double operand");
          break;

        case UNSPEC:
          switch (XINT (x, 1))
            {
            case UNSPEC_MOVE_PIC:
              output_addr_const (file, XVECEXP (x, 0, 0));
              fprintf (file, "@GOT");
              break;

            case UNSPEC_MOVE_FDPIC:
              output_addr_const (file, XVECEXP (x, 0, 0));
              fprintf (file, "@GOT17M4");
              break;

            case UNSPEC_FUNCDESC_GOT17M4:
              output_addr_const (file, XVECEXP (x, 0, 0));
              fprintf (file, "@FUNCDESC_GOT17M4");
              break;

            case UNSPEC_LIBRARY_OFFSET:
              fprintf (file, "_current_shared_library_p5_offset_");
              break;

            default:
              gcc_unreachable ();
            }
          break;

        default:
          output_addr_const (file, x);
        }
    }
}
\f
/* Argument support functions.  */

/* Initialize a variable CUM of type CUMULATIVE_ARGS
   for a call to a function whose data type is FNTYPE.
   For a library call, FNTYPE is 0.
   The VDSP C Compiler manual (our ABI) says that the first 3 words
   of arguments are passed in R0, R1 and R2.  */

void
init_cumulative_args (CUMULATIVE_ARGS *cum, tree fntype,
                      rtx libname ATTRIBUTE_UNUSED)
{
  static CUMULATIVE_ARGS zero_cum;

  *cum = zero_cum;

  /* Set up the number of registers to use for passing arguments.  */

  cum->nregs = max_arg_registers;
  cum->arg_regs = arg_regs;

  cum->call_cookie = CALL_NORMAL;
  /* Check for a shortcall or longcall attribute.  */
  if (fntype && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_SHORT;
  else if (fntype && lookup_attribute ("longcall", TYPE_ATTRIBUTES (fntype)))
    cum->call_cookie |= CALL_LONG;

  return;
}

/* Update the data in CUM to advance over an argument
   of mode MODE and data type TYPE.
   (TYPE is null for libcalls where that information may not be available.)  */

void
function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
                      int named ATTRIBUTE_UNUSED)
{
  int count, bytes, words;

  bytes = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
  words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;

  cum->words += words;
  cum->nregs -= words;

  if (cum->nregs <= 0)
    {
      cum->nregs = 0;
      cum->arg_regs = NULL;
    }
  else
    {
      for (count = 1; count <= words; count++)
        cum->arg_regs++;
    }

  return;
}

/* Define where to put the arguments to a function.
   Value is zero to push the argument on the stack,
   or a hard register in which to store the argument.

   MODE is the argument's machine mode.
   TYPE is the data type of the argument (as a tree).
   This is null for libcalls where that information may
   not be available.
   CUM is a variable of type CUMULATIVE_ARGS which gives info about
   the preceding args and about the function being called.
   NAMED is nonzero if this argument is a named parameter
   (otherwise it is an extra parameter matching an ellipsis).  */

struct rtx_def *
function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode, tree type,
              int named ATTRIBUTE_UNUSED)
{
  int bytes
    = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);

  if (mode == VOIDmode)
    /* Compute operand 2 of the call insn.  */
    return GEN_INT (cum->call_cookie);

  if (bytes == -1)
    return NULL_RTX;

  if (cum->nregs)
    return gen_rtx_REG (mode, *(cum->arg_regs));

  return NULL_RTX;
}

1968 /* For an arg passed partly in registers and partly in memory,
1969 this is the number of bytes passed in registers.
1970 For args passed entirely in registers or entirely in memory, zero.
1971
1972 Refer to the VDSP C Compiler manual for our ABI:
1973 the first 3 words are passed in registers, so an argument larger
1974 than the registers still available is split between the registers
1975 and the stack. */
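/* A worked example (a sketch): once two argument words have consumed R0
   and R1, cum->nregs is 1 and bytes_left is 4; an 8-byte argument then
   returns 4, i.e. four bytes travel in R2 and the rest on the stack.  */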
1976
1977 static int
1978 bfin_arg_partial_bytes (CUMULATIVE_ARGS *cum, enum machine_mode mode,
1979 tree type ATTRIBUTE_UNUSED,
1980 bool named ATTRIBUTE_UNUSED)
1981 {
1982 int bytes
1983 = (mode == BLKmode) ? int_size_in_bytes (type) : GET_MODE_SIZE (mode);
1984 int bytes_left = cum->nregs * UNITS_PER_WORD;
1985
1986 if (bytes == -1)
1987 return 0;
1988
1989 if (bytes_left == 0)
1990 return 0;
1991 if (bytes > bytes_left)
1992 return bytes_left;
1993 return 0;
1994 }
1995
1996 /* Variable sized types are passed by reference. */
1997
1998 static bool
1999 bfin_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
2000 enum machine_mode mode ATTRIBUTE_UNUSED,
2001 const_tree type, bool named ATTRIBUTE_UNUSED)
2002 {
2003 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
2004 }
2005
2006 /* Decide whether a type should be returned in memory (true)
2007 or in a register (false). This is called via the target hook
2008 TARGET_RETURN_IN_MEMORY. */
2009
2010 static bool
2011 bfin_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
2012 {
2013 int size = int_size_in_bytes (type);
2014 return size > 2 * UNITS_PER_WORD || size == -1;
2015 }
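/* For instance, with 4-byte words an 8-byte struct fits in 2*UNITS_PER_WORD
   and is returned in registers, while a 12-byte struct, or any type of
   non-constant size (int_size_in_bytes returns -1), goes in memory.  */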
2016
2017 /* Return the register in which the address where a structure value
2018 should be stored is passed to a function. */
2019 static rtx
2020 bfin_struct_value_rtx (tree fntype ATTRIBUTE_UNUSED,
2021 int incoming ATTRIBUTE_UNUSED)
2022 {
2023 return gen_rtx_REG (Pmode, REG_P0);
2024 }
2025
2026 /* Return true when register may be used to pass function parameters. */
2027
2028 bool
2029 function_arg_regno_p (int n)
2030 {
2031 int i;
2032 for (i = 0; arg_regs[i] != -1; i++)
2033 if (n == arg_regs[i])
2034 return true;
2035 return false;
2036 }
2037
2038 /* Returns 1 if OP contains a symbol reference.  */
2039
2040 int
2041 symbolic_reference_mentioned_p (rtx op)
2042 {
2043 register const char *fmt;
2044 register int i;
2045
2046 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
2047 return 1;
2048
2049 fmt = GET_RTX_FORMAT (GET_CODE (op));
2050 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
2051 {
2052 if (fmt[i] == 'E')
2053 {
2054 register int j;
2055
2056 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
2057 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
2058 return 1;
2059 }
2060
2061 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
2062 return 1;
2063 }
2064
2065 return 0;
2066 }
2067
2068 /* Decide whether we can make a sibling call to a function. DECL is the
2069 declaration of the function being targeted by the call and EXP is the
2070 CALL_EXPR representing the call. */
2071
2072 static bool
2073 bfin_function_ok_for_sibcall (tree decl ATTRIBUTE_UNUSED,
2074 tree exp ATTRIBUTE_UNUSED)
2075 {
2076 struct cgraph_local_info *this_func, *called_func;
2077 e_funkind fkind = funkind (TREE_TYPE (current_function_decl));
2078 if (fkind != SUBROUTINE)
2079 return false;
2080 if (!TARGET_ID_SHARED_LIBRARY || TARGET_SEP_DATA)
2081 return true;
2082
2083 /* When compiling for ID shared libraries, can't sibcall a local function
2084 from a non-local function, because the local function thinks it does
2085 not need to reload P5 in the prologue, but the sibcall will pop P5 in the
2086 sibcall epilogue, and we end up with the wrong value in P5. */
2087
2088 if (!decl)
2089 /* Not enough information. */
2090 return false;
2091
2092 this_func = cgraph_local_info (current_function_decl);
2093 called_func = cgraph_local_info (decl);
2094 return !called_func->local || this_func->local;
2095 }
2096 \f
2097 /* Emit RTL insns to initialize the variable parts of a trampoline at
2098 TRAMP. FNADDR is an RTX for the address of the function's pure
2099 code. CXT is an RTX for the static chain value for the function. */
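/* A sketch of what the code below does: for FD-PIC the first word of the
   trampoline is set to point at TRAMP+8, where the actual code starts.
   The function address and static chain are then split into 16-bit
   halves and stored at byte offsets I+2, I+6, I+10 and I+14, which are
   assumed to be the immediate fields of the load instructions in the
   trampoline template.  */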
2100
2101 void
2102 initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
2103 {
2104 rtx t1 = copy_to_reg (fnaddr);
2105 rtx t2 = copy_to_reg (cxt);
2106 rtx addr;
2107 int i = 0;
2108
2109 if (TARGET_FDPIC)
2110 {
2111 rtx a = memory_address (Pmode, plus_constant (tramp, 8));
2112 addr = memory_address (Pmode, tramp);
2113 emit_move_insn (gen_rtx_MEM (SImode, addr), a);
2114 i = 8;
2115 }
2116
2117 addr = memory_address (Pmode, plus_constant (tramp, i + 2));
2118 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1));
2119 emit_insn (gen_ashrsi3 (t1, t1, GEN_INT (16)));
2120 addr = memory_address (Pmode, plus_constant (tramp, i + 6));
2121 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t1));
2122
2123 addr = memory_address (Pmode, plus_constant (tramp, i + 10));
2124 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2));
2125 emit_insn (gen_ashrsi3 (t2, t2, GEN_INT (16)));
2126 addr = memory_address (Pmode, plus_constant (tramp, i + 14));
2127 emit_move_insn (gen_rtx_MEM (HImode, addr), gen_lowpart (HImode, t2));
2128 }
2129
2130 /* Emit insns to move operands[1] into operands[0]. */
2131
2132 void
2133 emit_pic_move (rtx *operands, enum machine_mode mode ATTRIBUTE_UNUSED)
2134 {
2135 rtx temp = reload_in_progress ? operands[0] : gen_reg_rtx (Pmode);
2136
2137 gcc_assert (!TARGET_FDPIC || !(reload_in_progress || reload_completed));
2138 if (GET_CODE (operands[0]) == MEM && SYMBOLIC_CONST (operands[1]))
2139 operands[1] = force_reg (SImode, operands[1]);
2140 else
2141 operands[1] = legitimize_pic_address (operands[1], temp,
2142 TARGET_FDPIC ? OUR_FDPIC_REG
2143 : pic_offset_table_rtx);
2144 }
2145
2146 /* Expand a move operation in mode MODE. The operands are in OPERANDS.
2147 Returns true if no further code must be generated, false if the caller
2148 should generate an insn to move OPERANDS[1] to OPERANDS[0]. */
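/* As an illustration, a constant such as
   (const (plus (symbol_ref "x") (const_int 1000000))) that fails
   bfin_legitimate_constant_p (e.g. because the offset lies outside the
   underlying object) is rewritten below as a load of "x" into a register
   followed by a gen_addsi3 of the offset.  */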
2149
2150 bool
2151 expand_move (rtx *operands, enum machine_mode mode)
2152 {
2153 rtx op = operands[1];
2154 if ((TARGET_ID_SHARED_LIBRARY || TARGET_FDPIC)
2155 && SYMBOLIC_CONST (op))
2156 emit_pic_move (operands, mode);
2157 else if (mode == SImode && GET_CODE (op) == CONST
2158 && GET_CODE (XEXP (op, 0)) == PLUS
2159 && GET_CODE (XEXP (XEXP (op, 0), 0)) == SYMBOL_REF
2160 && !bfin_legitimate_constant_p (op))
2161 {
2162 rtx dest = operands[0];
2163 rtx op0, op1;
2164 gcc_assert (!reload_in_progress && !reload_completed);
2165 op = XEXP (op, 0);
2166 op0 = force_reg (mode, XEXP (op, 0));
2167 op1 = XEXP (op, 1);
2168 if (!insn_data[CODE_FOR_addsi3].operand[2].predicate (op1, mode))
2169 op1 = force_reg (mode, op1);
2170 if (GET_CODE (dest) == MEM)
2171 dest = gen_reg_rtx (mode);
2172 emit_insn (gen_addsi3 (dest, op0, op1));
2173 if (dest == operands[0])
2174 return true;
2175 operands[1] = dest;
2176 }
2177 /* Don't generate memory->memory or constant->memory moves; go through a
2178 register. */
2179 else if ((reload_in_progress | reload_completed) == 0
2180 && GET_CODE (operands[0]) == MEM
2181 && GET_CODE (operands[1]) != REG)
2182 operands[1] = force_reg (mode, operands[1]);
2183 return false;
2184 }
2185 \f
2186 /* Split one or more DImode RTL references into pairs of SImode
2187 references. The RTL can be REG, offsettable MEM, integer constant, or
2188 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
2189 split and "num" is its length. lo_half and hi_half are output arrays
2190 that parallel "operands". */
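/* For example, a DImode REG is split with simplify_gen_subreg into the
   SImode subreg at byte offset 0 (lo_half) and the one at byte offset 4
   (hi_half); a MEM is split with adjust_address instead, so that volatile
   references are handled correctly.  */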
2191
2192 void
2193 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
2194 {
2195 while (num--)
2196 {
2197 rtx op = operands[num];
2198
2199 /* simplify_subreg refuses to split volatile memory references,
2200 but we still have to handle them. */
2201 if (GET_CODE (op) == MEM)
2202 {
2203 lo_half[num] = adjust_address (op, SImode, 0);
2204 hi_half[num] = adjust_address (op, SImode, 4);
2205 }
2206 else
2207 {
2208 lo_half[num] = simplify_gen_subreg (SImode, op,
2209 GET_MODE (op) == VOIDmode
2210 ? DImode : GET_MODE (op), 0);
2211 hi_half[num] = simplify_gen_subreg (SImode, op,
2212 GET_MODE (op) == VOIDmode
2213 ? DImode : GET_MODE (op), 4);
2214 }
2215 }
2216 }
2217 \f
2218 bool
2219 bfin_longcall_p (rtx op, int call_cookie)
2220 {
2221 gcc_assert (GET_CODE (op) == SYMBOL_REF);
2222 if (call_cookie & CALL_SHORT)
2223 return 0;
2224 if (call_cookie & CALL_LONG)
2225 return 1;
2226 if (TARGET_LONG_CALLS)
2227 return 1;
2228 return 0;
2229 }
2230
2231 /* Expand a call instruction. FNADDR is the call target, RETVAL the return value.
2232 COOKIE is a CONST_INT holding the call_cookie prepared by init_cumulative_args.
2233 SIBCALL is nonzero if this is a sibling call. */
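/* The pattern built below is a PARALLEL of roughly this shape (a sketch;
   the authoritative patterns live in bfin.md):
     [(set RETVAL (call FNADDR CALLARG1))
      (use PICREG)       ;; only for TARGET_FDPIC
      (use COOKIE)
      (return)]          ;; or (clobber RETS) for a normal call  */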
2234
2235 void
2236 bfin_expand_call (rtx retval, rtx fnaddr, rtx callarg1, rtx cookie, int sibcall)
2237 {
2238 rtx use = NULL, call;
2239 rtx callee = XEXP (fnaddr, 0);
2240 int nelts = 3;
2241 rtx pat;
2242 rtx picreg = get_hard_reg_initial_val (SImode, FDPIC_REGNO);
2243 rtx retsreg = gen_rtx_REG (Pmode, REG_RETS);
2244 int n;
2245
2246 /* In an untyped call, we can get NULL for operand 2. */
2247 if (cookie == NULL_RTX)
2248 cookie = const0_rtx;
2249
2250 /* Static functions and indirect calls don't need the pic register. */
2251 if (!TARGET_FDPIC && flag_pic
2252 && GET_CODE (callee) == SYMBOL_REF
2253 && !SYMBOL_REF_LOCAL_P (callee))
2254 use_reg (&use, pic_offset_table_rtx);
2255
2256 if (TARGET_FDPIC)
2257 {
2258 int caller_has_l1_text, callee_has_l1_text;
2259
2260 caller_has_l1_text = callee_has_l1_text = 0;
2261
2262 if (lookup_attribute ("l1_text",
2263 DECL_ATTRIBUTES (cfun->decl)) != NULL_TREE)
2264 caller_has_l1_text = 1;
2265
2266 if (GET_CODE (callee) == SYMBOL_REF
2267 && SYMBOL_REF_DECL (callee) && DECL_P (SYMBOL_REF_DECL (callee))
2268 && lookup_attribute
2269 ("l1_text",
2270 DECL_ATTRIBUTES (SYMBOL_REF_DECL (callee))) != NULL_TREE)
2271 callee_has_l1_text = 1;
2272
2273 if (GET_CODE (callee) != SYMBOL_REF
2274 || bfin_longcall_p (callee, INTVAL (cookie))
2275 || (GET_CODE (callee) == SYMBOL_REF
2276 && !SYMBOL_REF_LOCAL_P (callee)
2277 && TARGET_INLINE_PLT)
2278 || caller_has_l1_text != callee_has_l1_text
2279 || (caller_has_l1_text && callee_has_l1_text
2280 && (GET_CODE (callee) != SYMBOL_REF
2281 || !SYMBOL_REF_LOCAL_P (callee))))
2282 {
2283 rtx addr = callee;
2284 if (! address_operand (addr, Pmode))
2285 addr = force_reg (Pmode, addr);
2286
2287 fnaddr = gen_reg_rtx (SImode);
2288 emit_insn (gen_load_funcdescsi (fnaddr, addr));
2289 fnaddr = gen_rtx_MEM (Pmode, fnaddr);
2290
2291 picreg = gen_reg_rtx (SImode);
2292 emit_insn (gen_load_funcdescsi (picreg,
2293 plus_constant (addr, 4)));
2294 }
2295
2296 nelts++;
2297 }
2298 else if ((!register_no_elim_operand (callee, Pmode)
2299 && GET_CODE (callee) != SYMBOL_REF)
2300 || (GET_CODE (callee) == SYMBOL_REF
2301 && ((TARGET_ID_SHARED_LIBRARY && !TARGET_LEAF_ID_SHARED_LIBRARY)
2302 || bfin_longcall_p (callee, INTVAL (cookie)))))
2303 {
2304 callee = copy_to_mode_reg (Pmode, callee);
2305 fnaddr = gen_rtx_MEM (Pmode, callee);
2306 }
2307 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
2308
2309 if (retval)
2310 call = gen_rtx_SET (VOIDmode, retval, call);
2311
2312 pat = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (nelts));
2313 n = 0;
2314 XVECEXP (pat, 0, n++) = call;
2315 if (TARGET_FDPIC)
2316 XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, picreg);
2317 XVECEXP (pat, 0, n++) = gen_rtx_USE (VOIDmode, cookie);
2318 if (sibcall)
2319 XVECEXP (pat, 0, n++) = gen_rtx_RETURN (VOIDmode);
2320 else
2321 XVECEXP (pat, 0, n++) = gen_rtx_CLOBBER (VOIDmode, retsreg);
2322 call = emit_call_insn (pat);
2323 if (use)
2324 CALL_INSN_FUNCTION_USAGE (call) = use;
2325 }
2326 \f
2327 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
2328
2329 int
2330 hard_regno_mode_ok (int regno, enum machine_mode mode)
2331 {
2332 /* Allow only dregs to store value of mode HI or QI */
2333 enum reg_class rclass = REGNO_REG_CLASS (regno);
2334
2335 if (mode == CCmode)
2336 return 0;
2337
2338 if (mode == V2HImode)
2339 return D_REGNO_P (regno);
2340 if (rclass == CCREGS)
2341 return mode == BImode;
2342 if (mode == PDImode || mode == V2PDImode)
2343 return regno == REG_A0 || regno == REG_A1;
2344
2345 /* Allow all normal 32-bit regs, except REG_M3, in case regclass ever comes
2346 up with a bad register class (such as ALL_REGS) for DImode. */
2347 if (mode == DImode)
2348 return regno < REG_M3;
2349
2350 if (mode == SImode
2351 && TEST_HARD_REG_BIT (reg_class_contents[PROLOGUE_REGS], regno))
2352 return 1;
2353
2354 return TEST_HARD_REG_BIT (reg_class_contents[MOST_REGS], regno);
2355 }
2356
2357 /* Implements target hook vector_mode_supported_p. */
2358
2359 static bool
2360 bfin_vector_mode_supported_p (enum machine_mode mode)
2361 {
2362 return mode == V2HImode;
2363 }
2364
2365 /* Return the cost of moving data from a register in class CLASS1 to
2366 one in class CLASS2. A cost of 2 is the default. */
2367
2368 int
2369 bfin_register_move_cost (enum machine_mode mode,
2370 enum reg_class class1, enum reg_class class2)
2371 {
2372 /* These need secondary reloads, so they're more expensive. */
2373 if ((class1 == CCREGS && !reg_class_subset_p (class2, DREGS))
2374 || (class2 == CCREGS && !reg_class_subset_p (class1, DREGS)))
2375 return 4;
2376
2377 /* If optimizing for size, always prefer reg-reg over reg-memory moves. */
2378 if (optimize_size)
2379 return 2;
2380
2381 if (GET_MODE_CLASS (mode) == MODE_INT)
2382 {
2383 /* Discourage trying to use the accumulators. */
2384 if (TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A0)
2385 || TEST_HARD_REG_BIT (reg_class_contents[class1], REG_A1)
2386 || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A0)
2387 || TEST_HARD_REG_BIT (reg_class_contents[class2], REG_A1))
2388 return 20;
2389 }
2390 return 2;
2391 }
2392
2393 /* Return the cost of moving data of mode M between a
2394 register and memory. A value of 2 is the default; this cost is
2395 relative to those in `REGISTER_MOVE_COST'.
2396
2397 ??? In theory L1 memory has single-cycle latency. We should add a switch
2398 that tells the compiler whether we expect to use only L1 memory for the
2399 program; it'll make the costs more accurate. */
2400
2401 int
2402 bfin_memory_move_cost (enum machine_mode mode ATTRIBUTE_UNUSED,
2403 enum reg_class rclass,
2404 int in ATTRIBUTE_UNUSED)
2405 {
2406 /* Make memory accesses slightly more expensive than any register-register
2407 move. Also, penalize non-DP registers, since they need secondary
2408 reloads to load and store. */
2409 if (! reg_class_subset_p (rclass, DPREGS))
2410 return 10;
2411
2412 return 8;
2413 }
2414
2415 /* Inform reload about cases where moving X with a mode MODE to a register in
2416 RCLASS requires an extra scratch register. Return the class needed for the
2417 scratch register. */
2418
2419 static enum reg_class
2420 bfin_secondary_reload (bool in_p, rtx x, enum reg_class rclass,
2421 enum machine_mode mode, secondary_reload_info *sri)
2422 {
2423 /* If we have HImode or QImode, we can only use DREGS as secondary registers;
2424 in most other cases we can also use PREGS. */
2425 enum reg_class default_class = GET_MODE_SIZE (mode) >= 4 ? DPREGS : DREGS;
2426 enum reg_class x_class = NO_REGS;
2427 enum rtx_code code = GET_CODE (x);
2428
2429 if (code == SUBREG)
2430 x = SUBREG_REG (x), code = GET_CODE (x);
2431 if (REG_P (x))
2432 {
2433 int regno = REGNO (x);
2434 if (regno >= FIRST_PSEUDO_REGISTER)
2435 regno = reg_renumber[regno];
2436
2437 if (regno == -1)
2438 code = MEM;
2439 else
2440 x_class = REGNO_REG_CLASS (regno);
2441 }
2442
2443 /* We can be asked to reload (plus (FP) (large_constant)) into a DREG.
2444 This happens as a side effect of register elimination, and we need
2445 a scratch register to do it. */
2446 if (fp_plus_const_operand (x, mode))
2447 {
2448 rtx op2 = XEXP (x, 1);
2449 int large_constant_p = ! satisfies_constraint_Ks7 (op2);
2450
2451 if (rclass == PREGS || rclass == PREGS_CLOBBERED)
2452 return NO_REGS;
2453 /* If destination is a DREG, we can do this without a scratch register
2454 if the constant is valid for an add instruction. */
2455 if ((rclass == DREGS || rclass == DPREGS)
2456 && ! large_constant_p)
2457 return NO_REGS;
2458 /* Reloading to anything other than a DREG? Use a PREG scratch
2459 register. */
2460 sri->icode = CODE_FOR_reload_insi;
2461 return NO_REGS;
2462 }
2463
2464 /* Data can usually be moved freely between registers of most classes.
2465 AREGS are an exception; they can only move to or from another register
2466 in AREGS or one in DREGS. They can also be assigned the constant 0. */
2467 if (x_class == AREGS || x_class == EVEN_AREGS || x_class == ODD_AREGS)
2468 return (rclass == DREGS || rclass == AREGS || rclass == EVEN_AREGS
2469 || rclass == ODD_AREGS
2470 ? NO_REGS : DREGS);
2471
2472 if (rclass == AREGS || rclass == EVEN_AREGS || rclass == ODD_AREGS)
2473 {
2474 if (code == MEM)
2475 {
2476 sri->icode = in_p ? CODE_FOR_reload_inpdi : CODE_FOR_reload_outpdi;
2477 return NO_REGS;
2478 }
2479
2480 if (x != const0_rtx && x_class != DREGS)
2481 {
2482 return DREGS;
2483 }
2484 else
2485 return NO_REGS;
2486 }
2487
2488 /* CCREGS can only be moved from/to DREGS. */
2489 if (rclass == CCREGS && x_class != DREGS)
2490 return DREGS;
2491 if (x_class == CCREGS && rclass != DREGS)
2492 return DREGS;
2493
2494 /* All registers other than AREGS can load arbitrary constants. The only
2495 case that remains is MEM. */
2496 if (code == MEM)
2497 if (! reg_class_subset_p (rclass, default_class))
2498 return default_class;
2499
2500 return NO_REGS;
2501 }
2502 \f
2503 /* Implement TARGET_HANDLE_OPTION. */
2504
2505 static bool
2506 bfin_handle_option (size_t code, const char *arg, int value)
2507 {
2508 switch (code)
2509 {
2510 case OPT_mshared_library_id_:
2511 if (value > MAX_LIBRARY_ID)
2512 error ("-mshared-library-id=%s is not between 0 and %d",
2513 arg, MAX_LIBRARY_ID);
2514 bfin_lib_id_given = 1;
2515 return true;
2516
2517 case OPT_mcpu_:
2518 {
2519 const char *p, *q;
2520 int i;
2521
2522 i = 0;
2523 while ((p = bfin_cpus[i].name) != NULL)
2524 {
2525 if (strncmp (arg, p, strlen (p)) == 0)
2526 break;
2527 i++;
2528 }
2529
2530 if (p == NULL)
2531 {
2532 error ("-mcpu=%s is not valid", arg);
2533 return false;
2534 }
2535
2536 bfin_cpu_type = bfin_cpus[i].type;
2537
2538 q = arg + strlen (p);
2539
2540 if (*q == '\0')
2541 {
2542 bfin_si_revision = bfin_cpus[i].si_revision;
2543 bfin_workarounds |= bfin_cpus[i].workarounds;
2544 }
2545 else if (strcmp (q, "-none") == 0)
2546 bfin_si_revision = -1;
2547 else if (strcmp (q, "-any") == 0)
2548 {
2549 bfin_si_revision = 0xffff;
2550 while (bfin_cpus[i].type == bfin_cpu_type)
2551 {
2552 bfin_workarounds |= bfin_cpus[i].workarounds;
2553 i++;
2554 }
2555 }
2556 else
2557 {
2558 unsigned int si_major, si_minor;
2559 int rev_len, n;
2560
2561 rev_len = strlen (q);
2562
2563 if (sscanf (q, "-%u.%u%n", &si_major, &si_minor, &n) != 2
2564 || n != rev_len
2565 || si_major > 0xff || si_minor > 0xff)
2566 {
2567 invalid_silicon_revision:
2568 error ("-mcpu=%s has invalid silicon revision", arg);
2569 return false;
2570 }
2571
2572 bfin_si_revision = (si_major << 8) | si_minor;
2573
2574 while (bfin_cpus[i].type == bfin_cpu_type
2575 && bfin_cpus[i].si_revision != bfin_si_revision)
2576 i++;
2577
2578 if (bfin_cpus[i].type != bfin_cpu_type)
2579 goto invalid_silicon_revision;
2580
2581 bfin_workarounds |= bfin_cpus[i].workarounds;
2582 }
2583
2584 return true;
2585 }
2586
2587 default:
2588 return true;
2589 }
2590 }
2591
2592 static struct machine_function *
2593 bfin_init_machine_status (void)
2594 {
2595 struct machine_function *f;
2596
2597 f = GGC_CNEW (struct machine_function);
2598
2599 return f;
2600 }
2601
2602 /* Implement the macro OVERRIDE_OPTIONS. */
2603
2604 void
2605 override_options (void)
2606 {
2607 /* If processor type is not specified, enable all workarounds. */
2608 if (bfin_cpu_type == BFIN_CPU_UNKNOWN)
2609 {
2610 int i;
2611
2612 for (i = 0; bfin_cpus[i].name != NULL; i++)
2613 bfin_workarounds |= bfin_cpus[i].workarounds;
2614
2615 bfin_si_revision = 0xffff;
2616 }
2617
2618 if (bfin_csync_anomaly == 1)
2619 bfin_workarounds |= WA_SPECULATIVE_SYNCS;
2620 else if (bfin_csync_anomaly == 0)
2621 bfin_workarounds &= ~WA_SPECULATIVE_SYNCS;
2622
2623 if (bfin_specld_anomaly == 1)
2624 bfin_workarounds |= WA_SPECULATIVE_LOADS;
2625 else if (bfin_specld_anomaly == 0)
2626 bfin_workarounds &= ~WA_SPECULATIVE_LOADS;
2627
2628 if (TARGET_OMIT_LEAF_FRAME_POINTER)
2629 flag_omit_frame_pointer = 1;
2630
2631 /* Library identification */
2632 if (bfin_lib_id_given && ! TARGET_ID_SHARED_LIBRARY)
2633 error ("-mshared-library-id= specified without -mid-shared-library");
2634
2635 if (stack_limit_rtx && TARGET_STACK_CHECK_L1)
2636 error ("Can't use multiple stack checking methods together.");
2637
2638 if (TARGET_ID_SHARED_LIBRARY && TARGET_FDPIC)
2639 error ("ID shared libraries and FD-PIC mode can't be used together.");
2640
2641 /* Don't allow the user to specify -mid-shared-library and -msep-data
2642 together, as it makes little sense from a user's point of view... */
2643 if (TARGET_SEP_DATA && TARGET_ID_SHARED_LIBRARY)
2644 error ("cannot specify both -msep-data and -mid-shared-library");
2645 /* ... internally, however, it's nearly the same. */
2646 if (TARGET_SEP_DATA)
2647 target_flags |= MASK_ID_SHARED_LIBRARY | MASK_LEAF_ID_SHARED_LIBRARY;
2648
2649 if (TARGET_ID_SHARED_LIBRARY && flag_pic == 0)
2650 flag_pic = 1;
2651
2652 /* There is no single unaligned SI op for PIC code. Sometimes we
2653 need to use ".4byte" and sometimes we need to use ".picptr".
2654 See bfin_assemble_integer for details. */
2655 if (TARGET_FDPIC)
2656 targetm.asm_out.unaligned_op.si = 0;
2657
2658 /* Silently turn off flag_pic if not doing FDPIC or ID shared libraries,
2659 since we don't support it and it'll just break. */
2660 if (flag_pic && !TARGET_FDPIC && !TARGET_ID_SHARED_LIBRARY)
2661 flag_pic = 0;
2662
2663 if (TARGET_MULTICORE && bfin_cpu_type != BFIN_CPU_BF561)
2664 error ("-mmulticore can only be used with BF561");
2665
2666 if (TARGET_COREA && !TARGET_MULTICORE)
2667 error ("-mcorea should be used with -mmulticore");
2668
2669 if (TARGET_COREB && !TARGET_MULTICORE)
2670 error ("-mcoreb should be used with -mmulticore");
2671
2672 if (TARGET_COREA && TARGET_COREB)
2673 error ("-mcorea and -mcoreb can't be used together");
2674
2675 flag_schedule_insns = 0;
2676
2677 /* Passes after sched2 can break the helpful TImode annotations that
2678 haifa-sched puts on every insn. Just do scheduling in reorg. */
2679 bfin_flag_schedule_insns2 = flag_schedule_insns_after_reload;
2680 flag_schedule_insns_after_reload = 0;
2681
2682 init_machine_status = bfin_init_machine_status;
2683 }
2684
2685 /* Return the destination address of BRANCH.
2686 We need to use this instead of get_attr_length, because the
2687 cbranch_with_nops pattern conservatively sets its length to 6, and
2688 we still prefer to use shorter sequences. */
2689
2690 static int
2691 branch_dest (rtx branch)
2692 {
2693 rtx dest;
2694 int dest_uid;
2695 rtx pat = PATTERN (branch);
2696 if (GET_CODE (pat) == PARALLEL)
2697 pat = XVECEXP (pat, 0, 0);
2698 dest = SET_SRC (pat);
2699 if (GET_CODE (dest) == IF_THEN_ELSE)
2700 dest = XEXP (dest, 1);
2701 dest = XEXP (dest, 0);
2702 dest_uid = INSN_UID (dest);
2703 return INSN_ADDRESSES (dest_uid);
2704 }
2705
2706 /* Return nonzero if INSN is annotated with a REG_BR_PROB note that indicates
2707 it's a branch that's predicted taken. */
2708
2709 static int
2710 cbranch_predicted_taken_p (rtx insn)
2711 {
2712 rtx x = find_reg_note (insn, REG_BR_PROB, 0);
2713
2714 if (x)
2715 {
2716 int pred_val = INTVAL (XEXP (x, 0));
2717
2718 return pred_val >= REG_BR_PROB_BASE / 2;
2719 }
2720
2721 return 0;
2722 }
2723
2724 /* Templates for use by asm_conditional_branch. */
2725
2726 static const char *ccbranch_templates[][3] = {
2727 { "if !cc jump %3;", "if cc jump 4 (bp); jump.s %3;", "if cc jump 6 (bp); jump.l %3;" },
2728 { "if cc jump %3;", "if !cc jump 4 (bp); jump.s %3;", "if !cc jump 6 (bp); jump.l %3;" },
2729 { "if !cc jump %3 (bp);", "if cc jump 4; jump.s %3;", "if cc jump 6; jump.l %3;" },
2730 { "if cc jump %3 (bp);", "if !cc jump 4; jump.s %3;", "if !cc jump 6; jump.l %3;" },
2731 };
2732
2733 /* Output INSN, which is a conditional branch instruction with operands
2734 OPERANDS.
2735
2736 We deal with the various forms of conditional branches that can be generated
2737 by bfin_reorg to prevent the hardware from doing speculative loads, by
2738 - emitting a sufficient number of nops, if N_NOPS is nonzero, or
2739 - always emitting the branch as predicted taken, if PREDICT_TAKEN is true.
2740 Either of these is only necessary if the branch is short, otherwise the
2741 template we use ends in an unconditional jump which flushes the pipeline
2742 anyway. */
2743
2744 void
2745 asm_conditional_branch (rtx insn, rtx *operands, int n_nops, int predict_taken)
2746 {
2747 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2748 /* Note: for sequences like "if cc jump 4; jump.s offset", the offset
2749 is measured from the start of the "if cc" instruction rather than
2750 from the jump, so the effective range for jump.s is (-4094, 4096)
2751 instead of (-4096, 4094). */
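/* For instance, offset == 2000 is outside the direct conditional-branch
   range but inside the jump.s range, so len == 1 below selects a
   two-instruction template such as "if !cc jump 4; jump.s %3;".  */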
2752 int len = (offset >= -1024 && offset <= 1022 ? 0
2753 : offset >= -4094 && offset <= 4096 ? 1
2754 : 2);
2755 int bp = predict_taken && len == 0 ? 1 : cbranch_predicted_taken_p (insn);
2756 int idx = (bp << 1) | (GET_CODE (operands[0]) == EQ ? BRF : BRT);
2757 output_asm_insn (ccbranch_templates[idx][len], operands);
2758 gcc_assert (n_nops == 0 || !bp);
2759 if (len == 0)
2760 while (n_nops-- > 0)
2761 output_asm_insn ("nop;", NULL);
2762 }
2763
2764 /* Emit rtl for a comparison operation CMP in mode MODE; the operands to
2765 compare are contained in CMP. Return an rtx usable as the branch condition. */
2766
2767 rtx
2768 bfin_gen_compare (rtx cmp, enum machine_mode mode ATTRIBUTE_UNUSED)
2769 {
2770 enum rtx_code code1, code2;
2771 rtx op0 = XEXP (cmp, 0), op1 = XEXP (cmp, 1);
2772 rtx tem = bfin_cc_rtx;
2773 enum rtx_code code = GET_CODE (cmp);
2774
2775 /* If we have a BImode input, then we already have a compare result, and
2776 do not need to emit another comparison. */
2777 if (GET_MODE (op0) == BImode)
2778 {
2779 gcc_assert ((code == NE || code == EQ) && op1 == const0_rtx);
2780 tem = op0, code2 = code;
2781 }
2782 else
2783 {
2784 switch (code) {
2785 /* bfin has these conditions */
2786 case EQ:
2787 case LT:
2788 case LE:
2789 case LEU:
2790 case LTU:
2791 code1 = code;
2792 code2 = NE;
2793 break;
2794 default:
2795 code1 = reverse_condition (code);
2796 code2 = EQ;
2797 break;
2798 }
2799 emit_insn (gen_rtx_SET (VOIDmode, tem,
2800 gen_rtx_fmt_ee (code1, BImode, op0, op1)));
2801 }
2802
2803 return gen_rtx_fmt_ee (code2, BImode, tem, CONST0_RTX (BImode));
2804 }
2805 \f
2806 /* Return nonzero iff C has exactly one bit set if it is interpreted
2807 as a 32-bit constant. */
2808
2809 int
2810 log2constp (unsigned HOST_WIDE_INT c)
2811 {
2812 c &= 0xFFFFFFFF;
2813 return c != 0 && (c & (c-1)) == 0;
2814 }
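/* For example, 0x00010000 yields 1 while 0x00010001 yields 0: c & (c-1)
   clears the lowest set bit, so it is zero exactly when C had at most
   one bit set, and the c != 0 test excludes zero itself.  */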
2815
2816 /* Returns the number of consecutive least significant zeros in the binary
2817 representation of *V.
2818 We modify *V to contain the original value arithmetically shifted right by
2819 the number of zeroes. */
2820
2821 static int
2822 shiftr_zero (HOST_WIDE_INT *v)
2823 {
2824 unsigned HOST_WIDE_INT tmp = *v;
2825 unsigned HOST_WIDE_INT sgn;
2826 int n = 0;
2827
2828 if (tmp == 0)
2829 return 0;
2830
2831 sgn = tmp & ((unsigned HOST_WIDE_INT) 1 << (HOST_BITS_PER_WIDE_INT - 1));
2832 while ((tmp & 0x1) == 0 && n <= 32)
2833 {
2834 tmp = (tmp >> 1) | sgn;
2835 n++;
2836 }
2837 *v = tmp;
2838 return n;
2839 }
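/* For example, *v == 0x50 (binary 1010000) has four trailing zeros;
   the function returns 4 and leaves *v == 0x5.  */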
2840
2841 /* After reload, split the load of an immediate constant. OPERANDS are the
2842 operands of the movsi_insn pattern which we are splitting. We return
2843 nonzero if we emitted a sequence to load the constant, zero if we emitted
2844 nothing because we want to use the splitter's default sequence. */
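/* For example (a sketch): val == 0x20000 gives shifted == 1 and
   num_zero == 17, so for a D register the code below emits a short load
   of 1 followed by a left shift by 17, instead of the splitter's default
   sequence.  */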
2845
2846 int
2847 split_load_immediate (rtx operands[])
2848 {
2849 HOST_WIDE_INT val = INTVAL (operands[1]);
2850 HOST_WIDE_INT tmp;
2851 HOST_WIDE_INT shifted = val;
2852 HOST_WIDE_INT shifted_compl = ~val;
2853 int num_zero = shiftr_zero (&shifted);
2854 int num_compl_zero = shiftr_zero (&shifted_compl);
2855 unsigned int regno = REGNO (operands[0]);
2856
2857 /* This case takes care of single-bit set/clear constants, which we could
2858 also implement with BITSET/BITCLR. */
2859 if (num_zero
2860 && shifted >= -32768 && shifted < 65536
2861 && (D_REGNO_P (regno)
2862 || (regno >= REG_P0 && regno <= REG_P7 && num_zero <= 2)))
2863 {
2864 emit_insn (gen_movsi (operands[0], GEN_INT (shifted)));
2865 emit_insn (gen_ashlsi3 (operands[0], operands[0], GEN_INT (num_zero)));
2866 return 1;
2867 }
2868
2869 tmp = val & 0xFFFF;
2870 tmp |= -(tmp & 0x8000);
2871
2872 /* If high word has one bit set or clear, try to use a bit operation. */
2873 if (D_REGNO_P (regno))
2874 {
2875 if (log2constp (val & 0xFFFF0000))
2876 {
2877 emit_insn (gen_movsi (operands[0], GEN_INT (val & 0xFFFF)));
2878 emit_insn (gen_iorsi3 (operands[0], operands[0], GEN_INT (val & 0xFFFF0000)));
2879 return 1;
2880 }
2881 else if (log2constp (val | 0xFFFF) && (val & 0x8000) != 0)
2882 {
2883 emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2884 emit_insn (gen_andsi3 (operands[0], operands[0], GEN_INT (val | 0xFFFF)));
return 1;
2885 }
2886 }
2887
2888 if (D_REGNO_P (regno))
2889 {
2890 if (tmp >= -64 && tmp <= 63)
2891 {
2892 emit_insn (gen_movsi (operands[0], GEN_INT (tmp)));
2893 emit_insn (gen_movstricthi_high (operands[0], GEN_INT (val & -65536)));
2894 return 1;
2895 }
2896
2897 if ((val & 0xFFFF0000) == 0)
2898 {
2899 emit_insn (gen_movsi (operands[0], const0_rtx));
2900 emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2901 return 1;
2902 }
2903
2904 if ((val & 0xFFFF0000) == 0xFFFF0000)
2905 {
2906 emit_insn (gen_movsi (operands[0], constm1_rtx));
2907 emit_insn (gen_movsi_low (operands[0], operands[0], operands[1]));
2908 return 1;
2909 }
2910 }
2911
2912 /* Need DREGs for the remaining case. */
2913 if (regno > REG_R7)
2914 return 0;
2915
2916 if (optimize_size
2917 && num_compl_zero && shifted_compl >= -64 && shifted_compl <= 63)
2918 {
2919 /* If optimizing for size, generate a sequence that has more instructions
2920 but is shorter. */
2921 emit_insn (gen_movsi (operands[0], GEN_INT (shifted_compl)));
2922 emit_insn (gen_ashlsi3 (operands[0], operands[0],
2923 GEN_INT (num_compl_zero)));
2924 emit_insn (gen_one_cmplsi2 (operands[0], operands[0]));
2925 return 1;
2926 }
2927 return 0;
2928 }
2929 \f
2930 /* Return true if VALUE is a legitimate constant offset in a memory address
2931 for a memory operand of mode MODE. Return false if not. */
2932
2933 static bool
2934 bfin_valid_add (enum machine_mode mode, HOST_WIDE_INT value)
2935 {
2936 unsigned HOST_WIDE_INT v = value > 0 ? value : -value;
2937 int sz = GET_MODE_SIZE (mode);
2938 int shift = sz == 1 ? 0 : sz == 2 ? 1 : 2;
2939 /* The usual offsettable_memref machinery doesn't work so well for this
2940 port, so we deal with the problem here. */
2941 if (value > 0 && sz == 8)
2942 v += 4;
2943 return (v & ~(0x7fff << shift)) == 0;
2944 }
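/* A worked example: for HImode, sz == 2 and shift == 1, so V must be even
   and at most 0xfffe (the uimm16m2 range); for SImode the offset must be
   a multiple of 4 no larger than 0x1fffc.  */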
2945
2946 static bool
2947 bfin_valid_reg_p (unsigned int regno, int strict, enum machine_mode mode,
2948 enum rtx_code outer_code)
2949 {
2950 if (strict)
2951 return REGNO_OK_FOR_BASE_STRICT_P (regno, mode, outer_code, SCRATCH);
2952 else
2953 return REGNO_OK_FOR_BASE_NONSTRICT_P (regno, mode, outer_code, SCRATCH);
2954 }
2955
2956 /* Recognize an RTL expression that is a valid memory address for an
2957 instruction. The MODE argument is the machine mode for the MEM expression
2958 that wants to use this address.
2959
2960 Blackfin addressing modes are as follows:
2961
2962 [preg]
2963 [preg + imm16]
2964
2965 B [ Preg + uimm15 ]
2966 W [ Preg + uimm16m2 ]
2967 [ Preg + uimm17m4 ]
2968
2969 [preg++]
2970 [preg--]
2971 [--sp]
2972 */
2973
2974 static bool
2975 bfin_legitimate_address_p (enum machine_mode mode, rtx x, bool strict)
2976 {
2977 switch (GET_CODE (x)) {
2978 case REG:
2979 if (bfin_valid_reg_p (REGNO (x), strict, mode, MEM))
2980 return true;
2981 break;
2982 case PLUS:
2983 if (REG_P (XEXP (x, 0))
2984 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PLUS)
2985 && ((GET_CODE (XEXP (x, 1)) == UNSPEC && mode == SImode)
2986 || (GET_CODE (XEXP (x, 1)) == CONST_INT
2987 && bfin_valid_add (mode, INTVAL (XEXP (x, 1))))))
2988 return true;
2989 break;
2990 case POST_INC:
2991 case POST_DEC:
2992 if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2993 && REG_P (XEXP (x, 0))
2994 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, POST_INC))
2995 return true;
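/* Fall through.  */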
2996 case PRE_DEC:
2997 if (LEGITIMATE_MODE_FOR_AUTOINC_P (mode)
2998 && XEXP (x, 0) == stack_pointer_rtx
2999 && REG_P (XEXP (x, 0))
3000 && bfin_valid_reg_p (REGNO (XEXP (x, 0)), strict, mode, PRE_DEC))
3001 return true;
3002 break;
3003 default:
3004 break;
3005 }
3006 return false;
3007 }
3008
3009 /* Decide whether we can force certain constants to memory. If we
3010 decide we can't, the caller should be able to cope with it in
3011 another way. */
3012
3013 static bool
3014 bfin_cannot_force_const_mem (rtx x ATTRIBUTE_UNUSED)
3015 {
3016 /* We have only one class of non-legitimate constants, and our movsi
3017 expander knows how to handle them. Dropping these constants into the
3018 data section would only shift the problem - we'd still get relocs
3019 outside the object, in the data section rather than the text section. */
3020 return true;
3021 }
3022
3023 /* Ensure that for any constant of the form symbol + offset, the offset
3024 remains within the object. Any other constants are ok.
3025 This ensures that flat binaries never have to deal with relocations
3026 crossing section boundaries. */
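/* For example, given "int arr[4];", the constant &arr + 8 is accepted
   below, while &arr + 32 is rejected because the 16-byte object ends
   before that offset.  */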
3027
3028 bool
3029 bfin_legitimate_constant_p (rtx x)
3030 {
3031 rtx sym;
3032 HOST_WIDE_INT offset;
3033
3034 if (GET_CODE (x) != CONST)
3035 return true;
3036
3037 x = XEXP (x, 0);
3038 gcc_assert (GET_CODE (x) == PLUS);
3039
3040 sym = XEXP (x, 0);
3041 x = XEXP (x, 1);
3042 if (GET_CODE (sym) != SYMBOL_REF
3043 || GET_CODE (x) != CONST_INT)
3044 return true;
3045 offset = INTVAL (x);
3046
3047 if (SYMBOL_REF_DECL (sym) == 0)
3048 return true;
3049 if (offset < 0
3050 || offset >= int_size_in_bytes (TREE_TYPE (SYMBOL_REF_DECL (sym))))
3051 return false;
3052
3053 return true;
3054 }
3055
3056 static bool
3057 bfin_rtx_costs (rtx x, int code, int outer_code, int *total, bool speed)
3058 {
3059 int cost2 = COSTS_N_INSNS (1);
3060 rtx op0, op1;
3061
3062 switch (code)
3063 {
3064 case CONST_INT:
3065 if (outer_code == SET || outer_code == PLUS)
3066 *total = satisfies_constraint_Ks7 (x) ? 0 : cost2;
3067 else if (outer_code == AND)
3068 *total = log2constp (~INTVAL (x)) ? 0 : cost2;
3069 else if (outer_code == LE || outer_code == LT || outer_code == EQ)
3070 *total = (INTVAL (x) >= -4 && INTVAL (x) <= 3) ? 0 : cost2;
3071 else if (outer_code == LEU || outer_code == LTU)
3072 *total = (INTVAL (x) >= 0 && INTVAL (x) <= 7) ? 0 : cost2;
3073 else if (outer_code == MULT)
3074 *total = (INTVAL (x) == 2 || INTVAL (x) == 4) ? 0 : cost2;
3075 else if (outer_code == ASHIFT && (INTVAL (x) == 1 || INTVAL (x) == 2))
3076 *total = 0;
3077 else if (outer_code == ASHIFT || outer_code == ASHIFTRT
3078 || outer_code == LSHIFTRT)
3079 *total = (INTVAL (x) >= 0 && INTVAL (x) <= 31) ? 0 : cost2;
3080 else if (outer_code == IOR || outer_code == XOR)
3081 *total = (INTVAL (x) & (INTVAL (x) - 1)) == 0 ? 0 : cost2;
3082 else
3083 *total = cost2;
3084 return true;
3085
3086 case CONST:
3087 case LABEL_REF:
3088 case SYMBOL_REF:
3089 case CONST_DOUBLE:
3090 *total = COSTS_N_INSNS (2);
3091 return true;
3092
3093 case PLUS:
3094 op0 = XEXP (x, 0);
3095 op1 = XEXP (x, 1);
3096 if (GET_MODE (x) == SImode)
3097 {
3098 if (GET_CODE (op0) == MULT
3099 && GET_CODE (XEXP (op0, 1)) == CONST_INT)
3100 {
3101 HOST_WIDE_INT val = INTVAL (XEXP (op0, 1));
3102 if (val == 2 || val == 4)
3103 {
3104 *total = cost2;
3105 *total += rtx_cost (XEXP (op0, 0), outer_code, speed);
3106 *total += rtx_cost (op1, outer_code, speed);
3107 return true;
3108 }
3109 }
3110 *total = cost2;
3111 if (GET_CODE (op0) != REG
3112 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3113 *total += rtx_cost (op0, SET, speed);
3114 #if 0 /* We'd like to do this for accuracy, but it biases the loop optimizer
3115 towards creating too many induction variables. */
3116 if (!reg_or_7bit_operand (op1, SImode))
3117 *total += rtx_cost (op1, SET, speed);
3118 #endif
3119 }
3120 else if (GET_MODE (x) == DImode)
3121 {
3122 *total = 6 * cost2;
3123 if (GET_CODE (op1) != CONST_INT
3124 || !satisfies_constraint_Ks7 (op1))
3125 *total += rtx_cost (op1, PLUS, speed);
3126 if (GET_CODE (op0) != REG
3127 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3128 *total += rtx_cost (op0, PLUS, speed);
3129 }
3130 return true;
3131
3132 case MINUS:
3133 if (GET_MODE (x) == DImode)
3134 *total = 6 * cost2;
3135 else
3136 *total = cost2;
3137 return true;
3138
3139 case ASHIFT:
3140 case ASHIFTRT:
3141 case LSHIFTRT:
3142 if (GET_MODE (x) == DImode)
3143 *total = 6 * cost2;
3144 else
3145 *total = cost2;
3146
3147 op0 = XEXP (x, 0);
3148 op1 = XEXP (x, 1);
3149 if (GET_CODE (op0) != REG
3150 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3151 *total += rtx_cost (op0, code, speed);
3152
3153 return true;
3154
3155 case IOR:
3156 case AND:
3157 case XOR:
3158 op0 = XEXP (x, 0);
3159 op1 = XEXP (x, 1);
3160
3161 /* Handle special cases of IOR: rotates, ALIGN insns, movstricthi_high. */
3162 if (code == IOR)
3163 {
3164 if ((GET_CODE (op0) == LSHIFTRT && GET_CODE (op1) == ASHIFT)
3165 || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == ZERO_EXTEND)
3166 || (GET_CODE (op0) == ASHIFT && GET_CODE (op1) == LSHIFTRT)
3167 || (GET_CODE (op0) == AND && GET_CODE (op1) == CONST_INT))
3168 {
3169 *total = cost2;
3170 return true;
3171 }
3172 }
3173
3174 if (GET_CODE (op0) != REG
3175 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3176 *total += rtx_cost (op0, code, speed);
3177
3178 if (GET_MODE (x) == DImode)
3179 {
3180 *total = 2 * cost2;
3181 return true;
3182 }
3183 *total = cost2;
3184 if (GET_MODE (x) != SImode)
3185 return true;
3186
3187 if (code == AND)
3188 {
3189 if (! rhs_andsi3_operand (XEXP (x, 1), SImode))
3190 *total += rtx_cost (XEXP (x, 1), code, speed);
3191 }
3192 else
3193 {
3194 if (! regorlog2_operand (XEXP (x, 1), SImode))
3195 *total += rtx_cost (XEXP (x, 1), code, speed);
3196 }
3197
3198 return true;
3199
3200 case ZERO_EXTRACT:
3201 case SIGN_EXTRACT:
3202 if (outer_code == SET
3203 && XEXP (x, 1) == const1_rtx
3204 && GET_CODE (XEXP (x, 2)) == CONST_INT)
3205 {
3206 *total = 2 * cost2;
3207 return true;
3208 }
3209 /* fall through */
3210
3211 case SIGN_EXTEND:
3212 case ZERO_EXTEND:
3213 *total = cost2;
3214 return true;
3215
3216 case MULT:
3217 {
3218 op0 = XEXP (x, 0);
3219 op1 = XEXP (x, 1);
3220 if (GET_CODE (op0) == GET_CODE (op1)
3221 && (GET_CODE (op0) == ZERO_EXTEND
3222 || GET_CODE (op0) == SIGN_EXTEND))
3223 {
3224 *total = COSTS_N_INSNS (1);
3225 op0 = XEXP (op0, 0);
3226 op1 = XEXP (op1, 0);
3227 }
3228 else if (!speed)
3229 *total = COSTS_N_INSNS (1);
3230 else
3231 *total = COSTS_N_INSNS (3);
3232
3233 if (GET_CODE (op0) != REG
3234 && (GET_CODE (op0) != SUBREG || GET_CODE (SUBREG_REG (op0)) != REG))
3235 *total += rtx_cost (op0, MULT, speed);
3236 if (GET_CODE (op1) != REG
3237 && (GET_CODE (op1) != SUBREG || GET_CODE (SUBREG_REG (op1)) != REG))
3238 *total += rtx_cost (op1, MULT, speed);
3239 }
3240 return true;
3241
3242 case UDIV:
3243 case UMOD:
3244 *total = COSTS_N_INSNS (32);
3245 return true;
3246
3247 case VEC_CONCAT:
3248 case VEC_SELECT:
3249 if (outer_code == SET)
3250 *total = cost2;
3251 return true;
3252
3253 default:
3254 return false;
3255 }
3256 }
3257 \f
3258 /* Used for communication between {push,pop}_multiple_operation (which
3259 we use not only as a predicate) and the corresponding output functions. */
3260 static int first_preg_to_save, first_dreg_to_save;
3261 static int n_regs_to_save;
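/* As a sketch, the insn for "[--sp] = (r7:5, p5:3);" is a PARALLEL whose
   element 0 adjusts SP and whose remaining SETs store R5..R7 followed by
   P3..P5 at successive negative offsets from SP; the two predicates below
   check exactly this shape and record the first register of each group.  */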
3262
3263 int
3264 push_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3265 {
3266 int lastdreg = 8, lastpreg = 6;
3267 int i, group;
3268
3269 first_preg_to_save = lastpreg;
3270 first_dreg_to_save = lastdreg;
3271 for (i = 1, group = 0; i < XVECLEN (op, 0) - 1; i++)
3272 {
3273 rtx t = XVECEXP (op, 0, i);
3274 rtx src, dest;
3275 int regno;
3276
3277 if (GET_CODE (t) != SET)
3278 return 0;
3279
3280 src = SET_SRC (t);
3281 dest = SET_DEST (t);
3282 if (GET_CODE (dest) != MEM || ! REG_P (src))
3283 return 0;
3284 dest = XEXP (dest, 0);
3285 if (GET_CODE (dest) != PLUS
3286 || ! REG_P (XEXP (dest, 0))
3287 || REGNO (XEXP (dest, 0)) != REG_SP
3288 || GET_CODE (XEXP (dest, 1)) != CONST_INT
3289 || INTVAL (XEXP (dest, 1)) != -i * 4)
3290 return 0;
3291
3292 regno = REGNO (src);
3293 if (group == 0)
3294 {
3295 if (D_REGNO_P (regno))
3296 {
3297 group = 1;
3298 first_dreg_to_save = lastdreg = regno - REG_R0;
3299 }
3300 else if (regno >= REG_P0 && regno <= REG_P7)
3301 {
3302 group = 2;
3303 first_preg_to_save = lastpreg = regno - REG_P0;
3304 }
3305 else
3306 return 0;
3307
3308 continue;
3309 }
3310
3311 if (group == 1)
3312 {
3313 if (regno >= REG_P0 && regno <= REG_P7)
3314 {
3315 group = 2;
3316 first_preg_to_save = lastpreg = regno - REG_P0;
3317 }
3318 else if (regno != REG_R0 + lastdreg + 1)
3319 return 0;
3320 else
3321 lastdreg++;
3322 }
3323 else if (group == 2)
3324 {
3325 if (regno != REG_P0 + lastpreg + 1)
3326 return 0;
3327 lastpreg++;
3328 }
3329 }
3330 n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3331 return 1;
3332 }
3333
3334 int
3335 pop_multiple_operation (rtx op, enum machine_mode mode ATTRIBUTE_UNUSED)
3336 {
3337 int lastdreg = 8, lastpreg = 6;
3338 int i, group;
3339
3340 for (i = 1, group = 0; i < XVECLEN (op, 0); i++)
3341 {
3342 rtx t = XVECEXP (op, 0, i);
3343 rtx src, dest;
3344 int regno;
3345
3346 if (GET_CODE (t) != SET)
3347 return 0;
3348
3349 src = SET_SRC (t);
3350 dest = SET_DEST (t);
3351 if (GET_CODE (src) != MEM || ! REG_P (dest))
3352 return 0;
3353 src = XEXP (src, 0);
3354
3355 if (i == 1)
3356 {
3357 if (! REG_P (src) || REGNO (src) != REG_SP)
3358 return 0;
3359 }
3360 else if (GET_CODE (src) != PLUS
3361 || ! REG_P (XEXP (src, 0))
3362 || REGNO (XEXP (src, 0)) != REG_SP
3363 || GET_CODE (XEXP (src, 1)) != CONST_INT
3364 || INTVAL (XEXP (src, 1)) != (i - 1) * 4)
3365 return 0;
3366
3367 regno = REGNO (dest);
3368 if (group == 0)
3369 {
3370 if (regno == REG_R7)
3371 {
3372 group = 1;
3373 lastdreg = 7;
3374 }
3375 else if (regno != REG_P0 + lastpreg - 1)
3376 return 0;
3377 else
3378 lastpreg--;
3379 }
3380 else if (group == 1)
3381 {
3382 if (regno != REG_R0 + lastdreg - 1)
3383 return 0;
3384 else
3385 lastdreg--;
3386 }
3387 }
3388 first_dreg_to_save = lastdreg;
3389 first_preg_to_save = lastpreg;
3390 n_regs_to_save = 8 - first_dreg_to_save + 6 - first_preg_to_save;
3391 return 1;
3392 }
3393
3394 /* Emit assembly code for one multi-register push described by INSN, with
3395 operands in OPERANDS. */
3396
3397 void
3398 output_push_multiple (rtx insn, rtx *operands)
3399 {
3400 char buf[80];
3401 int ok;
3402
3403 /* Validate the insn again, and compute first_[dp]reg_to_save. */
3404 ok = push_multiple_operation (PATTERN (insn), VOIDmode);
3405 gcc_assert (ok);
3406
3407 if (first_dreg_to_save == 8)
3408 sprintf (buf, "[--sp] = ( p5:%d );\n", first_preg_to_save);
3409 else if (first_preg_to_save == 6)
3410 sprintf (buf, "[--sp] = ( r7:%d );\n", first_dreg_to_save);
3411 else
3412 sprintf (buf, "[--sp] = ( r7:%d, p5:%d );\n",
3413 first_dreg_to_save, first_preg_to_save);
3414
3415 output_asm_insn (buf, operands);
3416 }
3417
3418 /* Emit assembly code for one multi-register pop described by INSN, with
3419 operands in OPERANDS. */
3420
3421 void
3422 output_pop_multiple (rtx insn, rtx *operands)
3423 {
3424 char buf[80];
3425 int ok;
3426
3427 /* Validate the insn again, and compute first_[dp]reg_to_save. */
3428 ok = pop_multiple_operation (PATTERN (insn), VOIDmode);
3429 gcc_assert (ok);
3430
3431 if (first_dreg_to_save == 8)
3432 sprintf (buf, "( p5:%d ) = [sp++];\n", first_preg_to_save);
3433 else if (first_preg_to_save == 6)
3434 sprintf (buf, "( r7:%d ) = [sp++];\n", first_dreg_to_save);
3435 else
3436 sprintf (buf, "( r7:%d, p5:%d ) = [sp++];\n",
3437 first_dreg_to_save, first_preg_to_save);
3438
3439 output_asm_insn (buf, operands);
3440 }
3441
3442 /* Adjust DST and SRC by OFFSET bytes, and generate one move in mode MODE. */
3443
3444 static void
3445 single_move_for_movmem (rtx dst, rtx src, enum machine_mode mode, HOST_WIDE_INT offset)
3446 {
3447 rtx scratch = gen_reg_rtx (mode);
3448 rtx srcmem, dstmem;
3449
3450 srcmem = adjust_address_nv (src, mode, offset);
3451 dstmem = adjust_address_nv (dst, mode, offset);
3452 emit_move_insn (scratch, srcmem);
3453 emit_move_insn (dstmem, scratch);
3454 }
3455
3456 /* Expand a string move operation of COUNT_EXP bytes from SRC to DST, with
3457 alignment ALIGN_EXP. Return true if successful, false if we should fall
3458 back on a different method. */
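/* For instance, a 6-byte copy with 4-byte alignment is expanded below as
   one SImode move followed by one HImode move at offset 4; larger
   word-aligned counts instead use a hardware loop via gen_rep_movsi.  */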
3459
3460 bool
3461 bfin_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp)
3462 {
3463 rtx srcreg, destreg, countreg;
3464 HOST_WIDE_INT align = 0;
3465 unsigned HOST_WIDE_INT count = 0;
3466
3467 if (GET_CODE (align_exp) == CONST_INT)
3468 align = INTVAL (align_exp);
3469 if (GET_CODE (count_exp) == CONST_INT)
3470 {
3471 count = INTVAL (count_exp);
3472 #if 0
3473 if (!TARGET_INLINE_ALL_STRINGOPS && count > 64)
3474 return false;
3475 #endif
3476 }
3477
3478 /* If optimizing for size, only do single copies inline. */
3479 if (optimize_size)
3480 {
3481 if (count == 2 && align < 2)
3482 return false;
3483 if (count == 4 && align < 4)
3484 return false;
3485 if (count != 1 && count != 2 && count != 4)
3486 return false;
3487 }
3488 if (align < 2 && count != 1)
3489 return false;
3490
3491 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
3492 if (destreg != XEXP (dst, 0))
3493 dst = replace_equiv_address_nv (dst, destreg);
3494 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
3495 if (srcreg != XEXP (src, 0))
3496 src = replace_equiv_address_nv (src, srcreg);
3497
3498 if (count != 0 && align >= 2)
3499 {
3500 unsigned HOST_WIDE_INT offset = 0;
3501
3502 if (align >= 4)
3503 {
3504 if ((count & ~3) == 4)
3505 {
3506 single_move_for_movmem (dst, src, SImode, offset);
3507 offset = 4;
3508 }
3509 else if (count & ~3)
3510 {
3511 HOST_WIDE_INT new_count = ((count >> 2) & 0x3fffffff) - 1;
3512 countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3513
3514 emit_insn (gen_rep_movsi (destreg, srcreg, countreg, destreg, srcreg));
3515 cfun->machine->has_loopreg_clobber = true;
3516 }
3517 if (count & 2)
3518 {
3519 single_move_for_movmem (dst, src, HImode, offset);
3520 offset += 2;
3521 }
3522 }
3523 else
3524 {
3525 if ((count & ~1) == 2)
3526 {
3527 single_move_for_movmem (dst, src, HImode, offset);
3528 offset = 2;
3529 }
3530 else if (count & ~1)
3531 {
3532 HOST_WIDE_INT new_count = ((count >> 1) & 0x7fffffff) - 1;
3533 countreg = copy_to_mode_reg (Pmode, GEN_INT (new_count));
3534
3535 emit_insn (gen_rep_movhi (destreg, srcreg, countreg, destreg, srcreg));
3536 cfun->machine->has_loopreg_clobber = true;
3537 }
3538 }
3539 if (count & 1)
3540 {
3541 single_move_for_movmem (dst, src, QImode, offset);
3542 }
3543 return true;
3544 }
3545 return false;
3546 }
3547 \f
3548 /* Compute the alignment for a local variable.
3549 TYPE is the data type, and ALIGN is the alignment that
3550 the object would ordinarily have. The value of this macro is used
3551 instead of that alignment to align the object. */
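/* For example, a 12-byte local array (TYPE_SIZE of 96 bits) whose default
   alignment is below 32 is bumped to 32-bit alignment here.  */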
3552
3553 int
3554 bfin_local_alignment (tree type, int align)
3555 {
3556 /* Increasing the alignment of (relatively) big types allows the builtin
3557 memcpy to use 32-bit loads/stores. */
3558 if (TYPE_SIZE (type)
3559 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
3560 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) > 8
3561 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 32)
3562 return 32;
3563 return align;
3564 }
3565 \f
3566 /* Implement TARGET_SCHED_ISSUE_RATE. */
3567
3568 static int
3569 bfin_issue_rate (void)
3570 {
3571 return 3;
3572 }
3573
3574 static int
3575 bfin_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3576 {
3577 enum attr_type insn_type, dep_insn_type;
3578 int dep_insn_code_number;
3579
3580 /* Anti and output dependencies have zero cost. */
3581 if (REG_NOTE_KIND (link) != 0)
3582 return 0;
3583
3584 dep_insn_code_number = recog_memoized (dep_insn);
3585
3586 /* If we can't recognize the insns, we can't really do anything. */
3587 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
3588 return cost;
3589
3590 insn_type = get_attr_type (insn);
3591 dep_insn_type = get_attr_type (dep_insn);
3592
3593 if (dep_insn_type == TYPE_MOVE || dep_insn_type == TYPE_MCLD)
3594 {
3595 rtx pat = PATTERN (dep_insn), dest, src;
3596 if (GET_CODE (pat) == PARALLEL)
3597 pat = XVECEXP (pat, 0, 0);
3598 dest = SET_DEST (pat);
3599 src = SET_SRC (pat);
3600 if (! ADDRESS_REGNO_P (REGNO (dest))
3601 || ! (MEM_P (src) || D_REGNO_P (REGNO (src))))
3602 return cost;
3603 return cost + (dep_insn_type == TYPE_MOVE ? 4 : 3);
3604 }
3605
3606 return cost;
3607 }
3608 \f
3609 /* This function acts like NEXT_INSN, but is aware of three-insn bundles and
3610 skips all subsequent parallel instructions if INSN is the start of such
3611 a group. */
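/* (This pass marks bundles via GET_MODE of the insn: members of a bundle
   carry SImode except the final slot, which carries QImode, so the loop
   below advances to just past a bundle's last member.)  */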
3612 static rtx
3613 find_next_insn_start (rtx insn)
3614 {
3615 if (GET_MODE (insn) == SImode)
3616 {
3617 while (GET_MODE (insn) != QImode)
3618 insn = NEXT_INSN (insn);
3619 }
3620 return NEXT_INSN (insn);
3621 }
3622
3623 /* This function acts like PREV_INSN, but is aware of three-insn bundles:
3624 it skips back over a whole bundle, returning the first instruction of
3625 the bundle that precedes INSN. */
3626 static rtx
3627 find_prev_insn_start (rtx insn)
3628 {
3629 insn = PREV_INSN (insn);
3630 gcc_assert (GET_MODE (insn) != SImode);
3631 if (GET_MODE (insn) == QImode)
3632 {
3633 while (GET_MODE (PREV_INSN (insn)) == SImode)
3634 insn = PREV_INSN (insn);
3635 }
3636 return insn;
3637 }
3638 \f
3639 /* Increment the counter for the number of loop instructions in the
3640 current function. */
3641
3642 void
3643 bfin_hardware_loop (void)
3644 {
3645 cfun->machine->has_hardware_loops++;
3646 }
3647
3648 /* Maximum loop nesting depth. */
3649 #define MAX_LOOP_DEPTH 2
3650
3651 /* Maximum size of a loop. */
3652 #define MAX_LOOP_LENGTH 2042
3653
3654 /* Maximum distance of the LSETUP instruction from the loop start. */
3655 #define MAX_LSETUP_DISTANCE 30
3656
3657 /* We need to keep a vector of loops */
3658 typedef struct loop_info *loop_info;
3659 DEF_VEC_P (loop_info);
3660 DEF_VEC_ALLOC_P (loop_info,heap);
3661
3662 /* Information about a loop we have found (or are in the process of
3663 finding). */
3664 struct GTY (()) loop_info
3665 {
3666 /* loop number, for dumps */
3667 int loop_no;
3668
3669 /* All edges that jump into and out of the loop. */
3670 VEC(edge,gc) *incoming;
3671
3672 /* We can handle two cases: all incoming edges have the same destination
3673 block, or all incoming edges have the same source block. These two
3674 members are set to the common source or destination we found, or NULL
3675 if different blocks were found. If both are NULL the loop can't be
3676 optimized. */
3677 basic_block incoming_src;
3678 basic_block incoming_dest;
3679
3680 /* First block in the loop. This is the one branched to by the loop_end
3681 insn. */
3682 basic_block head;
3683
3684 /* Last block in the loop (the one with the loop_end insn). */
3685 basic_block tail;
3686
3687 /* The successor block of the loop. This is the one the loop_end insn
3688 falls into. */
3689 basic_block successor;
3690
3691 /* The last instruction in the tail. */
3692 rtx last_insn;
3693
3694 /* The loop_end insn. */
3695 rtx loop_end;
3696
3697 /* The iteration register. */
3698 rtx iter_reg;
3699
3700 /* The new label placed at the beginning of the loop. */
3701 rtx start_label;
3702
3703 /* The new label placed at the end of the loop. */
3704 rtx end_label;
3705
3706 /* The length of the loop. */
3707 int length;
3708
3709 /* The nesting depth of the loop. */
3710 int depth;
3711
3712 /* Nonzero if we can't optimize this loop. */
3713 int bad;
3714
3715 /* True if we have visited this loop. */
3716 int visited;
3717
3718 /* True if this loop body clobbers any of LC0, LT0, or LB0. */
3719 int clobber_loop0;
3720
3721 /* True if this loop body clobbers any of LC1, LT1, or LB1. */
3722 int clobber_loop1;
3723
3724 /* Next loop in the graph. */
3725 struct loop_info *next;
3726
3727 /* Immediate outer loop of this loop. */
3728 struct loop_info *outer;
3729
3730 /* Vector of blocks only within the loop, including those within
3731 inner loops. */
3732 VEC (basic_block,heap) *blocks;
3733
3734 /* Same information in a bitmap. */
3735 bitmap block_bitmap;
3736
3737 /* Vector of inner loops within this loop */
3738 VEC (loop_info,heap) *loops;
3739 };
3740
3741 static void
3742 bfin_dump_loops (loop_info loops)
3743 {
3744 loop_info loop;
3745
3746 for (loop = loops; loop; loop = loop->next)
3747 {
3748 loop_info i;
3749 basic_block b;
3750 unsigned ix;
3751
3752 fprintf (dump_file, ";; loop %d: ", loop->loop_no);
3753 if (loop->bad)
3754 fprintf (dump_file, "(bad) ");
3755 fprintf (dump_file, "{head:%d, depth:%d}", loop->head->index, loop->depth);
3756
3757 fprintf (dump_file, " blocks: [ ");
3758 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, b); ix++)
3759 fprintf (dump_file, "%d ", b->index);
3760 fprintf (dump_file, "] ");
3761
3762 fprintf (dump_file, " inner loops: [ ");
3763 for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, i); ix++)
3764 fprintf (dump_file, "%d ", i->loop_no);
3765 fprintf (dump_file, "]\n");
3766 }
3767 fprintf (dump_file, "\n");
3768 }
3769
3770 /* Scan the blocks of LOOP (and its inferiors) looking for basic block
3771 BB. Return true if we find it. */
3772
3773 static bool
3774 bfin_bb_in_loop (loop_info loop, basic_block bb)
3775 {
3776 return bitmap_bit_p (loop->block_bitmap, bb->index);
3777 }
3778
3779 /* Scan the blocks of LOOP (and its inferiors) looking for uses of
3780 REG. Return true if we find any. Don't count the loop's loop_end
3781 insn if it matches LOOP_END. */
3782
3783 static bool
3784 bfin_scan_loop (loop_info loop, rtx reg, rtx loop_end)
3785 {
3786 unsigned ix;
3787 basic_block bb;
3788
3789 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
3790 {
3791 rtx insn;
3792
3793 for (insn = BB_HEAD (bb);
3794 insn != NEXT_INSN (BB_END (bb));
3795 insn = NEXT_INSN (insn))
3796 {
3797 if (!INSN_P (insn))
3798 continue;
3799 if (insn == loop_end)
3800 continue;
3801 if (reg_mentioned_p (reg, PATTERN (insn)))
3802 return true;
3803 }
3804 }
3805 return false;
3806 }
3807
3808 /* Estimate the length of INSN conservatively. */
3809
3810 static int
3811 length_for_loop (rtx insn)
3812 {
3813 int length = 0;
3814 if (JUMP_P (insn) && any_condjump_p (insn) && !optimize_size)
3815 {
3816 if (ENABLE_WA_SPECULATIVE_SYNCS)
3817 length = 8;
3818 else if (ENABLE_WA_SPECULATIVE_LOADS)
3819 length = 6;
3820 }
3821 else if (LABEL_P (insn))
3822 {
3823 if (ENABLE_WA_SPECULATIVE_SYNCS)
3824 length = 4;
3825 }
3826
3827 if (INSN_P (insn))
3828 length += get_attr_length (insn);
3829
3830 return length;
3831 }
3832
3833 /* Optimize LOOP. */
3834
3835 static void
3836 bfin_optimize_loop (loop_info loop)
3837 {
3838 basic_block bb;
3839 loop_info inner;
3840 rtx insn, last_insn;
3841 rtx loop_init, start_label, end_label;
3842 rtx reg_lc0, reg_lc1, reg_lt0, reg_lt1, reg_lb0, reg_lb1;
3843 rtx iter_reg, scratchreg, scratch_init, scratch_init_insn;
3844 rtx lc_reg, lt_reg, lb_reg;
3845 rtx seq, seq_end;
3846 int length;
3847 unsigned ix;
3848 int inner_depth = 0;
3849
3850 if (loop->visited)
3851 return;
3852
3853 loop->visited = 1;
3854
3855 if (loop->bad)
3856 {
3857 if (dump_file)
3858 fprintf (dump_file, ";; loop %d bad when found\n", loop->loop_no);
3859 goto bad_loop;
3860 }
3861
3862 /* Every loop contains in its list of inner loops every loop nested inside
3863 it, even if there are intermediate loops. This works because we're doing
3864 a depth-first search here and never visit a loop more than once. */
3865 for (ix = 0; VEC_iterate (loop_info, loop->loops, ix, inner); ix++)
3866 {
3867 bfin_optimize_loop (inner);
3868
3869 if (!inner->bad && inner_depth < inner->depth)
3870 {
3871 inner_depth = inner->depth;
3872
3873 loop->clobber_loop0 |= inner->clobber_loop0;
3874 loop->clobber_loop1 |= inner->clobber_loop1;
3875 }
3876 }
3877
3878 loop->depth = inner_depth + 1;
3879 if (loop->depth > MAX_LOOP_DEPTH)
3880 {
3881 if (dump_file)
3882 fprintf (dump_file, ";; loop %d too deep\n", loop->loop_no);
3883 goto bad_loop;
3884 }
3885
3886 /* Get the loop iteration register. */
3887 iter_reg = loop->iter_reg;
3888
3889 if (!REG_P (iter_reg))
3890 {
3891 if (dump_file)
3892 fprintf (dump_file, ";; loop %d iteration count not in a register\n",
3893 loop->loop_no);
3894 goto bad_loop;
3895 }
3896 scratchreg = NULL_RTX;
3897 scratch_init = iter_reg;
3898 scratch_init_insn = NULL_RTX;
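   /* If the iteration count is not in a P register, try to find a P
      register in the incoming block that is not live at the loop entry,
      to serve as a scratch for LSETUP; also look for the insn that
      initializes the counter, so the scratch can be loaded straight
      from a constant.  */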
3899 if (!PREG_P (iter_reg) && loop->incoming_src)
3900 {
3901 basic_block bb_in = loop->incoming_src;
3902 int i;
3903 for (i = REG_P0; i <= REG_P5; i++)
3904 if ((df_regs_ever_live_p (i)
3905 || (funkind (TREE_TYPE (current_function_decl)) == SUBROUTINE
3906 && call_used_regs[i]))
3907 && !REGNO_REG_SET_P (df_get_live_out (bb_in), i))
3908 {
3909 scratchreg = gen_rtx_REG (SImode, i);
3910 break;
3911 }
3912 for (insn = BB_END (bb_in); insn != BB_HEAD (bb_in);
3913 insn = PREV_INSN (insn))
3914 {
3915 rtx set;
3916 if (NOTE_P (insn) || BARRIER_P (insn))
3917 continue;
3918 set = single_set (insn);
3919 if (set && rtx_equal_p (SET_DEST (set), iter_reg))
3920 {
3921 if (CONSTANT_P (SET_SRC (set)))
3922 {
3923 scratch_init = SET_SRC (set);
3924 scratch_init_insn = insn;
3925 }
3926 break;
3927 }
3928 else if (reg_mentioned_p (iter_reg, PATTERN (insn)))
3929 break;
3930 }
3931 }
3932
3933 if (loop->incoming_src)
3934 {
3935 /* Make sure the predecessor is before the loop start label, as required by
3936 the LSETUP instruction. */
3937 length = 0;
3938 insn = BB_END (loop->incoming_src);
3939 /* If we have to insert the LSETUP before a jump, count that jump in the
3940 length. */
3941 if (VEC_length (edge, loop->incoming) > 1
3942 || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
3943 {
3944 gcc_assert (JUMP_P (insn));
3945 insn = PREV_INSN (insn);
3946 }
3947
3948 for (; insn && insn != loop->start_label; insn = NEXT_INSN (insn))
3949 length += length_for_loop (insn);
3950
3951 if (!insn)
3952 {
3953 if (dump_file)
3954 fprintf (dump_file, ";; loop %d lsetup not before loop_start\n",
3955 loop->loop_no);
3956 goto bad_loop;
3957 }
3958
3959 /* Account for the pop of a scratch register where necessary. */
3960 if (!PREG_P (iter_reg) && scratchreg == NULL_RTX
3961 && ENABLE_WA_LOAD_LCREGS)
3962 length += 2;
3963
3964 if (length > MAX_LSETUP_DISTANCE)
3965 {
3966 if (dump_file)
3967 fprintf (dump_file, ";; loop %d lsetup too far away\n", loop->loop_no);
3968 goto bad_loop;
3969 }
3970 }
3971
3972 /* Check if start_label appears before loop_end and calculate the
3973 offset between them. We calculate the length of instructions
3974 conservatively. */
3975 length = 0;
3976 for (insn = loop->start_label;
3977 insn && insn != loop->loop_end;
3978 insn = NEXT_INSN (insn))
3979 length += length_for_loop (insn);
3980
3981 if (!insn)
3982 {
3983 if (dump_file)
3984 fprintf (dump_file, ";; loop %d start_label not before loop_end\n",
3985 loop->loop_no);
3986 goto bad_loop;
3987 }
3988
3989 loop->length = length;
3990 if (loop->length > MAX_LOOP_LENGTH)
3991 {
3992 if (dump_file)
3993 fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
3994 goto bad_loop;
3995 }
3996
3997 /* Scan all the blocks to make sure they don't use iter_reg. */
3998 if (bfin_scan_loop (loop, iter_reg, loop->loop_end))
3999 {
4000 if (dump_file)
4001 fprintf (dump_file, ";; loop %d uses iterator\n", loop->loop_no);
4002 goto bad_loop;
4003 }
4004
4005   /* Scan all the insns to see if the loop body clobbers
4006      any of the hardware loop registers. */
4007
4008 reg_lc0 = gen_rtx_REG (SImode, REG_LC0);
4009 reg_lc1 = gen_rtx_REG (SImode, REG_LC1);
4010 reg_lt0 = gen_rtx_REG (SImode, REG_LT0);
4011 reg_lt1 = gen_rtx_REG (SImode, REG_LT1);
4012 reg_lb0 = gen_rtx_REG (SImode, REG_LB0);
4013 reg_lb1 = gen_rtx_REG (SImode, REG_LB1);
4014
4015 for (ix = 0; VEC_iterate (basic_block, loop->blocks, ix, bb); ix++)
4016 {
4017 rtx insn;
4018
4019 for (insn = BB_HEAD (bb);
4020 insn != NEXT_INSN (BB_END (bb));
4021 insn = NEXT_INSN (insn))
4022 {
4023 if (!INSN_P (insn))
4024 continue;
4025
4026 if (reg_set_p (reg_lc0, insn)
4027 || reg_set_p (reg_lt0, insn)
4028 || reg_set_p (reg_lb0, insn))
4029 loop->clobber_loop0 = 1;
4030
4031 if (reg_set_p (reg_lc1, insn)
4032 || reg_set_p (reg_lt1, insn)
4033 || reg_set_p (reg_lb1, insn))
4034 	    loop->clobber_loop1 = 1;
4035 }
4036 }
4037
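   /* The loop is unusable if both register sets are clobbered, or if it
      is at the maximum nesting depth, which forces it to use the
      LC0/LT0/LB0 set, and that set is clobbered.  */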
4038 if ((loop->clobber_loop0 && loop->clobber_loop1)
4039 || (loop->depth == MAX_LOOP_DEPTH && loop->clobber_loop0))
4040 {
4041 loop->depth = MAX_LOOP_DEPTH + 1;
4042 if (dump_file)
4043 fprintf (dump_file, ";; loop %d no loop reg available\n",
4044 loop->loop_no);
4045 goto bad_loop;
4046 }
4047
4048   /* There should be an instruction before the loop_end instruction
4049      in the same basic block, and that instruction must not be one of:
4050      - JUMP
4051      - CONDITIONAL BRANCH
4052      - CALL
4053      - CSYNC
4054      - SSYNC
4055      - Returns (RTS, RTN, etc.)  */
4056
4057 bb = loop->tail;
4058 last_insn = find_prev_insn_start (loop->loop_end);
4059
4060 while (1)
4061 {
4062 for (; last_insn != BB_HEAD (bb);
4063 last_insn = find_prev_insn_start (last_insn))
4064 if (INSN_P (last_insn))
4065 break;
4066
4067 if (last_insn != BB_HEAD (bb))
4068 break;
4069
4070 if (single_pred_p (bb)
4071 && single_pred_edge (bb)->flags & EDGE_FALLTHRU
4072 && single_pred (bb) != ENTRY_BLOCK_PTR)
4073 {
4074 bb = single_pred (bb);
4075 last_insn = BB_END (bb);
4076 continue;
4077 }
4078 else
4079 {
4080 last_insn = NULL_RTX;
4081 break;
4082 }
4083 }
4084
4085 if (!last_insn)
4086 {
4087 if (dump_file)
4088 fprintf (dump_file, ";; loop %d has no last instruction\n",
4089 loop->loop_no);
4090 goto bad_loop;
4091 }
4092
4093 if (JUMP_P (last_insn) && !any_condjump_p (last_insn))
4094 {
4095 if (dump_file)
4096 fprintf (dump_file, ";; loop %d has bad last instruction\n",
4097 loop->loop_no);
4098 goto bad_loop;
4099 }
4100 /* In all other cases, try to replace a bad last insn with a nop. */
4101 else if (JUMP_P (last_insn)
4102 || CALL_P (last_insn)
4103 || get_attr_type (last_insn) == TYPE_SYNC
4104 || get_attr_type (last_insn) == TYPE_CALL
4105 || get_attr_seq_insns (last_insn) == SEQ_INSNS_MULTI
4106 || recog_memoized (last_insn) == CODE_FOR_return_internal
4107 || GET_CODE (PATTERN (last_insn)) == ASM_INPUT
4108 || asm_noperands (PATTERN (last_insn)) >= 0)
4109 {
4110 if (loop->length + 2 > MAX_LOOP_LENGTH)
4111 {
4112 if (dump_file)
4113 fprintf (dump_file, ";; loop %d too long\n", loop->loop_no);
4114 goto bad_loop;
4115 }
4116 if (dump_file)
4117 fprintf (dump_file, ";; loop %d has bad last insn; replace with nop\n",
4118 loop->loop_no);
4119
4120 last_insn = emit_insn_after (gen_forced_nop (), last_insn);
4121 }
4122
4123 loop->last_insn = last_insn;
4124
4125 /* The loop is good for replacement. */
4126 start_label = loop->start_label;
4127 end_label = gen_label_rtx ();
4128 iter_reg = loop->iter_reg;
4129
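   /* Prefer the LC1/LT1/LB1 set for an innermost loop, leaving the LC0
      set free for any enclosing loop.  */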
4130 if (loop->depth == 1 && !loop->clobber_loop1)
4131 {
4132 lc_reg = reg_lc1;
4133 lt_reg = reg_lt1;
4134 lb_reg = reg_lb1;
4135 loop->clobber_loop1 = 1;
4136 }
4137 else
4138 {
4139 lc_reg = reg_lc0;
4140 lt_reg = reg_lt0;
4141 lb_reg = reg_lb0;
4142 loop->clobber_loop0 = 1;
4143 }
4144
4145 loop->end_label = end_label;
4146
4147 /* Create a sequence containing the loop setup. */
4148 start_sequence ();
4149
4150 /* LSETUP only accepts P registers. If we have one, we can use it,
4151 otherwise there are several ways of working around the problem.
4152 If we're not affected by anomaly 312, we can load the LC register
4153 from any iteration register, and use LSETUP without initialization.
4154 If we've found a P scratch register that's not live here, we can
4155 instead copy the iter_reg into that and use an initializing LSETUP.
4156 If all else fails, push and pop P0 and use it as a scratch. */
4157 if (P_REGNO_P (REGNO (iter_reg)))
4158 {
4159 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
4160 lb_reg, end_label,
4161 lc_reg, iter_reg);
4162 seq_end = emit_insn (loop_init);
4163 }
4164 else if (!ENABLE_WA_LOAD_LCREGS && DPREG_P (iter_reg))
4165 {
4166 emit_insn (gen_movsi (lc_reg, iter_reg));
4167 loop_init = gen_lsetup_without_autoinit (lt_reg, start_label,
4168 lb_reg, end_label,
4169 lc_reg);
4170 seq_end = emit_insn (loop_init);
4171 }
4172 else if (scratchreg != NULL_RTX)
4173 {
4174 emit_insn (gen_movsi (scratchreg, scratch_init));
4175 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
4176 lb_reg, end_label,
4177 lc_reg, scratchreg);
4178 seq_end = emit_insn (loop_init);
4179 if (scratch_init_insn != NULL_RTX)
4180 delete_insn (scratch_init_insn);
4181 }
4182 else
4183 {
4184 rtx p0reg = gen_rtx_REG (SImode, REG_P0);
4185 rtx push = gen_frame_mem (SImode,
4186 gen_rtx_PRE_DEC (SImode, stack_pointer_rtx));
4187 rtx pop = gen_frame_mem (SImode,
4188 gen_rtx_POST_INC (SImode, stack_pointer_rtx));
4189 emit_insn (gen_movsi (push, p0reg));
4190 emit_insn (gen_movsi (p0reg, scratch_init));
4191 loop_init = gen_lsetup_with_autoinit (lt_reg, start_label,
4192 lb_reg, end_label,
4193 lc_reg, p0reg);
4194 emit_insn (loop_init);
4195 seq_end = emit_insn (gen_movsi (p0reg, pop));
4196 if (scratch_init_insn != NULL_RTX)
4197 delete_insn (scratch_init_insn);
4198 }
4199
4200 if (dump_file)
4201 {
4202 fprintf (dump_file, ";; replacing loop %d initializer with\n",
4203 loop->loop_no);
4204 print_rtl_single (dump_file, loop_init);
4205 fprintf (dump_file, ";; replacing loop %d terminator with\n",
4206 loop->loop_no);
4207 print_rtl_single (dump_file, loop->loop_end);
4208 }
4209
4210 /* If the loop isn't entered at the top, also create a jump to the entry
4211 point. */
4212 if (!loop->incoming_src && loop->head != loop->incoming_dest)
4213 {
4214 rtx label = BB_HEAD (loop->incoming_dest);
4215 /* If we're jumping to the final basic block in the loop, and there's
4216 only one cheap instruction before the end (typically an increment of
4217 an induction variable), we can just emit a copy here instead of a
4218 jump. */
4219 if (loop->incoming_dest == loop->tail
4220 && next_real_insn (label) == last_insn
4221 && asm_noperands (last_insn) < 0
4222 && GET_CODE (PATTERN (last_insn)) == SET)
4223 {
4224 seq_end = emit_insn (copy_rtx (PATTERN (last_insn)));
4225 }
4226 else
4227 {
4228 emit_jump_insn (gen_jump (label));
4229 seq_end = emit_barrier ();
4230 }
4231 }
4232
4233 seq = get_insns ();
4234 end_sequence ();
4235
4236 if (loop->incoming_src)
4237 {
4238 rtx prev = BB_END (loop->incoming_src);
4239 if (VEC_length (edge, loop->incoming) > 1
4240 || !(VEC_last (edge, loop->incoming)->flags & EDGE_FALLTHRU))
4241 {
4242 gcc_assert (JUMP_P (prev));
4243 prev = PREV_INSN (prev);
4244 }
4245 emit_insn_after (seq, prev);
4246 }
4247 else
4248 {
4249 basic_block new_bb;
4250 edge e;
4251 edge_iterator ei;
4252
4253 #ifdef ENABLE_CHECKING
4254 if (loop->head != loop->incoming_dest)
4255 {
4256 /* We aren't entering the loop at the top. Since we've established
4257 that the loop is entered only at one point, this means there
4258 can't be fallthru edges into the head. Any such fallthru edges
4259 would become invalid when we insert the new block, so verify
4260 that this does not in fact happen. */
4261 FOR_EACH_EDGE (e, ei, loop->head->preds)
4262 gcc_assert (!(e->flags & EDGE_FALLTHRU));
4263 }
4264 #endif
4265
4266 emit_insn_before (seq, BB_HEAD (loop->head));
4267 seq = emit_label_before (gen_label_rtx (), seq);
4268
4269 new_bb = create_basic_block (seq, seq_end, loop->head->prev_bb);
4270 FOR_EACH_EDGE (e, ei, loop->incoming)
4271 {
4272 if (!(e->flags & EDGE_FALLTHRU)
4273 || e->dest != loop->head)
4274 redirect_edge_and_branch_force (e, new_bb);
4275 else
4276 redirect_edge_succ (e, new_bb);
4277 }
4278 e = make_edge (new_bb, loop->head, 0);
4279 }
4280
4281 delete_insn (loop->loop_end);
4282 /* Insert the loop end label before the last instruction of the loop. */
4283 emit_label_before (loop->end_label, loop->last_insn);
4284
4285 return;
4286
4287 bad_loop:
4288
4289 if (dump_file)
4290 fprintf (dump_file, ";; loop %d is bad\n", loop->loop_no);
4291
4292 loop->bad = 1;
4293
4294 if (DPREG_P (loop->iter_reg))
4295 {
4296       /* If loop->iter_reg is a DREG or PREG, we can split the loop_end
4297 	 insn here without a scratch register. */
4298 rtx insn, test;
4299
4300 emit_insn_before (gen_addsi3 (loop->iter_reg,
4301 loop->iter_reg,
4302 constm1_rtx),
4303 loop->loop_end);
4304
4305 test = gen_rtx_NE (VOIDmode, loop->iter_reg, const0_rtx);
4306 insn = emit_jump_insn_before (gen_cbranchsi4 (test,
4307 loop->iter_reg, const0_rtx,
4308 loop->start_label),
4309 loop->loop_end);
4310
4311 JUMP_LABEL (insn) = loop->start_label;
4312 LABEL_NUSES (loop->start_label)++;
4313 delete_insn (loop->loop_end);
4314 }
4315 }
4316
4317 /* Called from bfin_reorg_loops when a potential loop end is found. LOOP is
4318    a newly set up structure describing the loop; it is this function's
4319    responsibility to fill in most of it.  TAIL_BB and TAIL_INSN point to the
4320 loop_end insn and its enclosing basic block. */
4321
4322 static void
4323 bfin_discover_loop (loop_info loop, basic_block tail_bb, rtx tail_insn)
4324 {
4325 unsigned dwork = 0;
4326 basic_block bb;
4327 VEC (basic_block,heap) *works = VEC_alloc (basic_block,heap,20);
4328
4329 loop->tail = tail_bb;
4330 loop->head = BRANCH_EDGE (tail_bb)->dest;
4331 loop->successor = FALLTHRU_EDGE (tail_bb)->dest;
4332 loop->loop_end = tail_insn;
4333 loop->last_insn = NULL_RTX;
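   /* The loop_end insn is a PARALLEL: element 0 is the conditional branch
      whose taken arm holds the start label, and element 1 decrements the
      iteration register.  Extract both operands from the pattern.  */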
4334 loop->iter_reg = SET_DEST (XVECEXP (PATTERN (tail_insn), 0, 1));
4335 loop->depth = loop->length = 0;
4336 loop->visited = 0;
4337 loop->clobber_loop0 = loop->clobber_loop1 = 0;
4338 loop->outer = NULL;
4339 loop->loops = NULL;
4340 loop->incoming = VEC_alloc (edge, gc, 2);
4341 loop->start_label = XEXP (XEXP (SET_SRC (XVECEXP (PATTERN (tail_insn), 0, 0)), 1), 0);
4342 loop->end_label = NULL_RTX;
4343 loop->bad = 0;
4344
4345 VEC_safe_push (basic_block, heap, works, loop->head);
4346
4347 while (VEC_iterate (basic_block, works, dwork++, bb))
4348 {
4349 edge e;
4350 edge_iterator ei;
4351 if (bb == EXIT_BLOCK_PTR)
4352 {
4353 /* We've reached the exit block. The loop must be bad. */
4354 if (dump_file)
4355 fprintf (dump_file,
4356 ";; Loop is bad - reached exit block while scanning\n");
4357 loop->bad = 1;
4358 break;
4359 }
4360
4361 if (bitmap_bit_p (loop->block_bitmap, bb->index))
4362 continue;
4363
4364 /* We've not seen this block before. Add it to the loop's
4365 list and then add each successor to the work list. */
4366
4367 VEC_safe_push (basic_block, heap, loop->blocks, bb);
4368 bitmap_set_bit (loop->block_bitmap, bb->index);
4369
4370 if (bb != tail_bb)
4371 {
4372 FOR_EACH_EDGE (e, ei, bb->succs)
4373 {
4374 	      basic_block succ = e->dest;
4375 if (!REGNO_REG_SET_P (df_get_live_in (succ),
4376 REGNO (loop->iter_reg)))
4377 continue;
4378 if (!VEC_space (basic_block, works, 1))
4379 {
4380 if (dwork)
4381 {
4382 VEC_block_remove (basic_block, works, 0, dwork);
4383 dwork = 0;
4384 }
4385 else
4386 VEC_reserve (basic_block, heap, works, 1);
4387 }
4388 VEC_quick_push (basic_block, works, succ);
4389 }
4390 }
4391 }
4392
4393 /* Find the predecessor, and make sure nothing else jumps into this loop. */
4394 if (!loop->bad)
4395 {
4396 int pass, retry;
4397 for (dwork = 0; VEC_iterate (basic_block, loop->blocks, dwork, bb); dwork++)
4398 {
4399 edge e;
4400 edge_iterator ei;
4401 FOR_EACH_EDGE (e, ei, bb->preds)
4402 {
4403 basic_block pred = e->src;
4404
4405 if (!bfin_bb_in_loop (loop, pred))
4406 {
4407 if (dump_file)
4408 fprintf (dump_file, ";; Loop %d: incoming edge %d -> %d\n",
4409 loop->loop_no, pred->index,
4410 e->dest->index);
4411 VEC_safe_push (edge, gc, loop->incoming, e);
4412 }
4413 }
4414 }
4415
4416 for (pass = 0, retry = 1; retry && pass < 2; pass++)
4417 {
4418 edge e;
4419 edge_iterator ei;
4420 bool first = true;
4421 retry = 0;
4422
4423 FOR_EACH_EDGE (e, ei, loop->incoming)
4424 {
4425 if (first)
4426 {
4427 loop->incoming_src = e->src;
4428 loop->incoming_dest = e->dest;
4429 first = false;
4430 }
4431 else
4432 {
4433 if (e->dest != loop->incoming_dest)
4434 loop->incoming_dest = NULL;
4435 if (e->src != loop->incoming_src)
4436 loop->incoming_src = NULL;
4437 }
4438 if (loop->incoming_src == NULL && loop->incoming_dest == NULL)
4439 {
4440 if (pass == 0)
4441 {
4442 if (dump_file)
4443 fprintf (dump_file,
4444 ";; retrying loop %d with forwarder blocks\n",
4445 loop->loop_no);
4446 retry = 1;
4447 break;
4448 }
4449 loop->bad = 1;
4450 if (dump_file)
4451 fprintf (dump_file,
4452 ";; can't find suitable entry for loop %d\n",
4453 loop->loop_no);
4454 goto out;
4455 }
4456 }
4457 if (retry)
4458 {
4459 retry = 0;
4460 FOR_EACH_EDGE (e, ei, loop->incoming)
4461 {
4462 if (forwarder_block_p (e->src))
4463 {
4464 edge e2;
4465 edge_iterator ei2;
4466
4467 if (dump_file)
4468 fprintf (dump_file,
4469 ";; Adding forwarder block %d to loop %d and retrying\n",
4470 e->src->index, loop->loop_no);
4471 VEC_safe_push (basic_block, heap, loop->blocks, e->src);
4472 bitmap_set_bit (loop->block_bitmap, e->src->index);
4473 FOR_EACH_EDGE (e2, ei2, e->src->preds)
4474 VEC_safe_push (edge, gc, loop->incoming, e2);
4475 VEC_unordered_remove (edge, loop->incoming, ei.index);
4476 retry = 1;
4477 break;
4478 }
4479 }
4480 if (!retry)
4481 {
4482 if (dump_file)
4483 fprintf (dump_file, ";; No forwarder blocks found\n");
4484 loop->bad = 1;
4485 }
4486 }
4487 }
4488 }
4489
4490 out:
4491 VEC_free (basic_block, heap, works);
4492 }
4493
4494 /* Analyze the structure of the loops in the current function. Use STACK
4495 for bitmap allocations. Returns all the valid candidates for hardware
4496 loops found in this function. */
4497 static loop_info
4498 bfin_discover_loops (bitmap_obstack *stack, FILE *dump_file)
4499 {
4500 loop_info loops = NULL;
4501 loop_info loop;
4502 basic_block bb;
4503 bitmap tmp_bitmap;
4504 int nloops = 0;
4505
4506 /* Find all the possible loop tails. This means searching for every
4507 loop_end instruction. For each one found, create a loop_info
4508 structure and add the head block to the work list. */
4509 FOR_EACH_BB (bb)
4510 {
4511 rtx tail = BB_END (bb);
4512
4513 while (GET_CODE (tail) == NOTE)
4514 tail = PREV_INSN (tail);
4515
4516 bb->aux = NULL;
4517
4518 if (INSN_P (tail) && recog_memoized (tail) == CODE_FOR_loop_end)
4519 {
4520 rtx insn;
4521 	  /* A possible loop end.  */
4522
4523 /* There's a degenerate case we can handle - an empty loop consisting
4524 of only a back branch. Handle that by deleting the branch. */
4525 insn = BB_HEAD (BRANCH_EDGE (bb)->dest);
4526 if (next_real_insn (insn) == tail)
4527 {
4528 if (dump_file)
4529 {
4530 fprintf (dump_file, ";; degenerate loop ending at\n");
4531 print_rtl_single (dump_file, tail);
4532 }
4533 delete_insn_and_edges (tail);
4534 continue;
4535 }
4536
4537 loop = XNEW (struct loop_info);
4538 loop->next = loops;
4539 loops = loop;
4540 loop->loop_no = nloops++;
4541 loop->blocks = VEC_alloc (basic_block, heap, 20);
4542 loop->block_bitmap = BITMAP_ALLOC (stack);
4543 bb->aux = loop;
4544
4545 if (dump_file)
4546 {
4547 fprintf (dump_file, ";; potential loop %d ending at\n",
4548 loop->loop_no);
4549 print_rtl_single (dump_file, tail);
4550 }
4551
4552 bfin_discover_loop (loop, bb, tail);
4553 }
4554 }
4555
4556 tmp_bitmap = BITMAP_ALLOC (stack);
4557 /* Compute loop nestings. */
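   /* Intersect the block bitmaps of each pair of loops: an empty
      intersection means the loops are disjoint; an intersection equal to
      one loop's bitmap means that loop nests inside the other; any other
      overlap is improper and makes both loops bad.  */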
4558 for (loop = loops; loop; loop = loop->next)
4559 {
4560 loop_info other;
4561 if (loop->bad)
4562 continue;
4563
4564 for (other = loop->next; other; other = other->next)
4565 {
4566 if (other->bad)
4567 continue;
4568
4569 bitmap_and (tmp_bitmap, other->block_bitmap, loop->block_bitmap);
4570 if (bitmap_empty_p (tmp_bitmap))
4571 continue;
4572 if (bitmap_equal_p (tmp_bitmap, other->block_bitmap))
4573 {
4574 other->outer = loop;
4575 VEC_safe_push (loop_info, heap, loop->loops, other);
4576 }
4577 else if (bitmap_equal_p (tmp_bitmap, loop->block_bitmap))
4578 {
4579 loop->outer = other;
4580 VEC_safe_push (loop_info, heap, other->loops, loop);
4581 }
4582 else
4583 {
4584 if (dump_file)
4585 fprintf (dump_file,
4586 ";; can't find suitable nesting for loops %d and %d\n",
4587 loop->loop_no, other->loop_no);
4588 loop->bad = other->bad = 1;
4589 }
4590 }
4591 }
4592 BITMAP_FREE (tmp_bitmap);
4593
4594 return loops;
4595 }
4596
4597 /* Free up the loop structures in LOOPS. */
4598 static void
4599 free_loops (loop_info loops)
4600 {
4601 while (loops)
4602 {
4603 loop_info loop = loops;
4604 loops = loop->next;
4605 VEC_free (loop_info, heap, loop->loops);
4606 VEC_free (basic_block, heap, loop->blocks);
4607 BITMAP_FREE (loop->block_bitmap);
4608 XDELETE (loop);
4609 }
4610 }
4611
4612 #define BB_AUX_INDEX(BB) ((unsigned)(BB)->aux)
4613
4614 /* The taken-branch edge from the loop end can actually go forward. Since the
4615 Blackfin's LSETUP instruction requires that the loop end be after the loop
4616 start, try to reorder a loop's basic blocks when we find such a case. */
4617 static void
4618 bfin_reorder_loops (loop_info loops, FILE *dump_file)
4619 {
4620 basic_block bb;
4621 loop_info loop;
4622
4623 FOR_EACH_BB (bb)
4624 bb->aux = NULL;
4625 cfg_layout_initialize (0);
4626
4627 for (loop = loops; loop; loop = loop->next)
4628 {
4629 unsigned index;
4630 basic_block bb;
4631 edge e;
4632 edge_iterator ei;
4633
4634 if (loop->bad)
4635 continue;
4636
4637 /* Recreate an index for basic blocks that represents their order. */
4638 for (bb = ENTRY_BLOCK_PTR->next_bb, index = 0;
4639 bb != EXIT_BLOCK_PTR;
4640 bb = bb->next_bb, index++)
4641 bb->aux = (PTR) index;
4642
4643 if (BB_AUX_INDEX (loop->head) < BB_AUX_INDEX (loop->tail))
4644 continue;
4645
4646 FOR_EACH_EDGE (e, ei, loop->head->succs)
4647 {
4648 if (bitmap_bit_p (loop->block_bitmap, e->dest->index)
4649 && BB_AUX_INDEX (e->dest) < BB_AUX_INDEX (loop->tail))
4650 {
4651 basic_block start_bb = e->dest;
4652 basic_block start_prev_bb = start_bb->prev_bb;
4653
4654 if (dump_file)
4655 fprintf (dump_file, ";; Moving block %d before block %d\n",
4656 loop->head->index, start_bb->index);
4657 loop->head->prev_bb->next_bb = loop->head->next_bb;
4658 loop->head->next_bb->prev_bb = loop->head->prev_bb;
4659
4660 loop->head->prev_bb = start_prev_bb;
4661 loop->head->next_bb = start_bb;
4662 start_prev_bb->next_bb = start_bb->prev_bb = loop->head;
4663 break;
4664 }
4665 }
4667 }
4668
4669 FOR_EACH_BB (bb)
4670 {
4671 if (bb->next_bb != EXIT_BLOCK_PTR)
4672 bb->aux = bb->next_bb;
4673 else
4674 bb->aux = NULL;
4675 }
4676 cfg_layout_finalize ();
4677 df_analyze ();
4678 }
4679
4680 /* Run from machine_dependent_reorg, this pass looks for doloop_end insns
4681 and tries to rewrite the RTL of these loops so that proper Blackfin
4682 hardware loops are generated. */
4683
4684 static void
4685 bfin_reorg_loops (FILE *dump_file)
4686 {
4687 loop_info loops = NULL;
4688 loop_info loop;
4689 basic_block bb;
4690 bitmap_obstack stack;
4691
4692 bitmap_obstack_initialize (&stack);
4693
4694 if (dump_file)
4695 fprintf (dump_file, ";; Find loops, first pass\n\n");
4696
4697 loops = bfin_discover_loops (&stack, dump_file);
4698
4699 if (dump_file)
4700 bfin_dump_loops (loops);
4701
4702 bfin_reorder_loops (loops, dump_file);
4703 free_loops (loops);
4704
4705 if (dump_file)
4706 fprintf (dump_file, ";; Find loops, second pass\n\n");
4707
4708 loops = bfin_discover_loops (&stack, dump_file);
4709 if (dump_file)
4710 {
4711 fprintf (dump_file, ";; All loops found:\n\n");
4712 bfin_dump_loops (loops);
4713 }
4714
4715 /* Now apply the optimizations. */
4716 for (loop = loops; loop; loop = loop->next)
4717 bfin_optimize_loop (loop);
4718
4719 if (dump_file)
4720 {
4721 fprintf (dump_file, ";; After hardware loops optimization:\n\n");
4722 bfin_dump_loops (loops);
4723 }
4724
4725 free_loops (loops);
4726
4727 if (dump_file)
4728 print_rtl (dump_file, get_insns ());
4729
4730 FOR_EACH_BB (bb)
4731 bb->aux = NULL;
4732
4733 splitting_loops = 1;
4734 FOR_EACH_BB (bb)
4735 {
4736 rtx insn = BB_END (bb);
4737 if (!JUMP_P (insn))
4738 continue;
4739
4740 try_split (PATTERN (insn), insn, 1);
4741 }
4742 splitting_loops = 0;
4743 }
4744 \f
4745 /* Possibly generate a SEQUENCE out of three insns found in SLOT.
4746 Returns true if we modified the insn chain, false otherwise. */
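/* A bundle holds one 32-bit DSP instruction in slot 0 and two 16-bit
   instructions in slots 1 and 2; missing slots are filled with NOPs
   below, except when optimizing for size.  */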
4747 static bool
4748 gen_one_bundle (rtx slot[3])
4749 {
4750 gcc_assert (slot[1] != NULL_RTX);
4751
4752 /* Don't add extra NOPs if optimizing for size. */
4753 if (optimize_size
4754 && (slot[0] == NULL_RTX || slot[2] == NULL_RTX))
4755 return false;
4756
4757 /* Verify that we really can do the multi-issue. */
4758 if (slot[0])
4759 {
4760 rtx t = NEXT_INSN (slot[0]);
4761 while (t != slot[1])
4762 {
4763 if (GET_CODE (t) != NOTE
4764 || NOTE_KIND (t) != NOTE_INSN_DELETED)
4765 return false;
4766 t = NEXT_INSN (t);
4767 }
4768 }
4769 if (slot[2])
4770 {
4771 rtx t = NEXT_INSN (slot[1]);
4772 while (t != slot[2])
4773 {
4774 if (GET_CODE (t) != NOTE
4775 || NOTE_KIND (t) != NOTE_INSN_DELETED)
4776 return false;
4777 t = NEXT_INSN (t);
4778 }
4779 }
4780
4781 if (slot[0] == NULL_RTX)
4782 {
4783 slot[0] = emit_insn_before (gen_mnop (), slot[1]);
4784 df_insn_rescan (slot[0]);
4785 }
4786 if (slot[2] == NULL_RTX)
4787 {
4788 slot[2] = emit_insn_after (gen_forced_nop (), slot[1]);
4789 df_insn_rescan (slot[2]);
4790 }
4791
4792   /* Avoid line number information being printed inside a bundle. */
4793 if (INSN_LOCATOR (slot[1])
4794 && INSN_LOCATOR (slot[1]) != INSN_LOCATOR (slot[0]))
4795 INSN_LOCATOR (slot[1]) = INSN_LOCATOR (slot[0]);
4796 if (INSN_LOCATOR (slot[2])
4797 && INSN_LOCATOR (slot[2]) != INSN_LOCATOR (slot[0]))
4798 INSN_LOCATOR (slot[2]) = INSN_LOCATOR (slot[0]);
4799
4800 /* Terminate them with "|| " instead of ";" in the output. */
4801 PUT_MODE (slot[0], SImode);
4802 PUT_MODE (slot[1], SImode);
4803 /* Terminate the bundle, for the benefit of reorder_var_tracking_notes. */
4804 PUT_MODE (slot[2], QImode);
4805 return true;
4806 }
4807
4808 /* Go through all insns, and use the information generated during scheduling
4809 to generate SEQUENCEs to represent bundles of instructions issued
4810 simultaneously. */
4811
4812 static void
4813 bfin_gen_bundles (void)
4814 {
4815 basic_block bb;
4816 FOR_EACH_BB (bb)
4817 {
4818 rtx insn, next;
4819 rtx slot[3];
4820 int n_filled = 0;
4821
4822 slot[0] = slot[1] = slot[2] = NULL_RTX;
4823 for (insn = BB_HEAD (bb);; insn = next)
4824 {
4825 int at_end;
4826 rtx delete_this = NULL_RTX;
4827
4828 if (INSN_P (insn))
4829 {
4830 enum attr_type type = get_attr_type (insn);
4831
4832 if (type == TYPE_STALL)
4833 {
4834 gcc_assert (n_filled == 0);
4835 delete_this = insn;
4836 }
4837 else
4838 {
4839 if (type == TYPE_DSP32 || type == TYPE_DSP32SHIFTIMM)
4840 slot[0] = insn;
4841 else if (slot[1] == NULL_RTX)
4842 slot[1] = insn;
4843 else
4844 slot[2] = insn;
4845 n_filled++;
4846 }
4847 }
4848
4849 next = NEXT_INSN (insn);
4850 while (next && insn != BB_END (bb)
4851 && !(INSN_P (next)
4852 && GET_CODE (PATTERN (next)) != USE
4853 && GET_CODE (PATTERN (next)) != CLOBBER))
4854 {
4855 insn = next;
4856 next = NEXT_INSN (insn);
4857 }
4858
4859 /* BB_END can change due to emitting extra NOPs, so check here. */
4860 at_end = insn == BB_END (bb);
4861 if (delete_this == NULL_RTX && (at_end || GET_MODE (next) == TImode))
4862 {
4863 if ((n_filled < 2
4864 || !gen_one_bundle (slot))
4865 && slot[0] != NULL_RTX)
4866 {
4867 rtx pat = PATTERN (slot[0]);
4868 if (GET_CODE (pat) == SET
4869 && GET_CODE (SET_SRC (pat)) == UNSPEC
4870 && XINT (SET_SRC (pat), 1) == UNSPEC_32BIT)
4871 {
4872 SET_SRC (pat) = XVECEXP (SET_SRC (pat), 0, 0);
4873 INSN_CODE (slot[0]) = -1;
4874 df_insn_rescan (slot[0]);
4875 }
4876 }
4877 n_filled = 0;
4878 slot[0] = slot[1] = slot[2] = NULL_RTX;
4879 }
4880 if (delete_this != NULL_RTX)
4881 delete_insn (delete_this);
4882 if (at_end)
4883 break;
4884 }
4885 }
4886 }
4887
4888 /* Ensure that no var tracking notes are emitted in the middle of a
4889 three-instruction bundle. */
4890
4891 static void
4892 reorder_var_tracking_notes (void)
4893 {
4894 basic_block bb;
4895 FOR_EACH_BB (bb)
4896 {
4897 rtx insn, next;
4898 rtx queue = NULL_RTX;
4899 bool in_bundle = false;
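       /* gen_one_bundle gives the first two insns of a bundle SImode and
	  the final one QImode.  Var-location notes found inside a bundle
	  are chained onto QUEUE through their PREV_INSN fields and
	  reemitted after the bundle's final insn.  */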
4900
4901 for (insn = BB_HEAD (bb); insn != BB_END (bb); insn = next)
4902 {
4903 next = NEXT_INSN (insn);
4904
4905 if (INSN_P (insn))
4906 {
4907 /* Emit queued up notes at the last instruction of a bundle. */
4908 if (GET_MODE (insn) == QImode)
4909 {
4910 while (queue)
4911 {
4912 rtx next_queue = PREV_INSN (queue);
4913 PREV_INSN (NEXT_INSN (insn)) = queue;
4914 NEXT_INSN (queue) = NEXT_INSN (insn);
4915 NEXT_INSN (insn) = queue;
4916 PREV_INSN (queue) = insn;
4917 queue = next_queue;
4918 }
4919 in_bundle = false;
4920 }
4921 else if (GET_MODE (insn) == SImode)
4922 in_bundle = true;
4923 }
4924 else if (NOTE_P (insn) && NOTE_KIND (insn) == NOTE_INSN_VAR_LOCATION)
4925 {
4926 if (in_bundle)
4927 {
4928 rtx prev = PREV_INSN (insn);
4929 PREV_INSN (next) = prev;
4930 NEXT_INSN (prev) = next;
4931
4932 PREV_INSN (insn) = queue;
4933 queue = insn;
4934 }
4935 }
4936 }
4937 }
4938 }
4939 \f
4940 /* On some silicon revisions, functions shorter than a certain number of cycles
4941    can cause unpredictable behavior.  Work around this by adding NOPs as
4942 needed. */
4943 static void
4944 workaround_rts_anomaly (void)
4945 {
4946 rtx insn, first_insn = NULL_RTX;
4947 int cycles = 4;
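   /* Assume four cycles' worth of insns must separate the function entry
      from the RTS; count the budget down while scanning and pad with NOPs
      at the entry if any of it remains.  */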
4948
4949 if (! ENABLE_WA_RETS)
4950 return;
4951
4952 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
4953 {
4954 rtx pat;
4955
4956 if (BARRIER_P (insn))
4957 return;
4958
4959 if (NOTE_P (insn) || LABEL_P (insn))
4960 continue;
4961
4962 if (first_insn == NULL_RTX)
4963 first_insn = insn;
4964 pat = PATTERN (insn);
4965 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
4966 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
4967 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
4968 continue;
4969
4970 if (CALL_P (insn))
4971 return;
4972
4973 if (JUMP_P (insn))
4974 {
4975 if (recog_memoized (insn) == CODE_FOR_return_internal)
4976 break;
4977
4978 /* Nothing to worry about for direct jumps. */
4979 if (!any_condjump_p (insn))
4980 return;
4981 if (cycles <= 1)
4982 return;
4983 cycles--;
4984 }
4985 else if (INSN_P (insn))
4986 {
4987 rtx pat = PATTERN (insn);
4988 int this_cycles = 1;
4989
4990 if (GET_CODE (pat) == PARALLEL)
4991 {
4992 if (push_multiple_operation (pat, VOIDmode)
4993 || pop_multiple_operation (pat, VOIDmode))
4994 this_cycles = n_regs_to_save;
4995 }
4996 else
4997 {
4998 enum insn_code icode = recog_memoized (insn);
4999 if (icode == CODE_FOR_link)
5000 this_cycles = 4;
5001 else if (icode == CODE_FOR_unlink)
5002 this_cycles = 3;
5003 else if (icode == CODE_FOR_mulsi3)
5004 this_cycles = 5;
5005 }
5006 if (this_cycles >= cycles)
5007 return;
5008
5009 cycles -= this_cycles;
5010 }
5011 }
5012 while (cycles > 0)
5013 {
5014 emit_insn_before (gen_nop (), first_insn);
5015 cycles--;
5016 }
5017 }
5018
5019 /* Return an insn type for INSN that can be used by the caller for anomaly
5020 workarounds. This differs from plain get_attr_type in that it handles
5021 SEQUENCEs. */
5022
5023 static enum attr_type
5024 type_for_anomaly (rtx insn)
5025 {
5026 rtx pat = PATTERN (insn);
5027 if (GET_CODE (pat) == SEQUENCE)
5028 {
5029 enum attr_type t;
5030 t = get_attr_type (XVECEXP (pat, 0, 1));
5031 if (t == TYPE_MCLD)
5032 return t;
5033 t = get_attr_type (XVECEXP (pat, 0, 2));
5034 if (t == TYPE_MCLD)
5035 return t;
5036 return TYPE_MCST;
5037 }
5038 else
5039 return get_attr_type (insn);
5040 }
5041
5042 /* Return nonzero if INSN contains any loads that may trap. It handles
5043 SEQUENCEs correctly. */
5044
5045 static bool
5046 trapping_loads_p (rtx insn)
5047 {
5048 rtx pat = PATTERN (insn);
5049 if (GET_CODE (pat) == SEQUENCE)
5050 {
5051 enum attr_type t;
5052 t = get_attr_type (XVECEXP (pat, 0, 1));
5053 if (t == TYPE_MCLD
5054 && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 1)))))
5055 return true;
5056 t = get_attr_type (XVECEXP (pat, 0, 2));
5057 if (t == TYPE_MCLD
5058 && may_trap_p (SET_SRC (PATTERN (XVECEXP (pat, 0, 2)))))
5059 return true;
5060 return false;
5061 }
5062 else
5063 return may_trap_p (SET_SRC (single_set (insn)));
5064 }
5065
5066 /* Return INSN if it is of TYPE_MCLD. Alternatively, if INSN is the start of
5067 a three-insn bundle, see if one of them is a load and return that if so.
5068 Return NULL_RTX if the insn does not contain loads. */
5069 static rtx
5070 find_load (rtx insn)
5071 {
5072 if (get_attr_type (insn) == TYPE_MCLD)
5073 return insn;
5074 if (GET_MODE (insn) != SImode)
5075 return NULL_RTX;
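   /* Within a bundle, insns carry SImode except the final one, which
      carries QImode (see gen_one_bundle); scan the remaining slots for
      a load.  */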
5076 do {
5077 insn = NEXT_INSN (insn);
5078 if ((GET_MODE (insn) == SImode || GET_MODE (insn) == QImode)
5079 && get_attr_type (insn) == TYPE_MCLD)
5080 return insn;
5081 } while (GET_MODE (insn) != QImode);
5082 return NULL_RTX;
5083 }
5084
5085 /* Determine whether PAT is an indirect call pattern. */
5086 static bool
5087 indirect_call_p (rtx pat)
5088 {
5089 if (GET_CODE (pat) == PARALLEL)
5090 pat = XVECEXP (pat, 0, 0);
5091 if (GET_CODE (pat) == SET)
5092 pat = SET_SRC (pat);
5093 gcc_assert (GET_CODE (pat) == CALL);
5094 pat = XEXP (pat, 0);
5095 gcc_assert (GET_CODE (pat) == MEM);
5096 pat = XEXP (pat, 0);
5097
5098 return REG_P (pat);
5099 }
5100
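/* Scan the function for conditional branches after which speculative
   execution could reach a trapping load, a CSYNC/SSYNC, or an indirect
   call too early, and insert the NOPs or prediction changes needed to
   prevent that.  */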
5101 static void
5102 workaround_speculation (void)
5103 {
5104 rtx insn, next;
5105 rtx last_condjump = NULL_RTX;
5106 int cycles_since_jump = INT_MAX;
5107 int delay_added = 0;
5108
5109 if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
5110 && ! ENABLE_WA_INDIRECT_CALLS)
5111 return;
5112
5113 /* First pass: find predicted-false branches; if something after them
5114 needs nops, insert them or change the branch to predict true. */
5115 for (insn = get_insns (); insn; insn = next)
5116 {
5117 rtx pat;
5118 int delay_needed = 0;
5119
5120 next = find_next_insn_start (insn);
5121
5122 if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
5123 continue;
5124
5125 pat = PATTERN (insn);
5126 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5127 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5128 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5129 continue;
5130
5131 if (JUMP_P (insn))
5132 {
5133 if (any_condjump_p (insn)
5134 && ! cbranch_predicted_taken_p (insn))
5135 {
5136 last_condjump = insn;
5137 delay_added = 0;
5138 cycles_since_jump = 0;
5139 }
5140 else
5141 cycles_since_jump = INT_MAX;
5142 }
5143 else if (CALL_P (insn))
5144 {
5145 if (cycles_since_jump < INT_MAX)
5146 cycles_since_jump++;
5147 if (indirect_call_p (pat) && ENABLE_WA_INDIRECT_CALLS)
5148 {
5149 delay_needed = 3;
5150 }
5151 }
5152 else if (INSN_P (insn))
5153 {
5154 rtx load_insn = find_load (insn);
5155 enum attr_type type = type_for_anomaly (insn);
5156
5157 if (cycles_since_jump < INT_MAX)
5158 cycles_since_jump++;
5159
5160 if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
5161 {
5162 if (trapping_loads_p (load_insn))
5163 delay_needed = 4;
5164 }
5165 else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
5166 delay_needed = 3;
5167 }
5168
5169 if (delay_needed > cycles_since_jump
5170 && (delay_needed - cycles_since_jump) > delay_added)
5171 {
5172 rtx pat1;
5173 int num_clobbers;
5174 rtx *op = recog_data.operand;
5175
5176 delay_needed -= cycles_since_jump;
5177
5178 extract_insn (last_condjump);
5179 if (optimize_size)
5180 {
5181 pat1 = gen_cbranch_predicted_taken (op[0], op[1], op[2],
5182 op[3]);
5183 cycles_since_jump = INT_MAX;
5184 }
5185 else
5186 {
5187 /* Do not adjust cycles_since_jump in this case, so that
5188 we'll increase the number of NOPs for a subsequent insn
5189 if necessary. */
5190 pat1 = gen_cbranch_with_nops (op[0], op[1], op[2], op[3],
5191 GEN_INT (delay_needed));
5192 delay_added = delay_needed;
5193 }
5194 PATTERN (last_condjump) = pat1;
5195 INSN_CODE (last_condjump) = recog (pat1, insn, &num_clobbers);
5196 }
5197 if (CALL_P (insn))
5198 {
5199 cycles_since_jump = INT_MAX;
5200 delay_added = 0;
5201 }
5202 }
5203
5204 /* Second pass: for predicted-true branches, see if anything at the
5205 branch destination needs extra nops. */
5206 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5207 {
5208 int cycles_since_jump;
5209 if (JUMP_P (insn)
5210 && any_condjump_p (insn)
5211 && (INSN_CODE (insn) == CODE_FOR_cbranch_predicted_taken
5212 || cbranch_predicted_taken_p (insn)))
5213 {
5214 rtx target = JUMP_LABEL (insn);
5215 rtx label = target;
5216 rtx next_tgt;
5217
5218 cycles_since_jump = 0;
5219 for (; target && cycles_since_jump < 3; target = next_tgt)
5220 {
5221 rtx pat;
5222
5223 next_tgt = find_next_insn_start (target);
5224
5225 if (NOTE_P (target) || BARRIER_P (target) || LABEL_P (target))
5226 continue;
5227
5228 pat = PATTERN (target);
5229 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5230 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5231 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5232 continue;
5233
5234 if (INSN_P (target))
5235 {
5236 rtx load_insn = find_load (target);
5237 enum attr_type type = type_for_anomaly (target);
5238 int delay_needed = 0;
5239 if (cycles_since_jump < INT_MAX)
5240 cycles_since_jump++;
5241
5242 if (load_insn && ENABLE_WA_SPECULATIVE_LOADS)
5243 {
5244 if (trapping_loads_p (load_insn))
5245 delay_needed = 2;
5246 }
5247 else if (type == TYPE_SYNC && ENABLE_WA_SPECULATIVE_SYNCS)
5248 delay_needed = 2;
5249
5250 if (delay_needed > cycles_since_jump)
5251 {
5252 rtx prev = prev_real_insn (label);
5253 delay_needed -= cycles_since_jump;
5254 if (dump_file)
5255 fprintf (dump_file, "Adding %d nops after %d\n",
5256 delay_needed, INSN_UID (label));
5257 if (JUMP_P (prev)
5258 && INSN_CODE (prev) == CODE_FOR_cbranch_with_nops)
5259 {
5260 rtx x;
5261 HOST_WIDE_INT v;
5262
5263 if (dump_file)
5264 fprintf (dump_file,
5265 "Reducing nops on insn %d.\n",
5266 INSN_UID (prev));
5267 x = PATTERN (prev);
5268 x = XVECEXP (x, 0, 1);
5269 v = INTVAL (XVECEXP (x, 0, 0)) - delay_needed;
5270 XVECEXP (x, 0, 0) = GEN_INT (v);
5271 }
5272 while (delay_needed-- > 0)
5273 emit_insn_after (gen_nop (), label);
5274 break;
5275 }
5276 }
5277 }
5278 }
5279 }
5280 }
5281
5282 /* Called just before the final scheduling pass. If we need to insert NOPs
5283 later on to work around speculative loads, insert special placeholder
5284 insns that cause loads to be delayed for as many cycles as necessary
5285 (and possible). This reduces the number of NOPs we need to add.
5286 The dummy insns we generate are later removed by bfin_gen_bundles. */
5287 static void
5288 add_sched_insns_for_speculation (void)
5289 {
5290 rtx insn;
5291
5292 if (! ENABLE_WA_SPECULATIVE_LOADS && ! ENABLE_WA_SPECULATIVE_SYNCS
5293 && ! ENABLE_WA_INDIRECT_CALLS)
5294 return;
5295
5296   /* First pass: insert a three-cycle stall placeholder after every
5297      predicted-false branch, to keep later loads and syncs away from it. */
5298 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5299 {
5300 rtx pat;
5301
5302 if (NOTE_P (insn) || BARRIER_P (insn) || LABEL_P (insn))
5303 continue;
5304
5305 pat = PATTERN (insn);
5306 if (GET_CODE (pat) == USE || GET_CODE (pat) == CLOBBER
5307 || GET_CODE (pat) == ASM_INPUT || GET_CODE (pat) == ADDR_VEC
5308 || GET_CODE (pat) == ADDR_DIFF_VEC || asm_noperands (pat) >= 0)
5309 continue;
5310
5311 if (JUMP_P (insn))
5312 {
5313 if (any_condjump_p (insn)
5314 && !cbranch_predicted_taken_p (insn))
5315 {
5316 rtx n = next_real_insn (insn);
5317 emit_insn_before (gen_stall (GEN_INT (3)), n);
5318 }
5319 }
5320 }
5321
5322   /* Second pass: for predicted-true branches, add a one-cycle stall
5323      placeholder at the branch destination unless one is already present. */
5324 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
5325 {
5326 if (JUMP_P (insn)
5327 && any_condjump_p (insn)
5328 && (cbranch_predicted_taken_p (insn)))
5329 {
5330 rtx target = JUMP_LABEL (insn);
5331 rtx next = next_real_insn (target);
5332
5333 if (GET_CODE (PATTERN (next)) == UNSPEC_VOLATILE
5334 && get_attr_type (next) == TYPE_STALL)
5335 continue;
5336 emit_insn_before (gen_stall (GEN_INT (1)), next);
5337 }
5338 }
5339 }
5340
5341 /* We use the machine specific reorg pass for emitting CSYNC instructions
5342 after conditional branches as needed.
5343
5344 The Blackfin is unusual in that a code sequence like
5345 if cc jump label
5346 r0 = (p0)
5347 may speculatively perform the load even if the condition isn't true. This
5348 happens for a branch that is predicted not taken, because the pipeline
5349 isn't flushed or stalled, so the early stages of the following instructions,
5350 which perform the memory reference, are allowed to execute before the
5351 jump condition is evaluated.
5352 Therefore, we must insert additional instructions in all places where this
5353 could lead to incorrect behavior. The manual recommends CSYNC, while
5354 VDSP seems to use NOPs (even though its corresponding compiler option is
5355 named CSYNC).
5356
5357 When optimizing for speed, we emit NOPs, which seems faster than a CSYNC.
5358 When optimizing for size, we turn the branch into a predicted taken one.
5359 This may be slower due to mispredicts, but saves code size. */
5360
5361 static void
5362 bfin_reorg (void)
5363 {
5364 /* We are freeing block_for_insn in the toplev to keep compatibility
5365 with old MDEP_REORGS that are not CFG based. Recompute it now. */
5366 compute_bb_for_insn ();
5367
5368 if (bfin_flag_schedule_insns2)
5369 {
5370 splitting_for_sched = 1;
5371 split_all_insns ();
5372 splitting_for_sched = 0;
5373
5374 add_sched_insns_for_speculation ();
5375
5376 timevar_push (TV_SCHED2);
5377 schedule_insns ();
5378 timevar_pop (TV_SCHED2);
5379
5380 /* Examine the schedule and insert nops as necessary for 64-bit parallel
5381 instructions. */
5382 bfin_gen_bundles ();
5383 }
5384
5385 df_analyze ();
5386
5387 /* Doloop optimization */
5388 if (cfun->machine->has_hardware_loops)
5389 bfin_reorg_loops (dump_file);
5390
5391 workaround_speculation ();
5392
5393 if (bfin_flag_var_tracking)
5394 {
5395 timevar_push (TV_VAR_TRACKING);
5396 variable_tracking_main ();
5397 reorder_var_tracking_notes ();
5398 timevar_pop (TV_VAR_TRACKING);
5399 }
5400
5401 df_finish_pass (false);
5402
5403 workaround_rts_anomaly ();
5404 }
5405 \f
5406 /* Handle interrupt_handler, exception_handler and nmi_handler function
5407 attributes; arguments as in struct attribute_spec.handler. */
5408
5409 static tree
5410 handle_int_attribute (tree *node, tree name,
5411 tree args ATTRIBUTE_UNUSED,
5412 int flags ATTRIBUTE_UNUSED,
5413 bool *no_add_attrs)
5414 {
5415 tree x = *node;
5416 if (TREE_CODE (x) == FUNCTION_DECL)
5417 x = TREE_TYPE (x);
5418
5419 if (TREE_CODE (x) != FUNCTION_TYPE)
5420 {
5421 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5422 name);
5423 *no_add_attrs = true;
5424 }
5425 else if (funkind (x) != SUBROUTINE)
5426 error ("multiple function type attributes specified");
5427
5428 return NULL_TREE;
5429 }
5430
5431 /* Return 0 if the attributes for two types are incompatible, 1 if they
5432 are compatible, and 2 if they are nearly compatible (which causes a
5433 warning to be generated). */
5434
5435 static int
5436 bfin_comp_type_attributes (const_tree type1, const_tree type2)
5437 {
5438 e_funkind kind1, kind2;
5439
5440 if (TREE_CODE (type1) != FUNCTION_TYPE)
5441 return 1;
5442
5443 kind1 = funkind (type1);
5444 kind2 = funkind (type2);
5445
5446 if (kind1 != kind2)
5447 return 0;
5448
5449 /* Check for mismatched modifiers */
5450 if (!lookup_attribute ("nesting", TYPE_ATTRIBUTES (type1))
5451 != !lookup_attribute ("nesting", TYPE_ATTRIBUTES (type2)))
5452 return 0;
5453
5454 if (!lookup_attribute ("saveall", TYPE_ATTRIBUTES (type1))
5455 != !lookup_attribute ("saveall", TYPE_ATTRIBUTES (type2)))
5456 return 0;
5457
5458 if (!lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type1))
5459 != !lookup_attribute ("kspisusp", TYPE_ATTRIBUTES (type2)))
5460 return 0;
5461
5462 if (!lookup_attribute ("longcall", TYPE_ATTRIBUTES (type1))
5463 != !lookup_attribute ("longcall", TYPE_ATTRIBUTES (type2)))
5464 return 0;
5465
5466 return 1;
5467 }
5468
5469 /* Handle a "longcall" or "shortcall" attribute; arguments as in
5470 struct attribute_spec.handler. */
5471
5472 static tree
5473 bfin_handle_longcall_attribute (tree *node, tree name,
5474 tree args ATTRIBUTE_UNUSED,
5475 int flags ATTRIBUTE_UNUSED,
5476 bool *no_add_attrs)
5477 {
5478 if (TREE_CODE (*node) != FUNCTION_TYPE
5479 && TREE_CODE (*node) != FIELD_DECL
5480 && TREE_CODE (*node) != TYPE_DECL)
5481 {
5482 warning (OPT_Wattributes, "%qE attribute only applies to functions",
5483 name);
5484 *no_add_attrs = true;
5485 }
5486
5487 if ((strcmp (IDENTIFIER_POINTER (name), "longcall") == 0
5488 && lookup_attribute ("shortcall", TYPE_ATTRIBUTES (*node)))
5489 || (strcmp (IDENTIFIER_POINTER (name), "shortcall") == 0
5490 && lookup_attribute ("longcall", TYPE_ATTRIBUTES (*node))))
5491 {
5492 warning (OPT_Wattributes,
5493 "can't apply both longcall and shortcall attributes to the same function");
5494 *no_add_attrs = true;
5495 }
5496
5497 return NULL_TREE;
5498 }
5499
5500 /* Handle a "l1_text" attribute; arguments as in
5501 struct attribute_spec.handler. */
5502
5503 static tree
5504 bfin_handle_l1_text_attribute (tree *node, tree name, tree ARG_UNUSED (args),
5505 int ARG_UNUSED (flags), bool *no_add_attrs)
5506 {
5507 tree decl = *node;
5508
5509 if (TREE_CODE (decl) != FUNCTION_DECL)
5510 {
5511 error ("%qE attribute only applies to functions",
5512 name);
5513 *no_add_attrs = true;
5514 }
5515
5516 /* The decl may have already been given a section attribute
5517 from a previous declaration. Ensure they match. */
5518 else if (DECL_SECTION_NAME (decl) != NULL_TREE
5519 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5520 ".l1.text") != 0)
5521 {
5522 error ("section of %q+D conflicts with previous declaration",
5523 decl);
5524 *no_add_attrs = true;
5525 }
5526 else
5527 DECL_SECTION_NAME (decl) = build_string (9, ".l1.text");
5528
5529 return NULL_TREE;
5530 }
5531
5532 /* Handle a "l1_data", "l1_data_A" or "l1_data_B" attribute;
5533 arguments as in struct attribute_spec.handler. */
5534
5535 static tree
5536 bfin_handle_l1_data_attribute (tree *node, tree name, tree ARG_UNUSED (args),
5537 int ARG_UNUSED (flags), bool *no_add_attrs)
5538 {
5539 tree decl = *node;
5540
5541 if (TREE_CODE (decl) != VAR_DECL)
5542 {
5543 error ("%qE attribute only applies to variables",
5544 name);
5545 *no_add_attrs = true;
5546 }
5547 else if (current_function_decl != NULL_TREE
5548 && !TREE_STATIC (decl))
5549 {
5550 error ("%qE attribute cannot be specified for local variables",
5551 name);
5552 *no_add_attrs = true;
5553 }
5554 else
5555 {
5556 const char *section_name;
5557
5558 if (strcmp (IDENTIFIER_POINTER (name), "l1_data") == 0)
5559 section_name = ".l1.data";
5560 else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_A") == 0)
5561 section_name = ".l1.data.A";
5562 else if (strcmp (IDENTIFIER_POINTER (name), "l1_data_B") == 0)
5563 section_name = ".l1.data.B";
5564 else
5565 gcc_unreachable ();
5566
5567 /* The decl may have already been given a section attribute
5568 from a previous declaration. Ensure they match. */
5569 if (DECL_SECTION_NAME (decl) != NULL_TREE
5570 && strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)),
5571 section_name) != 0)
5572 {
5573 error ("section of %q+D conflicts with previous declaration",
5574 decl);
5575 *no_add_attrs = true;
5576 }
5577 else
5578 DECL_SECTION_NAME (decl)
5579 = build_string (strlen (section_name) + 1, section_name);
5580 }
5581
5582 return NULL_TREE;
5583 }
5584
5585 /* Table of valid machine attributes. */
5586 static const struct attribute_spec bfin_attribute_table[] =
5587 {
5588 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
5589 { "interrupt_handler", 0, 0, false, true, true, handle_int_attribute },
5590 { "exception_handler", 0, 0, false, true, true, handle_int_attribute },
5591 { "nmi_handler", 0, 0, false, true, true, handle_int_attribute },
5592 { "nesting", 0, 0, false, true, true, NULL },
5593 { "kspisusp", 0, 0, false, true, true, NULL },
5594 { "saveall", 0, 0, false, true, true, NULL },
5595 { "longcall", 0, 0, false, true, true, bfin_handle_longcall_attribute },
5596 { "shortcall", 0, 0, false, true, true, bfin_handle_longcall_attribute },
5597 { "l1_text", 0, 0, true, false, false, bfin_handle_l1_text_attribute },
5598 { "l1_data", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
5599 { "l1_data_A", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
5600 { "l1_data_B", 0, 0, true, false, false, bfin_handle_l1_data_attribute },
5601 { NULL, 0, 0, false, false, false, NULL }
5602 };
5603 \f
5604 /* Implementation of TARGET_ASM_INTEGER. When using FD-PIC, we need to
5605 tell the assembler to generate pointers to function descriptors in
5606 some cases. */
5607
5608 static bool
5609 bfin_assemble_integer (rtx value, unsigned int size, int aligned_p)
5610 {
5611 if (TARGET_FDPIC && size == UNITS_PER_WORD)
5612 {
5613 if (GET_CODE (value) == SYMBOL_REF
5614 && SYMBOL_REF_FUNCTION_P (value))
5615 {
5616 fputs ("\t.picptr\tfuncdesc(", asm_out_file);
5617 output_addr_const (asm_out_file, value);
5618 fputs (")\n", asm_out_file);
5619 return true;
5620 }
5621 if (!aligned_p)
5622 {
5623 /* We've set the unaligned SI op to NULL, so we always have to
5624 handle the unaligned case here. */
5625 assemble_integer_with_op ("\t.4byte\t", value);
5626 return true;
5627 }
5628 }
5629 return default_assemble_integer (value, size, aligned_p);
5630 }
5631 \f
5632 /* Output the assembler code for a thunk function. THUNK_DECL is the
5633 declaration for the thunk function itself, FUNCTION is the decl for
5634 the target function. DELTA is an immediate constant offset to be
5635 added to THIS. If VCALL_OFFSET is nonzero, the word at
5636 *(*this + vcall_offset) should be added to THIS. */
5637
5638 static void
5639 bfin_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
5640 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
5641 HOST_WIDE_INT vcall_offset, tree function)
5642 {
5643 rtx xops[3];
5644 /* The this parameter is passed as the first argument. */
5645 rtx this_rtx = gen_rtx_REG (Pmode, REG_R0);
5646
5647 /* Adjust the this parameter by a fixed constant. */
5648 if (delta)
5649 {
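       /* A single "Preg += imm" instruction accepts only a signed 7-bit
	  immediate (-64..63); deltas up to roughly twice that range are
	  split into two adds, and anything larger is loaded into R3.  */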
5650 xops[1] = this_rtx;
5651 if (delta >= -64 && delta <= 63)
5652 {
5653 xops[0] = GEN_INT (delta);
5654 output_asm_insn ("%1 += %0;", xops);
5655 }
5656 else if (delta >= -128 && delta < -64)
5657 {
5658 xops[0] = GEN_INT (delta + 64);
5659 output_asm_insn ("%1 += -64; %1 += %0;", xops);
5660 }
5661 else if (delta > 63 && delta <= 126)
5662 {
5663 xops[0] = GEN_INT (delta - 63);
5664 output_asm_insn ("%1 += 63; %1 += %0;", xops);
5665 }
5666 else
5667 {
5668 xops[0] = GEN_INT (delta);
5669 output_asm_insn ("r3.l = %h0; r3.h = %d0; %1 = %1 + r3;", xops);
5670 }
5671 }
5672
5673 /* Adjust the this parameter by a value stored in the vtable. */
5674 if (vcall_offset)
5675 {
5676 rtx p2tmp = gen_rtx_REG (Pmode, REG_P2);
5677 rtx tmp = gen_rtx_REG (Pmode, REG_R3);
5678
5679 xops[1] = tmp;
5680 xops[2] = p2tmp;
5681 output_asm_insn ("%2 = r0; %2 = [%2];", xops);
5682
5683 /* Adjust the this parameter. */
5684 xops[0] = gen_rtx_MEM (Pmode, plus_constant (p2tmp, vcall_offset));
5685 if (!memory_operand (xops[0], Pmode))
5686 {
5687 rtx tmp2 = gen_rtx_REG (Pmode, REG_P1);
5688 xops[0] = GEN_INT (vcall_offset);
5689 xops[1] = tmp2;
5690 output_asm_insn ("%h1 = %h0; %d1 = %d0; %2 = %2 + %1", xops);
5691 xops[0] = gen_rtx_MEM (Pmode, p2tmp);
5692 }
5693 xops[2] = this_rtx;
5694 output_asm_insn ("%1 = %0; %2 = %2 + %1;", xops);
5695 }
5696
5697 xops[0] = XEXP (DECL_RTL (function), 0);
5698 if (1 || !flag_pic || (*targetm.binds_local_p) (function))
5699 output_asm_insn ("jump.l\t%P0", xops);
5700 }
5701 \f
5702 /* Codes for all the Blackfin builtins. */
5703 enum bfin_builtins
5704 {
5705 BFIN_BUILTIN_CSYNC,
5706 BFIN_BUILTIN_SSYNC,
5707 BFIN_BUILTIN_ONES,
5708 BFIN_BUILTIN_COMPOSE_2X16,
5709 BFIN_BUILTIN_EXTRACTLO,
5710 BFIN_BUILTIN_EXTRACTHI,
5711
5712 BFIN_BUILTIN_SSADD_2X16,
5713 BFIN_BUILTIN_SSSUB_2X16,
5714 BFIN_BUILTIN_SSADDSUB_2X16,
5715 BFIN_BUILTIN_SSSUBADD_2X16,
5716 BFIN_BUILTIN_MULT_2X16,
5717 BFIN_BUILTIN_MULTR_2X16,
5718 BFIN_BUILTIN_NEG_2X16,
5719 BFIN_BUILTIN_ABS_2X16,
5720 BFIN_BUILTIN_MIN_2X16,
5721 BFIN_BUILTIN_MAX_2X16,
5722
5723 BFIN_BUILTIN_SSADD_1X16,
5724 BFIN_BUILTIN_SSSUB_1X16,
5725 BFIN_BUILTIN_MULT_1X16,
5726 BFIN_BUILTIN_MULTR_1X16,
5727 BFIN_BUILTIN_NORM_1X16,
5728 BFIN_BUILTIN_NEG_1X16,
5729 BFIN_BUILTIN_ABS_1X16,
5730 BFIN_BUILTIN_MIN_1X16,
5731 BFIN_BUILTIN_MAX_1X16,
5732
5733 BFIN_BUILTIN_SUM_2X16,
5734 BFIN_BUILTIN_DIFFHL_2X16,
5735 BFIN_BUILTIN_DIFFLH_2X16,
5736
5737 BFIN_BUILTIN_SSADD_1X32,
5738 BFIN_BUILTIN_SSSUB_1X32,
5739 BFIN_BUILTIN_NORM_1X32,
5740 BFIN_BUILTIN_ROUND_1X32,
5741 BFIN_BUILTIN_NEG_1X32,
5742 BFIN_BUILTIN_ABS_1X32,
5743 BFIN_BUILTIN_MIN_1X32,
5744 BFIN_BUILTIN_MAX_1X32,
5745 BFIN_BUILTIN_MULT_1X32,
5746 BFIN_BUILTIN_MULT_1X32X32,
5747 BFIN_BUILTIN_MULT_1X32X32NS,
5748
5749 BFIN_BUILTIN_MULHISILL,
5750 BFIN_BUILTIN_MULHISILH,
5751 BFIN_BUILTIN_MULHISIHL,
5752 BFIN_BUILTIN_MULHISIHH,
5753
5754 BFIN_BUILTIN_LSHIFT_1X16,
5755 BFIN_BUILTIN_LSHIFT_2X16,
5756 BFIN_BUILTIN_SSASHIFT_1X16,
5757 BFIN_BUILTIN_SSASHIFT_2X16,
5758 BFIN_BUILTIN_SSASHIFT_1X32,
5759
5760 BFIN_BUILTIN_CPLX_MUL_16,
5761 BFIN_BUILTIN_CPLX_MAC_16,
5762 BFIN_BUILTIN_CPLX_MSU_16,
5763
5764 BFIN_BUILTIN_CPLX_MUL_16_S40,
5765 BFIN_BUILTIN_CPLX_MAC_16_S40,
5766 BFIN_BUILTIN_CPLX_MSU_16_S40,
5767
5768 BFIN_BUILTIN_CPLX_SQU,
5769
5770 BFIN_BUILTIN_LOADBYTES,
5771
5772 BFIN_BUILTIN_MAX
5773 };
5774
5775 #define def_builtin(NAME, TYPE, CODE) \
5776 do { \
5777 add_builtin_function ((NAME), (TYPE), (CODE), BUILT_IN_MD, \
5778 NULL, NULL_TREE); \
5779 } while (0)
5780
5781 /* Set up all builtin functions for this target. */
5782 static void
5783 bfin_init_builtins (void)
5784 {
5785 tree V2HI_type_node = build_vector_type_for_mode (intHI_type_node, V2HImode);
5786 tree void_ftype_void
5787 = build_function_type (void_type_node, void_list_node);
5788 tree short_ftype_short
5789 = build_function_type_list (short_integer_type_node, short_integer_type_node,
5790 NULL_TREE);
5791 tree short_ftype_int_int
5792 = build_function_type_list (short_integer_type_node, integer_type_node,
5793 integer_type_node, NULL_TREE);
5794 tree int_ftype_int_int
5795 = build_function_type_list (integer_type_node, integer_type_node,
5796 integer_type_node, NULL_TREE);
5797 tree int_ftype_int
5798 = build_function_type_list (integer_type_node, integer_type_node,
5799 NULL_TREE);
5800 tree short_ftype_int
5801 = build_function_type_list (short_integer_type_node, integer_type_node,
5802 NULL_TREE);
5803 tree int_ftype_v2hi_v2hi
5804 = build_function_type_list (integer_type_node, V2HI_type_node,
5805 V2HI_type_node, NULL_TREE);
5806 tree v2hi_ftype_v2hi_v2hi
5807 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5808 V2HI_type_node, NULL_TREE);
5809 tree v2hi_ftype_v2hi_v2hi_v2hi
5810 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5811 V2HI_type_node, V2HI_type_node, NULL_TREE);
5812 tree v2hi_ftype_int_int
5813 = build_function_type_list (V2HI_type_node, integer_type_node,
5814 integer_type_node, NULL_TREE);
5815 tree v2hi_ftype_v2hi_int
5816 = build_function_type_list (V2HI_type_node, V2HI_type_node,
5817 integer_type_node, NULL_TREE);
5818 tree int_ftype_short_short
5819 = build_function_type_list (integer_type_node, short_integer_type_node,
5820 short_integer_type_node, NULL_TREE);
5821 tree v2hi_ftype_v2hi
5822 = build_function_type_list (V2HI_type_node, V2HI_type_node, NULL_TREE);
5823 tree short_ftype_v2hi
5824 = build_function_type_list (short_integer_type_node, V2HI_type_node,
5825 NULL_TREE);
5826 tree int_ftype_pint
5827 = build_function_type_list (integer_type_node,
5828 build_pointer_type (integer_type_node),
5829 NULL_TREE);
5830
5831 /* Register the Blackfin builtin functions with their types.  */
5832 def_builtin ("__builtin_bfin_csync", void_ftype_void, BFIN_BUILTIN_CSYNC);
5833 def_builtin ("__builtin_bfin_ssync", void_ftype_void, BFIN_BUILTIN_SSYNC);
5834
5835 def_builtin ("__builtin_bfin_ones", short_ftype_int, BFIN_BUILTIN_ONES);
5836
5837 def_builtin ("__builtin_bfin_compose_2x16", v2hi_ftype_int_int,
5838 BFIN_BUILTIN_COMPOSE_2X16);
5839 def_builtin ("__builtin_bfin_extract_hi", short_ftype_v2hi,
5840 BFIN_BUILTIN_EXTRACTHI);
5841 def_builtin ("__builtin_bfin_extract_lo", short_ftype_v2hi,
5842 BFIN_BUILTIN_EXTRACTLO);
5843
5844 def_builtin ("__builtin_bfin_min_fr2x16", v2hi_ftype_v2hi_v2hi,
5845 BFIN_BUILTIN_MIN_2X16);
5846 def_builtin ("__builtin_bfin_max_fr2x16", v2hi_ftype_v2hi_v2hi,
5847 BFIN_BUILTIN_MAX_2X16);
5848
5849 def_builtin ("__builtin_bfin_add_fr2x16", v2hi_ftype_v2hi_v2hi,
5850 BFIN_BUILTIN_SSADD_2X16);
5851 def_builtin ("__builtin_bfin_sub_fr2x16", v2hi_ftype_v2hi_v2hi,
5852 BFIN_BUILTIN_SSSUB_2X16);
5853 def_builtin ("__builtin_bfin_dspaddsubsat", v2hi_ftype_v2hi_v2hi,
5854 BFIN_BUILTIN_SSADDSUB_2X16);
5855 def_builtin ("__builtin_bfin_dspsubaddsat", v2hi_ftype_v2hi_v2hi,
5856 BFIN_BUILTIN_SSSUBADD_2X16);
5857 def_builtin ("__builtin_bfin_mult_fr2x16", v2hi_ftype_v2hi_v2hi,
5858 BFIN_BUILTIN_MULT_2X16);
5859 def_builtin ("__builtin_bfin_multr_fr2x16", v2hi_ftype_v2hi_v2hi,
5860 BFIN_BUILTIN_MULTR_2X16);
5861 def_builtin ("__builtin_bfin_negate_fr2x16", v2hi_ftype_v2hi,
5862 BFIN_BUILTIN_NEG_2X16);
5863 def_builtin ("__builtin_bfin_abs_fr2x16", v2hi_ftype_v2hi,
5864 BFIN_BUILTIN_ABS_2X16);
5865
5866 def_builtin ("__builtin_bfin_min_fr1x16", short_ftype_int_int,
5867 BFIN_BUILTIN_MIN_1X16);
5868 def_builtin ("__builtin_bfin_max_fr1x16", short_ftype_int_int,
5869 BFIN_BUILTIN_MAX_1X16);
5870
5871 def_builtin ("__builtin_bfin_add_fr1x16", short_ftype_int_int,
5872 BFIN_BUILTIN_SSADD_1X16);
5873 def_builtin ("__builtin_bfin_sub_fr1x16", short_ftype_int_int,
5874 BFIN_BUILTIN_SSSUB_1X16);
5875 def_builtin ("__builtin_bfin_mult_fr1x16", short_ftype_int_int,
5876 BFIN_BUILTIN_MULT_1X16);
5877 def_builtin ("__builtin_bfin_multr_fr1x16", short_ftype_int_int,
5878 BFIN_BUILTIN_MULTR_1X16);
5879 def_builtin ("__builtin_bfin_negate_fr1x16", short_ftype_short,
5880 BFIN_BUILTIN_NEG_1X16);
5881 def_builtin ("__builtin_bfin_abs_fr1x16", short_ftype_short,
5882 BFIN_BUILTIN_ABS_1X16);
5883 def_builtin ("__builtin_bfin_norm_fr1x16", short_ftype_int,
5884 BFIN_BUILTIN_NORM_1X16);
5885
5886 def_builtin ("__builtin_bfin_sum_fr2x16", short_ftype_v2hi,
5887 BFIN_BUILTIN_SUM_2X16);
5888 def_builtin ("__builtin_bfin_diff_hl_fr2x16", short_ftype_v2hi,
5889 BFIN_BUILTIN_DIFFHL_2X16);
5890 def_builtin ("__builtin_bfin_diff_lh_fr2x16", short_ftype_v2hi,
5891 BFIN_BUILTIN_DIFFLH_2X16);
5892
5893 def_builtin ("__builtin_bfin_mulhisill", int_ftype_v2hi_v2hi,
5894 BFIN_BUILTIN_MULHISILL);
5895 def_builtin ("__builtin_bfin_mulhisihl", int_ftype_v2hi_v2hi,
5896 BFIN_BUILTIN_MULHISIHL);
5897 def_builtin ("__builtin_bfin_mulhisilh", int_ftype_v2hi_v2hi,
5898 BFIN_BUILTIN_MULHISILH);
5899 def_builtin ("__builtin_bfin_mulhisihh", int_ftype_v2hi_v2hi,
5900 BFIN_BUILTIN_MULHISIHH);
5901
5902 def_builtin ("__builtin_bfin_min_fr1x32", int_ftype_int_int,
5903 BFIN_BUILTIN_MIN_1X32);
5904 def_builtin ("__builtin_bfin_max_fr1x32", int_ftype_int_int,
5905 BFIN_BUILTIN_MAX_1X32);
5906
5907 def_builtin ("__builtin_bfin_add_fr1x32", int_ftype_int_int,
5908 BFIN_BUILTIN_SSADD_1X32);
5909 def_builtin ("__builtin_bfin_sub_fr1x32", int_ftype_int_int,
5910 BFIN_BUILTIN_SSSUB_1X32);
5911 def_builtin ("__builtin_bfin_negate_fr1x32", int_ftype_int,
5912 BFIN_BUILTIN_NEG_1X32);
5913 def_builtin ("__builtin_bfin_abs_fr1x32", int_ftype_int,
5914 BFIN_BUILTIN_ABS_1X32);
5915 def_builtin ("__builtin_bfin_norm_fr1x32", short_ftype_int,
5916 BFIN_BUILTIN_NORM_1X32);
5917 def_builtin ("__builtin_bfin_round_fr1x32", short_ftype_int,
5918 BFIN_BUILTIN_ROUND_1X32);
5919 def_builtin ("__builtin_bfin_mult_fr1x32", int_ftype_short_short,
5920 BFIN_BUILTIN_MULT_1X32);
5921 def_builtin ("__builtin_bfin_mult_fr1x32x32", int_ftype_int_int,
5922 BFIN_BUILTIN_MULT_1X32X32);
5923 def_builtin ("__builtin_bfin_mult_fr1x32x32NS", int_ftype_int_int,
5924 BFIN_BUILTIN_MULT_1X32X32NS);
5925
5926 /* Shifts. */
5927 def_builtin ("__builtin_bfin_shl_fr1x16", short_ftype_int_int,
5928 BFIN_BUILTIN_SSASHIFT_1X16);
5929 def_builtin ("__builtin_bfin_shl_fr2x16", v2hi_ftype_v2hi_int,
5930 BFIN_BUILTIN_SSASHIFT_2X16);
5931 def_builtin ("__builtin_bfin_lshl_fr1x16", short_ftype_int_int,
5932 BFIN_BUILTIN_LSHIFT_1X16);
5933 def_builtin ("__builtin_bfin_lshl_fr2x16", v2hi_ftype_v2hi_int,
5934 BFIN_BUILTIN_LSHIFT_2X16);
5935 def_builtin ("__builtin_bfin_shl_fr1x32", int_ftype_int_int,
5936 BFIN_BUILTIN_SSASHIFT_1X32);
5937
5938 /* Complex numbers. */
5939 def_builtin ("__builtin_bfin_cmplx_add", v2hi_ftype_v2hi_v2hi,
5940 BFIN_BUILTIN_SSADD_2X16);
5941 def_builtin ("__builtin_bfin_cmplx_sub", v2hi_ftype_v2hi_v2hi,
5942 BFIN_BUILTIN_SSSUB_2X16);
5943 def_builtin ("__builtin_bfin_cmplx_mul", v2hi_ftype_v2hi_v2hi,
5944 BFIN_BUILTIN_CPLX_MUL_16);
5945 def_builtin ("__builtin_bfin_cmplx_mac", v2hi_ftype_v2hi_v2hi_v2hi,
5946 BFIN_BUILTIN_CPLX_MAC_16);
5947 def_builtin ("__builtin_bfin_cmplx_msu", v2hi_ftype_v2hi_v2hi_v2hi,
5948 BFIN_BUILTIN_CPLX_MSU_16);
5949 def_builtin ("__builtin_bfin_cmplx_mul_s40", v2hi_ftype_v2hi_v2hi,
5950 BFIN_BUILTIN_CPLX_MUL_16_S40);
5951 def_builtin ("__builtin_bfin_cmplx_mac_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5952 BFIN_BUILTIN_CPLX_MAC_16_S40);
5953 def_builtin ("__builtin_bfin_cmplx_msu_s40", v2hi_ftype_v2hi_v2hi_v2hi,
5954 BFIN_BUILTIN_CPLX_MSU_16_S40);
5955 def_builtin ("__builtin_bfin_csqu_fr16", v2hi_ftype_v2hi,
5956 BFIN_BUILTIN_CPLX_SQU);
5957
5958 /* "Unaligned" load. */
5959 def_builtin ("__builtin_bfin_loadbytes", int_ftype_pint,
5960 BFIN_BUILTIN_LOADBYTES);
5962 }
5963
5964
5965 struct builtin_description
5966 {
5967 const enum insn_code icode;
5968 const char *const name;
5969 const enum bfin_builtins code;
5970 int macflag;
5971 };
5972
5973 static const struct builtin_description bdesc_2arg[] =
5974 {
5975 { CODE_FOR_composev2hi, "__builtin_bfin_compose_2x16", BFIN_BUILTIN_COMPOSE_2X16, -1 },
5976
5977 { CODE_FOR_ssashiftv2hi3, "__builtin_bfin_shl_fr2x16", BFIN_BUILTIN_SSASHIFT_2X16, -1 },
5978 { CODE_FOR_ssashifthi3, "__builtin_bfin_shl_fr1x16", BFIN_BUILTIN_SSASHIFT_1X16, -1 },
5979 { CODE_FOR_lshiftv2hi3, "__builtin_bfin_lshl_fr2x16", BFIN_BUILTIN_LSHIFT_2X16, -1 },
5980 { CODE_FOR_lshifthi3, "__builtin_bfin_lshl_fr1x16", BFIN_BUILTIN_LSHIFT_1X16, -1 },
5981 { CODE_FOR_ssashiftsi3, "__builtin_bfin_shl_fr1x32", BFIN_BUILTIN_SSASHIFT_1X32, -1 },
5982
5983 { CODE_FOR_sminhi3, "__builtin_bfin_min_fr1x16", BFIN_BUILTIN_MIN_1X16, -1 },
5984 { CODE_FOR_smaxhi3, "__builtin_bfin_max_fr1x16", BFIN_BUILTIN_MAX_1X16, -1 },
5985 { CODE_FOR_ssaddhi3, "__builtin_bfin_add_fr1x16", BFIN_BUILTIN_SSADD_1X16, -1 },
5986 { CODE_FOR_sssubhi3, "__builtin_bfin_sub_fr1x16", BFIN_BUILTIN_SSSUB_1X16, -1 },
5987
5988 { CODE_FOR_sminsi3, "__builtin_bfin_min_fr1x32", BFIN_BUILTIN_MIN_1X32, -1 },
5989 { CODE_FOR_smaxsi3, "__builtin_bfin_max_fr1x32", BFIN_BUILTIN_MAX_1X32, -1 },
5990 { CODE_FOR_ssaddsi3, "__builtin_bfin_add_fr1x32", BFIN_BUILTIN_SSADD_1X32, -1 },
5991 { CODE_FOR_sssubsi3, "__builtin_bfin_sub_fr1x32", BFIN_BUILTIN_SSSUB_1X32, -1 },
5992
5993 { CODE_FOR_sminv2hi3, "__builtin_bfin_min_fr2x16", BFIN_BUILTIN_MIN_2X16, -1 },
5994 { CODE_FOR_smaxv2hi3, "__builtin_bfin_max_fr2x16", BFIN_BUILTIN_MAX_2X16, -1 },
5995 { CODE_FOR_ssaddv2hi3, "__builtin_bfin_add_fr2x16", BFIN_BUILTIN_SSADD_2X16, -1 },
5996 { CODE_FOR_sssubv2hi3, "__builtin_bfin_sub_fr2x16", BFIN_BUILTIN_SSSUB_2X16, -1 },
5997 { CODE_FOR_ssaddsubv2hi3, "__builtin_bfin_dspaddsubsat", BFIN_BUILTIN_SSADDSUB_2X16, -1 },
5998 { CODE_FOR_sssubaddv2hi3, "__builtin_bfin_dspsubaddsat", BFIN_BUILTIN_SSSUBADD_2X16, -1 },
5999
6000 { CODE_FOR_flag_mulhisi, "__builtin_bfin_mult_fr1x32", BFIN_BUILTIN_MULT_1X32, MACFLAG_NONE },
6001 { CODE_FOR_flag_mulhi, "__builtin_bfin_mult_fr1x16", BFIN_BUILTIN_MULT_1X16, MACFLAG_T },
6002 { CODE_FOR_flag_mulhi, "__builtin_bfin_multr_fr1x16", BFIN_BUILTIN_MULTR_1X16, MACFLAG_NONE },
6003 { CODE_FOR_flag_mulv2hi, "__builtin_bfin_mult_fr2x16", BFIN_BUILTIN_MULT_2X16, MACFLAG_T },
6004 { CODE_FOR_flag_mulv2hi, "__builtin_bfin_multr_fr2x16", BFIN_BUILTIN_MULTR_2X16, MACFLAG_NONE },
6005
6006 { CODE_FOR_mulhisi_ll, "__builtin_bfin_mulhisill", BFIN_BUILTIN_MULHISILL, -1 },
6007 { CODE_FOR_mulhisi_lh, "__builtin_bfin_mulhisilh", BFIN_BUILTIN_MULHISILH, -1 },
6008 { CODE_FOR_mulhisi_hl, "__builtin_bfin_mulhisihl", BFIN_BUILTIN_MULHISIHL, -1 },
6009 { CODE_FOR_mulhisi_hh, "__builtin_bfin_mulhisihh", BFIN_BUILTIN_MULHISIHH, -1 }
6010
6011 };
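
/* Reading an entry: BFIN_BUILTIN_SSADD_2X16 expands through
   CODE_FOR_ssaddv2hi3 as a plain binary op (macflag -1), while the
   multiply entries pass a MACFLAG_* constant through as an extra
   operand of the insn pattern -- see bfin_expand_binop_builtin.  */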
6012
6013 static const struct builtin_description bdesc_1arg[] =
6014 {
6015 { CODE_FOR_loadbytes, "__builtin_bfin_loadbytes", BFIN_BUILTIN_LOADBYTES, 0 },
6016
6017 { CODE_FOR_ones, "__builtin_bfin_ones", BFIN_BUILTIN_ONES, 0 },
6018
6019 { CODE_FOR_signbitshi2, "__builtin_bfin_norm_fr1x16", BFIN_BUILTIN_NORM_1X16, 0 },
6020 { CODE_FOR_ssneghi2, "__builtin_bfin_negate_fr1x16", BFIN_BUILTIN_NEG_1X16, 0 },
6021 { CODE_FOR_abshi2, "__builtin_bfin_abs_fr1x16", BFIN_BUILTIN_ABS_1X16, 0 },
6022
6023 { CODE_FOR_signbitssi2, "__builtin_bfin_norm_fr1x32", BFIN_BUILTIN_NORM_1X32, 0 },
6024 { CODE_FOR_ssroundsi2, "__builtin_bfin_round_fr1x32", BFIN_BUILTIN_ROUND_1X32, 0 },
6025 { CODE_FOR_ssnegsi2, "__builtin_bfin_negate_fr1x32", BFIN_BUILTIN_NEG_1X32, 0 },
6026 { CODE_FOR_ssabssi2, "__builtin_bfin_abs_fr1x32", BFIN_BUILTIN_ABS_1X32, 0 },
6027
6028 { CODE_FOR_movv2hi_hi_low, "__builtin_bfin_extract_lo", BFIN_BUILTIN_EXTRACTLO, 0 },
6029 { CODE_FOR_movv2hi_hi_high, "__builtin_bfin_extract_hi", BFIN_BUILTIN_EXTRACTHI, 0 },
6030 { CODE_FOR_ssnegv2hi2, "__builtin_bfin_negate_fr2x16", BFIN_BUILTIN_NEG_2X16, 0 },
6031 { CODE_FOR_ssabsv2hi2, "__builtin_bfin_abs_fr2x16", BFIN_BUILTIN_ABS_2X16, 0 }
6032 };
6033
6034 /* Errors in the source file can cause expand_expr to return const0_rtx
6035 where we expect a vector. To avoid crashing, use one of the vector
6036 clear instructions. */
6037 static rtx
6038 safe_vector_operand (rtx x, enum machine_mode mode)
6039 {
6040 if (x != const0_rtx)
6041 return x;
6042 x = gen_reg_rtx (SImode);
6043
6044 emit_insn (gen_movsi (x, CONST0_RTX (SImode)));
6045 return gen_lowpart (mode, x);
6046 }
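
/* E.g. a V2HImode operand that arrived as const0_rtx is rebuilt as the
   low part of a freshly zeroed SImode pseudo, which the vector
   patterns used here accept.  */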
6047
6048 /* Subroutine of bfin_expand_builtin to take care of binop insns. MACFLAG is -1
6049 if this is a normal binary op, or one of the MACFLAG_xxx constants. */
6050
6051 static rtx
6052 bfin_expand_binop_builtin (enum insn_code icode, tree exp, rtx target,
6053 int macflag)
6054 {
6055 rtx pat;
6056 tree arg0 = CALL_EXPR_ARG (exp, 0);
6057 tree arg1 = CALL_EXPR_ARG (exp, 1);
6058 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6059 rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6060 enum machine_mode op0mode = GET_MODE (op0);
6061 enum machine_mode op1mode = GET_MODE (op1);
6062 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6063 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
6064 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
6065
6066 if (VECTOR_MODE_P (mode0))
6067 op0 = safe_vector_operand (op0, mode0);
6068 if (VECTOR_MODE_P (mode1))
6069 op1 = safe_vector_operand (op1, mode1);
6070
6071 if (! target
6072 || GET_MODE (target) != tmode
6073 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6074 target = gen_reg_rtx (tmode);
6075
6076 if ((op0mode == SImode || op0mode == VOIDmode) && mode0 == HImode)
6077 {
6078 op0mode = HImode;
6079 op0 = gen_lowpart (HImode, op0);
6080 }
6081 if ((op1mode == SImode || op1mode == VOIDmode) && mode1 == HImode)
6082 {
6083 op1mode = HImode;
6084 op1 = gen_lowpart (HImode, op1);
6085 }
6086 /* The operands must now be in the modes the insn pattern expects;
6087 anything else indicates a bug further up, so assert.  */
6088 gcc_assert ((op0mode == mode0 || op0mode == VOIDmode)
6089 && (op1mode == mode1 || op1mode == VOIDmode));
6090
6091 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6092 op0 = copy_to_mode_reg (mode0, op0);
6093 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
6094 op1 = copy_to_mode_reg (mode1, op1);
6095
6096 if (macflag == -1)
6097 pat = GEN_FCN (icode) (target, op0, op1);
6098 else
6099 pat = GEN_FCN (icode) (target, op0, op1, GEN_INT (macflag));
6100 if (! pat)
6101 return 0;
6102
6103 emit_insn (pat);
6104 return target;
6105 }
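
/* A worked trace using the __builtin_bfin_mult_fr1x16 entry above:
   icode is CODE_FOR_flag_mulhi and macflag is MACFLAG_T.  The SImode
   arguments are narrowed to HImode via gen_lowpart, and the insn is
   emitted as

     pat = GEN_FCN (CODE_FOR_flag_mulhi) (target, op0, op1,
                                          GEN_INT (MACFLAG_T));

   Entries with macflag == -1 omit the flag operand entirely.  */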
6106
6107 /* Subroutine of bfin_expand_builtin to take care of unop insns. */
6108
6109 static rtx
6110 bfin_expand_unop_builtin (enum insn_code icode, tree exp,
6111 rtx target)
6112 {
6113 rtx pat;
6114 tree arg0 = CALL_EXPR_ARG (exp, 0);
6115 rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6116 enum machine_mode op0mode = GET_MODE (op0);
6117 enum machine_mode tmode = insn_data[icode].operand[0].mode;
6118 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
6119
6120 if (! target
6121 || GET_MODE (target) != tmode
6122 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6123 target = gen_reg_rtx (tmode);
6124
6125 if (VECTOR_MODE_P (mode0))
6126 op0 = safe_vector_operand (op0, mode0);
6127
6128 if (op0mode == SImode && mode0 == HImode)
6129 {
6130 op0mode = HImode;
6131 op0 = gen_lowpart (HImode, op0);
6132 }
6133 gcc_assert (op0mode == mode0 || op0mode == VOIDmode);
6134
6135 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6136 op0 = copy_to_mode_reg (mode0, op0);
6137
6138 pat = GEN_FCN (icode) (target, op0);
6139 if (! pat)
6140 return 0;
6141 emit_insn (pat);
6142 return target;
6143 }
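
/* E.g. __builtin_bfin_abs_fr1x16 arrives here with icode ==
   CODE_FOR_abshi2; the operand is narrowed to HImode if it was
   expanded in SImode, and the single-input pattern is emitted.  */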
6144
6145 /* Expand an expression EXP that calls a built-in function,
6146 with result going to TARGET if that's convenient
6147 (and in mode MODE if that's convenient).
6148 SUBTARGET may be used as the target for computing one of EXP's operands.
6149 IGNORE is nonzero if the value is to be ignored. */
6150
6151 static rtx
6152 bfin_expand_builtin (tree exp, rtx target ATTRIBUTE_UNUSED,
6153 rtx subtarget ATTRIBUTE_UNUSED,
6154 enum machine_mode mode ATTRIBUTE_UNUSED,
6155 int ignore ATTRIBUTE_UNUSED)
6156 {
6157 size_t i;
6158 enum insn_code icode;
6159 const struct builtin_description *d;
6160 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6161 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6162 tree arg0, arg1, arg2;
6163 rtx op0, op1, op2, accvec, pat, tmp1, tmp2, a0reg, a1reg;
6164 enum machine_mode tmode, mode0;
6165
6166 switch (fcode)
6167 {
6168 case BFIN_BUILTIN_CSYNC:
6169 emit_insn (gen_csync ());
6170 return 0;
6171 case BFIN_BUILTIN_SSYNC:
6172 emit_insn (gen_ssync ());
6173 return 0;
6174
6175 case BFIN_BUILTIN_DIFFHL_2X16:
6176 case BFIN_BUILTIN_DIFFLH_2X16:
6177 case BFIN_BUILTIN_SUM_2X16:
6178 arg0 = CALL_EXPR_ARG (exp, 0);
6179 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6180 icode = (fcode == BFIN_BUILTIN_DIFFHL_2X16 ? CODE_FOR_subhilov2hi3
6181 : fcode == BFIN_BUILTIN_DIFFLH_2X16 ? CODE_FOR_sublohiv2hi3
6182 : CODE_FOR_ssaddhilov2hi3);
6183 tmode = insn_data[icode].operand[0].mode;
6184 mode0 = insn_data[icode].operand[1].mode;
6185
6186 if (! target
6187 || GET_MODE (target) != tmode
6188 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
6189 target = gen_reg_rtx (tmode);
6190
6191 if (VECTOR_MODE_P (mode0))
6192 op0 = safe_vector_operand (op0, mode0);
6193
6194 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
6195 op0 = copy_to_mode_reg (mode0, op0);
6196
6197 pat = GEN_FCN (icode) (target, op0, op0);
6198 if (! pat)
6199 return 0;
6200 emit_insn (pat);
6201 return target;
6202
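/* In outline, the 32x32 fractional multiply is decomposed into 16x16
   partial products: the low*low product is formed unsigned in A1 and
   shifted down, the mixed low*high products are accumulated in A0 and
   A1, and the two accumulators are finally summed into the SImode
   result.  */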
6203 case BFIN_BUILTIN_MULT_1X32X32:
6204 case BFIN_BUILTIN_MULT_1X32X32NS:
6205 arg0 = CALL_EXPR_ARG (exp, 0);
6206 arg1 = CALL_EXPR_ARG (exp, 1);
6207 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6208 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6209 if (! target
6210 || !register_operand (target, SImode))
6211 target = gen_reg_rtx (SImode);
6212
6213 a1reg = gen_rtx_REG (PDImode, REG_A1);
6214 a0reg = gen_rtx_REG (PDImode, REG_A0);
6215 tmp1 = gen_lowpart (V2HImode, op0);
6216 tmp2 = gen_lowpart (V2HImode, op1);
6217 emit_insn (gen_flag_macinit1hi (a1reg,
6218 gen_lowpart (HImode, op0),
6219 gen_lowpart (HImode, op1),
6220 GEN_INT (MACFLAG_FU)));
6221 emit_insn (gen_lshrpdi3 (a1reg, a1reg, GEN_INT (16)));
6222
6223 if (fcode == BFIN_BUILTIN_MULT_1X32X32)
6224 emit_insn (gen_flag_mul_macv2hi_parts_acconly (a0reg, a1reg, tmp1, tmp2,
6225 const1_rtx, const1_rtx,
6226 const1_rtx, const0_rtx, a1reg,
6227 const0_rtx, GEN_INT (MACFLAG_NONE),
6228 GEN_INT (MACFLAG_M)));
6229 else
6230 {
6231 /* For saturating multiplication, there's exactly one special case
6232 to be handled: multiplying the smallest negative value by
6233 itself. Due to shift correction in fractional multiplies, this
6234 can overflow. Iff this happens, OP2 will contain 1, which, when
6235 added in 32 bits to the smallest negative, wraps to the largest
6236 positive, which is the result we want. */
6237 op2 = gen_reg_rtx (V2HImode);
6238 emit_insn (gen_packv2hi (op2, tmp1, tmp2, const0_rtx, const0_rtx));
6239 emit_insn (gen_movsibi (gen_rtx_REG (BImode, REG_CC),
6240 gen_lowpart (SImode, op2)));
6241 emit_insn (gen_flag_mul_macv2hi_parts_acconly_andcc0 (a0reg, a1reg, tmp1, tmp2,
6242 const1_rtx, const1_rtx,
6243 const1_rtx, const0_rtx, a1reg,
6244 const0_rtx, GEN_INT (MACFLAG_NONE),
6245 GEN_INT (MACFLAG_M)));
6246 op2 = gen_reg_rtx (SImode);
6247 emit_insn (gen_movbisi (op2, gen_rtx_REG (BImode, REG_CC)));
6248 }
6249 emit_insn (gen_flag_machi_parts_acconly (a1reg, tmp2, tmp1,
6250 const1_rtx, const0_rtx,
6251 a1reg, const0_rtx, GEN_INT (MACFLAG_M)));
6252 emit_insn (gen_ashrpdi3 (a1reg, a1reg, GEN_INT (15)));
6253 emit_insn (gen_sum_of_accumulators (target, a0reg, a0reg, a1reg));
6254 if (fcode == BFIN_BUILTIN_MULT_1X32X32NS)
6255 emit_insn (gen_addsi3 (target, target, op2));
6256 return target;
6257
6258 case BFIN_BUILTIN_CPLX_MUL_16:
6259 case BFIN_BUILTIN_CPLX_MUL_16_S40:
6260 arg0 = CALL_EXPR_ARG (exp, 0);
6261 arg1 = CALL_EXPR_ARG (exp, 1);
6262 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6263 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6264 accvec = gen_reg_rtx (V2PDImode);
6265
6266 if (! target
6267 || GET_MODE (target) != V2HImode
6268 || ! register_operand (target, V2HImode))
6269 target = gen_reg_rtx (V2HImode);
6270 if (! register_operand (op0, GET_MODE (op0)))
6271 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
6272 if (! register_operand (op1, GET_MODE (op1)))
6273 op1 = copy_to_mode_reg (GET_MODE (op1), op1);
6274
6275 if (fcode == BFIN_BUILTIN_CPLX_MUL_16)
6276 emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
6277 const0_rtx, const0_rtx,
6278 const1_rtx, GEN_INT (MACFLAG_W32)));
6279 else
6280 emit_insn (gen_flag_macinit1v2hi_parts (accvec, op0, op1, const0_rtx,
6281 const0_rtx, const0_rtx,
6282 const1_rtx, GEN_INT (MACFLAG_NONE)));
6283 emit_insn (gen_flag_macv2hi_parts (target, op0, op1, const1_rtx,
6284 const1_rtx, const1_rtx,
6285 const0_rtx, accvec, const1_rtx, const0_rtx,
6286 GEN_INT (MACFLAG_NONE), accvec));
6287
6288 return target;
6289
6290 case BFIN_BUILTIN_CPLX_MAC_16:
6291 case BFIN_BUILTIN_CPLX_MSU_16:
6292 case BFIN_BUILTIN_CPLX_MAC_16_S40:
6293 case BFIN_BUILTIN_CPLX_MSU_16_S40:
6294 arg0 = CALL_EXPR_ARG (exp, 0);
6295 arg1 = CALL_EXPR_ARG (exp, 1);
6296 arg2 = CALL_EXPR_ARG (exp, 2);
6297 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6298 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6299 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6300 accvec = gen_reg_rtx (V2PDImode);
6301
6302 if (! target
6303 || GET_MODE (target) != V2HImode
6304 || ! register_operand (target, V2HImode))
6305 target = gen_reg_rtx (V2HImode);
6306 if (! register_operand (op1, GET_MODE (op1)))
6307 op1 = copy_to_mode_reg (GET_MODE (op1), op1);
6308 if (! register_operand (op2, GET_MODE (op2)))
6309 op2 = copy_to_mode_reg (GET_MODE (op2), op2);
6310
6311 tmp1 = gen_reg_rtx (SImode);
6312 tmp2 = gen_reg_rtx (SImode);
6313 emit_insn (gen_ashlsi3 (tmp1, gen_lowpart (SImode, op0), GEN_INT (16)));
6314 emit_move_insn (tmp2, gen_lowpart (SImode, op0));
6315 emit_insn (gen_movstricthi_1 (gen_lowpart (HImode, tmp2), const0_rtx));
6316 emit_insn (gen_load_accumulator_pair (accvec, tmp1, tmp2));
6317 if (fcode == BFIN_BUILTIN_CPLX_MAC_16
6318 || fcode == BFIN_BUILTIN_CPLX_MSU_16)
6319 emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
6320 const0_rtx, const0_rtx,
6321 const1_rtx, accvec, const0_rtx,
6322 const0_rtx,
6323 GEN_INT (MACFLAG_W32)));
6324 else
6325 emit_insn (gen_flag_macv2hi_parts_acconly (accvec, op1, op2, const0_rtx,
6326 const0_rtx, const0_rtx,
6327 const1_rtx, accvec, const0_rtx,
6328 const0_rtx,
6329 GEN_INT (MACFLAG_NONE)));
6330 if (fcode == BFIN_BUILTIN_CPLX_MAC_16
6331 || fcode == BFIN_BUILTIN_CPLX_MAC_16_S40)
6332 {
6333 tmp1 = const1_rtx;
6334 tmp2 = const0_rtx;
6335 }
6336 else
6337 {
6338 tmp1 = const0_rtx;
6339 tmp2 = const1_rtx;
6340 }
6341 emit_insn (gen_flag_macv2hi_parts (target, op1, op2, const1_rtx,
6342 const1_rtx, const1_rtx,
6343 const0_rtx, accvec, tmp1, tmp2,
6344 GEN_INT (MACFLAG_NONE), accvec));
6345
6346 return target;
6347
6348 case BFIN_BUILTIN_CPLX_SQU:
6349 arg0 = CALL_EXPR_ARG (exp, 0);
6350 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6351 accvec = gen_reg_rtx (V2PDImode);
6352 icode = CODE_FOR_flag_mulv2hi;
6353 tmp1 = gen_reg_rtx (V2HImode);
6354 tmp2 = gen_reg_rtx (V2HImode);
6355
6356 if (! target
6357 || GET_MODE (target) != V2HImode
6358 || ! (*insn_data[icode].operand[0].predicate) (target, V2HImode))
6359 target = gen_reg_rtx (V2HImode);
6360 if (! register_operand (op0, GET_MODE (op0)))
6361 op0 = copy_to_mode_reg (GET_MODE (op0), op0);
6362
6363 emit_insn (gen_flag_mulv2hi (tmp1, op0, op0, GEN_INT (MACFLAG_NONE)));
6364
6365 emit_insn (gen_flag_mulhi_parts (gen_lowpart (HImode, tmp2), op0, op0,
6366 const0_rtx, const1_rtx,
6367 GEN_INT (MACFLAG_NONE)));
6368
6369 emit_insn (gen_ssaddhi3_high_parts (target, tmp2, tmp2, tmp2, const0_rtx,
6370 const0_rtx));
6371 emit_insn (gen_sssubhi3_low_parts (target, target, tmp1, tmp1,
6372 const0_rtx, const1_rtx));
6373
6374 return target;
6375
6376 default:
6377 break;
6378 }
6379
6380 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
6381 if (d->code == fcode)
6382 return bfin_expand_binop_builtin (d->icode, exp, target,
6383 d->macflag);
6384
6385 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
6386 if (d->code == fcode)
6387 return bfin_expand_unop_builtin (d->icode, exp, target);
6388
6389 gcc_unreachable ();
6390 }
6391 \f
6392 #undef TARGET_INIT_BUILTINS
6393 #define TARGET_INIT_BUILTINS bfin_init_builtins
6394
6395 #undef TARGET_EXPAND_BUILTIN
6396 #define TARGET_EXPAND_BUILTIN bfin_expand_builtin
6397
6398 #undef TARGET_ASM_GLOBALIZE_LABEL
6399 #define TARGET_ASM_GLOBALIZE_LABEL bfin_globalize_label
6400
6401 #undef TARGET_ASM_FILE_START
6402 #define TARGET_ASM_FILE_START output_file_start
6403
6404 #undef TARGET_ATTRIBUTE_TABLE
6405 #define TARGET_ATTRIBUTE_TABLE bfin_attribute_table
6406
6407 #undef TARGET_COMP_TYPE_ATTRIBUTES
6408 #define TARGET_COMP_TYPE_ATTRIBUTES bfin_comp_type_attributes
6409
6410 #undef TARGET_RTX_COSTS
6411 #define TARGET_RTX_COSTS bfin_rtx_costs
6412
6413 #undef TARGET_ADDRESS_COST
6414 #define TARGET_ADDRESS_COST bfin_address_cost
6415
6416 #undef TARGET_ASM_INTEGER
6417 #define TARGET_ASM_INTEGER bfin_assemble_integer
6418
6419 #undef TARGET_MACHINE_DEPENDENT_REORG
6420 #define TARGET_MACHINE_DEPENDENT_REORG bfin_reorg
6421
6422 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
6423 #define TARGET_FUNCTION_OK_FOR_SIBCALL bfin_function_ok_for_sibcall
6424
6425 #undef TARGET_ASM_OUTPUT_MI_THUNK
6426 #define TARGET_ASM_OUTPUT_MI_THUNK bfin_output_mi_thunk
6427 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
6428 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK hook_bool_const_tree_hwi_hwi_const_tree_true
6429
6430 #undef TARGET_SCHED_ADJUST_COST
6431 #define TARGET_SCHED_ADJUST_COST bfin_adjust_cost
6432
6433 #undef TARGET_SCHED_ISSUE_RATE
6434 #define TARGET_SCHED_ISSUE_RATE bfin_issue_rate
6435
6436 #undef TARGET_PROMOTE_FUNCTION_MODE
6437 #define TARGET_PROMOTE_FUNCTION_MODE default_promote_function_mode_always_promote
6438
6439 #undef TARGET_ARG_PARTIAL_BYTES
6440 #define TARGET_ARG_PARTIAL_BYTES bfin_arg_partial_bytes
6441
6442 #undef TARGET_PASS_BY_REFERENCE
6443 #define TARGET_PASS_BY_REFERENCE bfin_pass_by_reference
6444
6445 #undef TARGET_SETUP_INCOMING_VARARGS
6446 #define TARGET_SETUP_INCOMING_VARARGS setup_incoming_varargs
6447
6448 #undef TARGET_STRUCT_VALUE_RTX
6449 #define TARGET_STRUCT_VALUE_RTX bfin_struct_value_rtx
6450
6451 #undef TARGET_VECTOR_MODE_SUPPORTED_P
6452 #define TARGET_VECTOR_MODE_SUPPORTED_P bfin_vector_mode_supported_p
6453
6454 #undef TARGET_HANDLE_OPTION
6455 #define TARGET_HANDLE_OPTION bfin_handle_option
6456
6457 #undef TARGET_DEFAULT_TARGET_FLAGS
6458 #define TARGET_DEFAULT_TARGET_FLAGS TARGET_DEFAULT
6459
6460 #undef TARGET_SECONDARY_RELOAD
6461 #define TARGET_SECONDARY_RELOAD bfin_secondary_reload
6462
6463 #undef TARGET_DELEGITIMIZE_ADDRESS
6464 #define TARGET_DELEGITIMIZE_ADDRESS bfin_delegitimize_address
6465
6466 #undef TARGET_CANNOT_FORCE_CONST_MEM
6467 #define TARGET_CANNOT_FORCE_CONST_MEM bfin_cannot_force_const_mem
6468
6469 #undef TARGET_RETURN_IN_MEMORY
6470 #define TARGET_RETURN_IN_MEMORY bfin_return_in_memory
6471
6472 #undef TARGET_LEGITIMATE_ADDRESS_P
6473 #define TARGET_LEGITIMATE_ADDRESS_P bfin_legitimate_address_p
6474
6475 #undef TARGET_FRAME_POINTER_REQUIRED
6476 #define TARGET_FRAME_POINTER_REQUIRED bfin_frame_pointer_required
6477
6478 #undef TARGET_CAN_ELIMINATE
6479 #define TARGET_CAN_ELIMINATE bfin_can_eliminate
6480
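/* The machine-independent parts of the compiler dispatch through this
   vector; e.g. expanding a call to a BUILT_IN_MD function ends up in
   targetm.expand_builtin, which the macros above bind to
   bfin_expand_builtin.  */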
6481 struct gcc_target targetm = TARGET_INITIALIZER;