gcc/config/spu/spu.c
1 /* Copyright (C) 2006, 2007, 2008, 2009 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "machmode.h"
52 #include "gimple.h"
53 #include "tm-constrs.h"
54 #include "ddg.h"
55 #include "sbitmap.h"
56 #include "timevar.h"
57 #include "df.h"
58
59 /* Builtin types, data and prototypes. */
60
61 enum spu_builtin_type_index
62 {
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112 };
113
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
127 struct spu_builtin_range
128 {
129 int low, high;
130 };
131
132 static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145 };
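/* Illustrative sketch of how the table above is meant to be read: each
   row gives the [low, high] interval that a builtin argument of the
   corresponding immediate type must fall in.  The helper below is
   hypothetical (the real checking happens where builtin arguments are
   validated) and assumes the rows line up with the enum starting at
   SPU_BTI_7:

     static int
     spu_immediate_in_range_p (enum spu_builtin_type_index bti,
                               HOST_WIDE_INT val)
     {
       struct spu_builtin_range r = spu_builtin_range[bti - SPU_BTI_7];
       return val >= r.low && val <= r.high;
     }

   For example, an SPU_BTI_S10 operand must lie in [-0x200, 0x1ff],
   i.e. it is a signed 10-bit immediate.  */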
146
147 \f
148 /* Registers that have ever been allocated by this target. */
149 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151 /* Prototypes and external defs. */
152 static void spu_init_builtins (void);
153 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
154 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
155 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
156 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
157 static rtx get_pic_reg (void);
158 static int need_to_save_reg (int regno, int saving);
159 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
160 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
161 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
162 rtx scratch);
163 static void emit_nop_for_insn (rtx insn);
164 static bool insn_clobbers_hbr (rtx insn);
165 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
166 int distance, sbitmap blocks);
167 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
168 enum machine_mode dmode);
169 static rtx get_branch_target (rtx branch);
170 static void spu_machine_dependent_reorg (void);
171 static int spu_sched_issue_rate (void);
172 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
173 int can_issue_more);
174 static int get_pipe (rtx insn);
175 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
176 static void spu_sched_init_global (FILE *, int, int);
177 static void spu_sched_init (FILE *, int, int);
178 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
179 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
180 int flags,
181 unsigned char *no_add_attrs);
182 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
183 int flags,
184 unsigned char *no_add_attrs);
185 static int spu_naked_function_p (tree func);
186 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
187 const_tree type, unsigned char named);
188 static tree spu_build_builtin_va_list (void);
189 static void spu_va_start (tree, rtx);
190 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
191 gimple_seq * pre_p, gimple_seq * post_p);
192 static int store_with_one_insn_p (rtx mem);
193 static int mem_is_padded_component_ref (rtx x);
194 static int reg_aligned_for_addr (rtx x);
195 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
196 static void spu_asm_globalize_label (FILE * file, const char *name);
197 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
198 int *total, bool speed);
199 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
200 static void spu_init_libfuncs (void);
201 static bool spu_return_in_memory (const_tree type, const_tree fntype);
202 static void fix_range (const char *);
203 static void spu_encode_section_info (tree, rtx, int);
204 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
205 static tree spu_builtin_mul_widen_even (tree);
206 static tree spu_builtin_mul_widen_odd (tree);
207 static tree spu_builtin_mask_for_load (void);
208 static int spu_builtin_vectorization_cost (bool);
209 static bool spu_vector_alignment_reachable (const_tree, bool);
210 static tree spu_builtin_vec_perm (tree, tree *);
211 static int spu_sms_res_mii (struct ddg *g);
212 static void asm_file_start (void);
213 static unsigned int spu_section_type_flags (tree, const char *, int);
214 static rtx spu_expand_load (rtx, rtx, rtx, int);
215
216 extern const char *reg_names[];
217
218 /* Which instruction set architecture to use. */
219 int spu_arch;
220 /* Which cpu are we tuning for. */
221 int spu_tune;
222
223 /* The hardware requires 8 insns between a hint and the branch it
224 affects. This variable describes how many rtl instructions the
225 compiler needs to see before inserting a hint, and then the compiler
226 will insert enough nops to make it at least 8 insns. The default is
227 for the compiler to allow up to 2 nops to be emitted. The nops are
228 inserted in pairs, so we round down. */
229 int spu_hint_dist = (8*4) - (2*4);
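/* A worked example of the arithmetic above, assuming 4-byte SPU
   instructions: with the default of at most 2 nops, spu_hint_dist is
   8*4 - 2*4 = 24 bytes, so the compiler wants at least 6 real insns
   between the hint and the branch and pads with up to 2 nops to reach
   the required 8.  spu_override_options recomputes this as
   8*4 - spu_max_nops*4, clamped at 0.  */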
230
231 /* Determines whether we run variable tracking in machine dependent
232 reorganization. */
233 static int spu_flag_var_tracking;
234
235 enum spu_immediate {
236 SPU_NONE,
237 SPU_IL,
238 SPU_ILA,
239 SPU_ILH,
240 SPU_ILHU,
241 SPU_ORI,
242 SPU_ORHI,
243 SPU_ORBI,
244 SPU_IOHL
245 };
246 enum immediate_class
247 {
248 IC_POOL, /* constant pool */
249 IC_IL1, /* one il* instruction */
250 IC_IL2, /* both ilhu and iohl instructions */
251 IC_IL1s, /* one il* instruction */
252 IC_IL2s, /* both ilhu and iohl instructions */
253 IC_FSMBI, /* the fsmbi instruction */
254 IC_CPAT, /* one of the c*d instructions */
255 IC_FSMBI2 /* fsmbi plus 1 other instruction */
256 };
257
258 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
259 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
260 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
261 static enum immediate_class classify_immediate (rtx op,
262 enum machine_mode mode);
263
264 static enum machine_mode spu_unwind_word_mode (void);
265
266 static enum machine_mode
267 spu_libgcc_cmp_return_mode (void);
268
269 static enum machine_mode
270 spu_libgcc_shift_count_mode (void);
271
272 \f
273 /* TARGET overrides. */
274
275 #undef TARGET_INIT_BUILTINS
276 #define TARGET_INIT_BUILTINS spu_init_builtins
277
278 #undef TARGET_EXPAND_BUILTIN
279 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
280
281 #undef TARGET_UNWIND_WORD_MODE
282 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
283
284 #undef TARGET_LEGITIMIZE_ADDRESS
285 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
286
287 /* The .8byte directive doesn't seem to work well for a 32-bit
288 architecture. */
289 #undef TARGET_ASM_UNALIGNED_DI_OP
290 #define TARGET_ASM_UNALIGNED_DI_OP NULL
291
292 #undef TARGET_RTX_COSTS
293 #define TARGET_RTX_COSTS spu_rtx_costs
294
295 #undef TARGET_ADDRESS_COST
296 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
297
298 #undef TARGET_SCHED_ISSUE_RATE
299 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
300
301 #undef TARGET_SCHED_INIT_GLOBAL
302 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
303
304 #undef TARGET_SCHED_INIT
305 #define TARGET_SCHED_INIT spu_sched_init
306
307 #undef TARGET_SCHED_VARIABLE_ISSUE
308 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
309
310 #undef TARGET_SCHED_REORDER
311 #define TARGET_SCHED_REORDER spu_sched_reorder
312
313 #undef TARGET_SCHED_REORDER2
314 #define TARGET_SCHED_REORDER2 spu_sched_reorder
315
316 #undef TARGET_SCHED_ADJUST_COST
317 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
318
319 EXPORTED_CONST struct attribute_spec spu_attribute_table[];
320 #undef TARGET_ATTRIBUTE_TABLE
321 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
322
323 #undef TARGET_ASM_INTEGER
324 #define TARGET_ASM_INTEGER spu_assemble_integer
325
326 #undef TARGET_SCALAR_MODE_SUPPORTED_P
327 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
328
329 #undef TARGET_VECTOR_MODE_SUPPORTED_P
330 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
331
332 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
333 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
334
335 #undef TARGET_ASM_GLOBALIZE_LABEL
336 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
337
338 #undef TARGET_PASS_BY_REFERENCE
339 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
340
341 #undef TARGET_MUST_PASS_IN_STACK
342 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
343
344 #undef TARGET_BUILD_BUILTIN_VA_LIST
345 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
346
347 #undef TARGET_EXPAND_BUILTIN_VA_START
348 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
349
350 #undef TARGET_SETUP_INCOMING_VARARGS
351 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
352
353 #undef TARGET_MACHINE_DEPENDENT_REORG
354 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
355
356 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
357 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
358
359 #undef TARGET_DEFAULT_TARGET_FLAGS
360 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
361
362 #undef TARGET_INIT_LIBFUNCS
363 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
364
365 #undef TARGET_RETURN_IN_MEMORY
366 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
367
368 #undef TARGET_ENCODE_SECTION_INFO
369 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
370
371 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
372 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
373
374 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
375 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
376
377 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
378 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
379
380 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
381 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
382
383 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
384 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
385
386 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
387 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
388
389 #undef TARGET_LIBGCC_CMP_RETURN_MODE
390 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
391
392 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
393 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
394
395 #undef TARGET_SCHED_SMS_RES_MII
396 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
397
398 #undef TARGET_ASM_FILE_START
399 #define TARGET_ASM_FILE_START asm_file_start
400
401 #undef TARGET_SECTION_TYPE_FLAGS
402 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
403
404 #undef TARGET_LEGITIMATE_ADDRESS_P
405 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
406
407 struct gcc_target targetm = TARGET_INITIALIZER;
408
409 void
410 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
411 {
412 /* Override some of the default param values. With so many registers
413 larger values are better for these params. */
414 MAX_PENDING_LIST_LENGTH = 128;
415
416 /* With so many registers this is better on by default. */
417 flag_rename_registers = 1;
418 }
419
420 /* Sometimes certain combinations of command options do not make sense
421 on a particular target machine. You can define a macro
422 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
423 executed once just after all the command options have been parsed. */
424 void
425 spu_override_options (void)
426 {
427 /* Small loops will be completely unrolled at -O3. For SPU it is more
428 important to keep code small by default. */
429 if (!flag_unroll_loops && !flag_peel_loops
430 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
431 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
432
433 flag_omit_frame_pointer = 1;
434
435 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
436 if (align_functions < 8)
437 align_functions = 8;
438
439 spu_hint_dist = 8*4 - spu_max_nops*4;
440 if (spu_hint_dist < 0)
441 spu_hint_dist = 0;
442
443 if (spu_fixed_range_string)
444 fix_range (spu_fixed_range_string);
445
446 /* Determine processor architectural level. */
447 if (spu_arch_string)
448 {
449 if (strcmp (&spu_arch_string[0], "cell") == 0)
450 spu_arch = PROCESSOR_CELL;
451 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
452 spu_arch = PROCESSOR_CELLEDP;
453 else
454 error ("Unknown architecture '%s'", &spu_arch_string[0]);
455 }
456
457 /* Determine processor to tune for. */
458 if (spu_tune_string)
459 {
460 if (strcmp (&spu_tune_string[0], "cell") == 0)
461 spu_tune = PROCESSOR_CELL;
462 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
463 spu_tune = PROCESSOR_CELLEDP;
464 else
465 error ("Unknown architecture '%s'", &spu_tune_string[0]);
466 }
467
468 /* Change defaults according to the processor architecture. */
469 if (spu_arch == PROCESSOR_CELLEDP)
470 {
471 /* If no command line option has been otherwise specified, change
472 the default to -mno-safe-hints on celledp -- only the original
473 Cell/B.E. processors require this workaround. */
474 if (!(target_flags_explicit & MASK_SAFE_HINTS))
475 target_flags &= ~MASK_SAFE_HINTS;
476 }
477
478 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
479 }
480 \f
481 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
482 struct attribute_spec.handler. */
483
484 /* Table of machine attributes. */
485 const struct attribute_spec spu_attribute_table[] =
486 {
487 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
488 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
489 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
490 { NULL, 0, 0, false, false, false, NULL }
491 };
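/* A brief usage sketch for the attributes registered above.  "naked"
   requires a FUNCTION_DECL (decl_req is true), e.g.

     void start (void) __attribute__ ((naked));

   and causes spu_expand_prologue/spu_expand_epilogue below to emit no
   prologue or epilogue for the function.  "spu_vector" attaches to
   types (type_req is true) and is processed by
   spu_handle_vector_attribute; its user-level spelling comes from the
   SPU headers, so it is not spelled out here.  */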
492
493 /* True if MODE is valid for the target. By "valid", we mean able to
494 be manipulated in non-trivial ways. In particular, this means all
495 the arithmetic is supported. */
496 static bool
497 spu_scalar_mode_supported_p (enum machine_mode mode)
498 {
499 switch (mode)
500 {
501 case QImode:
502 case HImode:
503 case SImode:
504 case SFmode:
505 case DImode:
506 case TImode:
507 case DFmode:
508 return true;
509
510 default:
511 return false;
512 }
513 }
514
515 /* Similarly for vector modes. "Supported" here is less strict. At
516 least some operations are supported; one must check optabs or builtins
517 for further details. */
518 static bool
519 spu_vector_mode_supported_p (enum machine_mode mode)
520 {
521 switch (mode)
522 {
523 case V16QImode:
524 case V8HImode:
525 case V4SImode:
526 case V2DImode:
527 case V4SFmode:
528 case V2DFmode:
529 return true;
530
531 default:
532 return false;
533 }
534 }
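/* As an illustration, the generic GNU vector types below map onto the
   modes accepted above (the typedef names are arbitrary):

     typedef signed char vec_qi __attribute__ ((vector_size (16)));   - V16QImode
     typedef short       vec_hi __attribute__ ((vector_size (16)));   - V8HImode
     typedef int         vec_si __attribute__ ((vector_size (16)));   - V4SImode
     typedef float       vec_sf __attribute__ ((vector_size (16)));   - V4SFmode
     typedef double      vec_df __attribute__ ((vector_size (16)));   - V2DFmode
*/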
535
536 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
537 least significant bytes of the outer mode. This function returns
538 TRUE for the SUBREGs where this is correct. */
539 int
540 valid_subreg (rtx op)
541 {
542 enum machine_mode om = GET_MODE (op);
543 enum machine_mode im = GET_MODE (SUBREG_REG (op));
544 return om != VOIDmode && im != VOIDmode
545 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
546 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
547 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
548 }
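/* Concrete cases of the rule above (SI is 4 bytes, DI is 8, TI is 16):
   (subreg:SI (reg:QI)) is accepted because both sizes are <= 4;
   (subreg:TI (reg:V4SI)) is accepted because the sizes are equal;
   (subreg:DI (reg:SI)) is rejected -- a paradoxical SUBREG that widens
   past 4 bytes does not keep the value in the least significant bytes
   on this target.  */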
549
550 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
551 and adjust the start offset. */
552 static rtx
553 adjust_operand (rtx op, HOST_WIDE_INT * start)
554 {
555 enum machine_mode mode;
556 int op_size;
557 /* Strip any paradoxical SUBREG. */
558 if (GET_CODE (op) == SUBREG
559 && (GET_MODE_BITSIZE (GET_MODE (op))
560 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
561 {
562 if (start)
563 *start -=
564 GET_MODE_BITSIZE (GET_MODE (op)) -
565 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
566 op = SUBREG_REG (op);
567 }
568 /* If it is smaller than SI, widen it to SI; the SUBREG is added below. */
569 op_size = GET_MODE_BITSIZE (GET_MODE (op));
570 if (op_size < 32)
571 {
572 if (start)
573 *start += 32 - op_size;
574 op_size = 32;
575 }
576 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
577 mode = mode_for_size (op_size, MODE_INT, 0);
578 if (mode != GET_MODE (op))
579 op = gen_rtx_SUBREG (mode, op, 0);
580 return op;
581 }
582
583 void
584 spu_expand_extv (rtx ops[], int unsignedp)
585 {
586 rtx dst = ops[0], src = ops[1];
587 HOST_WIDE_INT width = INTVAL (ops[2]);
588 HOST_WIDE_INT start = INTVAL (ops[3]);
589 HOST_WIDE_INT align_mask;
590 rtx s0, s1, mask, r0;
591
592 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
593
594 if (MEM_P (src))
595 {
596 /* First, determine if we need 1 TImode load or 2. We need only 1
597 if the bits being extracted do not cross the alignment boundary
598 as determined by the MEM and its address. */
599
600 align_mask = -MEM_ALIGN (src);
601 if ((start & align_mask) == ((start + width - 1) & align_mask))
602 {
603 /* Alignment is sufficient for 1 load. */
604 s0 = gen_reg_rtx (TImode);
605 r0 = spu_expand_load (s0, 0, src, start / 8);
606 start &= 7;
607 if (r0)
608 emit_insn (gen_rotqby_ti (s0, s0, r0));
609 }
610 else
611 {
612 /* Need 2 loads. */
613 s0 = gen_reg_rtx (TImode);
614 s1 = gen_reg_rtx (TImode);
615 r0 = spu_expand_load (s0, s1, src, start / 8);
616 start &= 7;
617
618 gcc_assert (start + width <= 128);
619 if (r0)
620 {
621 rtx r1 = gen_reg_rtx (SImode);
622 mask = gen_reg_rtx (TImode);
623 emit_move_insn (mask, GEN_INT (-1));
624 emit_insn (gen_rotqby_ti (s0, s0, r0));
625 emit_insn (gen_rotqby_ti (s1, s1, r0));
626 if (GET_CODE (r0) == CONST_INT)
627 r1 = GEN_INT (INTVAL (r0) & 15);
628 else
629 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
630 emit_insn (gen_shlqby_ti (mask, mask, r1));
631 emit_insn (gen_selb (s0, s1, s0, mask));
632 }
633 }
634
635 }
636 else if (GET_CODE (src) == SUBREG)
637 {
638 rtx r = SUBREG_REG (src);
639 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
640 s0 = gen_reg_rtx (TImode);
641 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
642 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
643 else
644 emit_move_insn (s0, src);
645 }
646 else
647 {
648 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
649 s0 = gen_reg_rtx (TImode);
650 emit_move_insn (s0, src);
651 }
652
653 /* Now s0 is TImode and contains the bits to extract at start. */
654
655 if (start)
656 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
657
658 if (128 - width)
659 {
660 tree c = build_int_cst (NULL_TREE, 128 - width);
661 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
662 }
663
664 emit_move_insn (dst, s0);
665 }
666
667 void
668 spu_expand_insv (rtx ops[])
669 {
670 HOST_WIDE_INT width = INTVAL (ops[1]);
671 HOST_WIDE_INT start = INTVAL (ops[2]);
672 HOST_WIDE_INT maskbits;
673 enum machine_mode dst_mode, src_mode;
674 rtx dst = ops[0], src = ops[3];
675 int dst_size, src_size;
676 rtx mask;
677 rtx shift_reg;
678 int shift;
679
680
681 if (GET_CODE (ops[0]) == MEM)
682 dst = gen_reg_rtx (TImode);
683 else
684 dst = adjust_operand (dst, &start);
685 dst_mode = GET_MODE (dst);
686 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
687
688 if (CONSTANT_P (src))
689 {
690 enum machine_mode m =
691 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
692 src = force_reg (m, convert_to_mode (m, src, 0));
693 }
694 src = adjust_operand (src, 0);
695 src_mode = GET_MODE (src);
696 src_size = GET_MODE_BITSIZE (GET_MODE (src));
697
698 mask = gen_reg_rtx (dst_mode);
699 shift_reg = gen_reg_rtx (dst_mode);
700 shift = dst_size - start - width;
701
702 /* It's not safe to use subreg here because the compiler assumes
703 that the SUBREG_REG is right justified in the SUBREG. */
704 convert_move (shift_reg, src, 1);
705
706 if (shift > 0)
707 {
708 switch (dst_mode)
709 {
710 case SImode:
711 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
712 break;
713 case DImode:
714 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
715 break;
716 case TImode:
717 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
718 break;
719 default:
720 abort ();
721 }
722 }
723 else if (shift < 0)
724 abort ();
725
726 switch (dst_size)
727 {
728 case 32:
729 maskbits = (-1ll << (32 - width - start));
730 if (start)
731 maskbits += (1ll << (32 - start));
732 emit_move_insn (mask, GEN_INT (maskbits));
733 break;
734 case 64:
735 maskbits = (-1ll << (64 - width - start));
736 if (start)
737 maskbits += (1ll << (64 - start));
738 emit_move_insn (mask, GEN_INT (maskbits));
739 break;
740 case 128:
741 {
742 unsigned char arr[16];
743 int i = start / 8;
744 memset (arr, 0, sizeof (arr));
745 arr[i] = 0xff >> (start & 7);
746 for (i++; i <= (start + width - 1) / 8; i++)
747 arr[i] = 0xff;
748 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
749 emit_move_insn (mask, array_to_constant (TImode, arr));
750 }
751 break;
752 default:
753 abort ();
754 }
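  /* A worked example of the mask: for dst_size == 32, width == 8 and
     start == 4 the switch above computes
       maskbits = (-1ll << 20) + (1ll << 28),
     whose low 32 bits are 0x0ff00000, i.e. ones exactly over the 8 bits
     that sit `start' bits below the MSB -- the field being inserted.  */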
755 if (GET_CODE (ops[0]) == MEM)
756 {
757 rtx low = gen_reg_rtx (SImode);
758 rtx rotl = gen_reg_rtx (SImode);
759 rtx mask0 = gen_reg_rtx (TImode);
760 rtx addr;
761 rtx addr0;
762 rtx addr1;
763 rtx mem;
764
765 addr = force_reg (Pmode, XEXP (ops[0], 0));
766 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
767 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
768 emit_insn (gen_negsi2 (rotl, low));
769 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
770 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
771 mem = change_address (ops[0], TImode, addr0);
772 set_mem_alias_set (mem, 0);
773 emit_move_insn (dst, mem);
774 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
775 if (start + width > MEM_ALIGN (ops[0]))
776 {
777 rtx shl = gen_reg_rtx (SImode);
778 rtx mask1 = gen_reg_rtx (TImode);
779 rtx dst1 = gen_reg_rtx (TImode);
780 rtx mem1;
781 addr1 = plus_constant (addr, 16);
782 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
783 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
784 emit_insn (gen_shlqby_ti (mask1, mask, shl));
785 mem1 = change_address (ops[0], TImode, addr1);
786 set_mem_alias_set (mem1, 0);
787 emit_move_insn (dst1, mem1);
788 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
789 emit_move_insn (mem1, dst1);
790 }
791 emit_move_insn (mem, dst);
792 }
793 else
794 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
795 }
796
797
798 int
799 spu_expand_block_move (rtx ops[])
800 {
801 HOST_WIDE_INT bytes, align, offset;
802 rtx src, dst, sreg, dreg, target;
803 int i;
804 if (GET_CODE (ops[2]) != CONST_INT
805 || GET_CODE (ops[3]) != CONST_INT
806 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
807 return 0;
808
809 bytes = INTVAL (ops[2]);
810 align = INTVAL (ops[3]);
811
812 if (bytes <= 0)
813 return 1;
814
815 dst = ops[0];
816 src = ops[1];
817
818 if (align == 16)
819 {
820 for (offset = 0; offset + 16 <= bytes; offset += 16)
821 {
822 dst = adjust_address (ops[0], V16QImode, offset);
823 src = adjust_address (ops[1], V16QImode, offset);
824 emit_move_insn (dst, src);
825 }
826 if (offset < bytes)
827 {
828 rtx mask;
829 unsigned char arr[16] = { 0 };
830 for (i = 0; i < bytes - offset; i++)
831 arr[i] = 0xff;
832 dst = adjust_address (ops[0], V16QImode, offset);
833 src = adjust_address (ops[1], V16QImode, offset);
834 mask = gen_reg_rtx (V16QImode);
835 sreg = gen_reg_rtx (V16QImode);
836 dreg = gen_reg_rtx (V16QImode);
837 target = gen_reg_rtx (V16QImode);
838 emit_move_insn (mask, array_to_constant (V16QImode, arr));
839 emit_move_insn (dreg, dst);
840 emit_move_insn (sreg, src);
841 emit_insn (gen_selb (target, dreg, sreg, mask));
842 emit_move_insn (dst, target);
843 }
844 return 1;
845 }
846 return 0;
847 }
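/* As an illustration of the aligned path above: for a 40-byte copy
   with 16-byte alignment, two full V16QImode moves handle bytes 0-31;
   for the 8-byte tail a mask whose first 8 bytes are 0xff is built,
   the destination and source quadwords are loaded, selb merges them so
   that the first 8 bytes come from the source and the rest from the
   destination, and the merged quadword is stored back.  */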
848
849 enum spu_comp_code
850 { SPU_EQ, SPU_GT, SPU_GTU };
851
852 int spu_comp_icode[12][3] = {
853 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
854 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
855 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
856 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
857 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
858 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
859 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
860 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
861 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
862 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
863 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
864 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
865 };
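/* The table above is indexed by mode and comparison: the row comes
   from the operand mode (see the switch in spu_emit_branch_or_set
   below -- QImode is 0, HImode 1, SImode 2, ..., V2DFmode 11) and the
   column is the spu_comp_code.  For example spu_comp_icode[2][SPU_GT]
   is CODE_FOR_cgt_si, the signed greater-than compare on SImode.  A
   zero entry (unsigned compares on the floating-point rows) means no
   such pattern exists, and the caller aborts on it.  */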
866
867 /* Generate a compare for CODE and emit a branch or a set based on the
868 result. GCC can figure this out too if we don't provide all
869 variations of compares, but GCC always wants to use WORD_MODE, so
870 we can generate better code in most cases if we do it
871 ourselves. */
872 void
873 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
874 {
875 int reverse_compare = 0;
876 int reverse_test = 0;
877 rtx compare_result, eq_result;
878 rtx comp_rtx, eq_rtx;
879 enum machine_mode comp_mode;
880 enum machine_mode op_mode;
881 enum spu_comp_code scode, eq_code;
882 enum insn_code ior_code;
883 enum rtx_code code = GET_CODE (cmp);
884 rtx op0 = XEXP (cmp, 0);
885 rtx op1 = XEXP (cmp, 1);
886 int index;
887 int eq_test = 0;
888
889 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
890 and so on, to keep the constant in operand 1. */
891 if (GET_CODE (op1) == CONST_INT)
892 {
893 HOST_WIDE_INT val = INTVAL (op1) - 1;
894 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
895 switch (code)
896 {
897 case GE:
898 op1 = GEN_INT (val);
899 code = GT;
900 break;
901 case LT:
902 op1 = GEN_INT (val);
903 code = LE;
904 break;
905 case GEU:
906 op1 = GEN_INT (val);
907 code = GTU;
908 break;
909 case LTU:
910 op1 = GEN_INT (val);
911 code = LEU;
912 break;
913 default:
914 break;
915 }
916 }
917
918 comp_mode = SImode;
919 op_mode = GET_MODE (op0);
920
921 switch (code)
922 {
923 case GE:
924 scode = SPU_GT;
925 if (HONOR_NANS (op_mode))
926 {
927 reverse_compare = 0;
928 reverse_test = 0;
929 eq_test = 1;
930 eq_code = SPU_EQ;
931 }
932 else
933 {
934 reverse_compare = 1;
935 reverse_test = 1;
936 }
937 break;
938 case LE:
939 scode = SPU_GT;
940 if (HONOR_NANS (op_mode))
941 {
942 reverse_compare = 1;
943 reverse_test = 0;
944 eq_test = 1;
945 eq_code = SPU_EQ;
946 }
947 else
948 {
949 reverse_compare = 0;
950 reverse_test = 1;
951 }
952 break;
953 case LT:
954 reverse_compare = 1;
955 reverse_test = 0;
956 scode = SPU_GT;
957 break;
958 case GEU:
959 reverse_compare = 1;
960 reverse_test = 1;
961 scode = SPU_GTU;
962 break;
963 case LEU:
964 reverse_compare = 0;
965 reverse_test = 1;
966 scode = SPU_GTU;
967 break;
968 case LTU:
969 reverse_compare = 1;
970 reverse_test = 0;
971 scode = SPU_GTU;
972 break;
973 case NE:
974 reverse_compare = 0;
975 reverse_test = 1;
976 scode = SPU_EQ;
977 break;
978
979 case EQ:
980 scode = SPU_EQ;
981 break;
982 case GT:
983 scode = SPU_GT;
984 break;
985 case GTU:
986 scode = SPU_GTU;
987 break;
988 default:
989 scode = SPU_EQ;
990 break;
991 }
992
993 switch (op_mode)
994 {
995 case QImode:
996 index = 0;
997 comp_mode = QImode;
998 break;
999 case HImode:
1000 index = 1;
1001 comp_mode = HImode;
1002 break;
1003 case SImode:
1004 index = 2;
1005 break;
1006 case DImode:
1007 index = 3;
1008 break;
1009 case TImode:
1010 index = 4;
1011 break;
1012 case SFmode:
1013 index = 5;
1014 break;
1015 case DFmode:
1016 index = 6;
1017 break;
1018 case V16QImode:
1019 index = 7;
1020 comp_mode = op_mode;
1021 break;
1022 case V8HImode:
1023 index = 8;
1024 comp_mode = op_mode;
1025 break;
1026 case V4SImode:
1027 index = 9;
1028 comp_mode = op_mode;
1029 break;
1030 case V4SFmode:
1031 index = 10;
1032 comp_mode = V4SImode;
1033 break;
1034 case V2DFmode:
1035 index = 11;
1036 comp_mode = V2DImode;
1037 break;
1038 case V2DImode:
1039 default:
1040 abort ();
1041 }
1042
1043 if (GET_MODE (op1) == DFmode
1044 && (scode != SPU_GT && scode != SPU_EQ))
1045 abort ();
1046
1047 if (is_set == 0 && op1 == const0_rtx
1048 && (GET_MODE (op0) == SImode
1049 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1050 {
1051 /* Don't need to set a register with the result when we are
1052 comparing against zero and branching. */
1053 reverse_test = !reverse_test;
1054 compare_result = op0;
1055 }
1056 else
1057 {
1058 compare_result = gen_reg_rtx (comp_mode);
1059
1060 if (reverse_compare)
1061 {
1062 rtx t = op1;
1063 op1 = op0;
1064 op0 = t;
1065 }
1066
1067 if (spu_comp_icode[index][scode] == 0)
1068 abort ();
1069
1070 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1071 (op0, op_mode))
1072 op0 = force_reg (op_mode, op0);
1073 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1074 (op1, op_mode))
1075 op1 = force_reg (op_mode, op1);
1076 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1077 op0, op1);
1078 if (comp_rtx == 0)
1079 abort ();
1080 emit_insn (comp_rtx);
1081
1082 if (eq_test)
1083 {
1084 eq_result = gen_reg_rtx (comp_mode);
1085 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1086 op0, op1);
1087 if (eq_rtx == 0)
1088 abort ();
1089 emit_insn (eq_rtx);
1090 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
1091 gcc_assert (ior_code != CODE_FOR_nothing);
1092 emit_insn (GEN_FCN (ior_code)
1093 (compare_result, compare_result, eq_result));
1094 }
1095 }
1096
1097 if (is_set == 0)
1098 {
1099 rtx bcomp;
1100 rtx loc_ref;
1101
1102 /* We don't have branch on QI compare insns, so we convert the
1103 QI compare result to a HI result. */
1104 if (comp_mode == QImode)
1105 {
1106 rtx old_res = compare_result;
1107 compare_result = gen_reg_rtx (HImode);
1108 comp_mode = HImode;
1109 emit_insn (gen_extendqihi2 (compare_result, old_res));
1110 }
1111
1112 if (reverse_test)
1113 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1114 else
1115 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1116
1117 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1118 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1119 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1120 loc_ref, pc_rtx)));
1121 }
1122 else if (is_set == 2)
1123 {
1124 rtx target = operands[0];
1125 int compare_size = GET_MODE_BITSIZE (comp_mode);
1126 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1127 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1128 rtx select_mask;
1129 rtx op_t = operands[2];
1130 rtx op_f = operands[3];
1131
1132 /* The result of the comparison can be SI, HI or QI mode. Create a
1133 mask based on that result. */
1134 if (target_size > compare_size)
1135 {
1136 select_mask = gen_reg_rtx (mode);
1137 emit_insn (gen_extend_compare (select_mask, compare_result));
1138 }
1139 else if (target_size < compare_size)
1140 select_mask =
1141 gen_rtx_SUBREG (mode, compare_result,
1142 (compare_size - target_size) / BITS_PER_UNIT);
1143 else if (comp_mode != mode)
1144 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1145 else
1146 select_mask = compare_result;
1147
1148 if (GET_MODE (target) != GET_MODE (op_t)
1149 || GET_MODE (target) != GET_MODE (op_f))
1150 abort ();
1151
1152 if (reverse_test)
1153 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1154 else
1155 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1156 }
1157 else
1158 {
1159 rtx target = operands[0];
1160 if (reverse_test)
1161 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1162 gen_rtx_NOT (comp_mode, compare_result)));
1163 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1164 emit_insn (gen_extendhisi2 (target, compare_result));
1165 else if (GET_MODE (target) == SImode
1166 && GET_MODE (compare_result) == QImode)
1167 emit_insn (gen_extend_compare (target, compare_result));
1168 else
1169 emit_move_insn (target, compare_result);
1170 }
1171 }
1172
1173 HOST_WIDE_INT
1174 const_double_to_hwint (rtx x)
1175 {
1176 HOST_WIDE_INT val;
1177 REAL_VALUE_TYPE rv;
1178 if (GET_MODE (x) == SFmode)
1179 {
1180 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1181 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1182 }
1183 else if (GET_MODE (x) == DFmode)
1184 {
1185 long l[2];
1186 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1187 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1188 val = l[0];
1189 val = (val << 32) | (l[1] & 0xffffffff);
1190 }
1191 else
1192 abort ();
1193 return val;
1194 }
1195
1196 rtx
1197 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1198 {
1199 long tv[2];
1200 REAL_VALUE_TYPE rv;
1201 gcc_assert (mode == SFmode || mode == DFmode);
1202
1203 if (mode == SFmode)
1204 tv[0] = (v << 32) >> 32;
1205 else if (mode == DFmode)
1206 {
1207 tv[1] = (v << 32) >> 32;
1208 tv[0] = v >> 32;
1209 }
1210 real_from_target (&rv, tv, mode);
1211 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1212 }
1213
1214 void
1215 print_operand_address (FILE * file, register rtx addr)
1216 {
1217 rtx reg;
1218 rtx offset;
1219
1220 if (GET_CODE (addr) == AND
1221 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1222 && INTVAL (XEXP (addr, 1)) == -16)
1223 addr = XEXP (addr, 0);
1224
1225 switch (GET_CODE (addr))
1226 {
1227 case REG:
1228 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1229 break;
1230
1231 case PLUS:
1232 reg = XEXP (addr, 0);
1233 offset = XEXP (addr, 1);
1234 if (GET_CODE (offset) == REG)
1235 {
1236 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1237 reg_names[REGNO (offset)]);
1238 }
1239 else if (GET_CODE (offset) == CONST_INT)
1240 {
1241 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1242 INTVAL (offset), reg_names[REGNO (reg)]);
1243 }
1244 else
1245 abort ();
1246 break;
1247
1248 case CONST:
1249 case LABEL_REF:
1250 case SYMBOL_REF:
1251 case CONST_INT:
1252 output_addr_const (file, addr);
1253 break;
1254
1255 default:
1256 debug_rtx (addr);
1257 abort ();
1258 }
1259 }
1260
1261 void
1262 print_operand (FILE * file, rtx x, int code)
1263 {
1264 enum machine_mode mode = GET_MODE (x);
1265 HOST_WIDE_INT val;
1266 unsigned char arr[16];
1267 int xcode = GET_CODE (x);
1268 int i, info;
1269 if (GET_MODE (x) == VOIDmode)
1270 switch (code)
1271 {
1272 case 'L': /* 128 bits, signed */
1273 case 'm': /* 128 bits, signed */
1274 case 'T': /* 128 bits, signed */
1275 case 't': /* 128 bits, signed */
1276 mode = TImode;
1277 break;
1278 case 'K': /* 64 bits, signed */
1279 case 'k': /* 64 bits, signed */
1280 case 'D': /* 64 bits, signed */
1281 case 'd': /* 64 bits, signed */
1282 mode = DImode;
1283 break;
1284 case 'J': /* 32 bits, signed */
1285 case 'j': /* 32 bits, signed */
1286 case 's': /* 32 bits, signed */
1287 case 'S': /* 32 bits, signed */
1288 mode = SImode;
1289 break;
1290 }
1291 switch (code)
1292 {
1293
1294 case 'j': /* 32 bits, signed */
1295 case 'k': /* 64 bits, signed */
1296 case 'm': /* 128 bits, signed */
1297 if (xcode == CONST_INT
1298 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1299 {
1300 gcc_assert (logical_immediate_p (x, mode));
1301 constant_to_array (mode, x, arr);
1302 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1303 val = trunc_int_for_mode (val, SImode);
1304 switch (which_logical_immediate (val))
1305 {
1306 case SPU_ORI:
1307 break;
1308 case SPU_ORHI:
1309 fprintf (file, "h");
1310 break;
1311 case SPU_ORBI:
1312 fprintf (file, "b");
1313 break;
1314 default:
1315 gcc_unreachable();
1316 }
1317 }
1318 else
1319 gcc_unreachable();
1320 return;
1321
1322 case 'J': /* 32 bits, signed */
1323 case 'K': /* 64 bits, signed */
1324 case 'L': /* 128 bits, signed */
1325 if (xcode == CONST_INT
1326 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1327 {
1328 gcc_assert (logical_immediate_p (x, mode)
1329 || iohl_immediate_p (x, mode));
1330 constant_to_array (mode, x, arr);
1331 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1332 val = trunc_int_for_mode (val, SImode);
1333 switch (which_logical_immediate (val))
1334 {
1335 case SPU_ORI:
1336 case SPU_IOHL:
1337 break;
1338 case SPU_ORHI:
1339 val = trunc_int_for_mode (val, HImode);
1340 break;
1341 case SPU_ORBI:
1342 val = trunc_int_for_mode (val, QImode);
1343 break;
1344 default:
1345 gcc_unreachable();
1346 }
1347 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1348 }
1349 else
1350 gcc_unreachable();
1351 return;
1352
1353 case 't': /* 128 bits, signed */
1354 case 'd': /* 64 bits, signed */
1355 case 's': /* 32 bits, signed */
1356 if (CONSTANT_P (x))
1357 {
1358 enum immediate_class c = classify_immediate (x, mode);
1359 switch (c)
1360 {
1361 case IC_IL1:
1362 constant_to_array (mode, x, arr);
1363 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1364 val = trunc_int_for_mode (val, SImode);
1365 switch (which_immediate_load (val))
1366 {
1367 case SPU_IL:
1368 break;
1369 case SPU_ILA:
1370 fprintf (file, "a");
1371 break;
1372 case SPU_ILH:
1373 fprintf (file, "h");
1374 break;
1375 case SPU_ILHU:
1376 fprintf (file, "hu");
1377 break;
1378 default:
1379 gcc_unreachable ();
1380 }
1381 break;
1382 case IC_CPAT:
1383 constant_to_array (mode, x, arr);
1384 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1385 if (info == 1)
1386 fprintf (file, "b");
1387 else if (info == 2)
1388 fprintf (file, "h");
1389 else if (info == 4)
1390 fprintf (file, "w");
1391 else if (info == 8)
1392 fprintf (file, "d");
1393 break;
1394 case IC_IL1s:
1395 if (xcode == CONST_VECTOR)
1396 {
1397 x = CONST_VECTOR_ELT (x, 0);
1398 xcode = GET_CODE (x);
1399 }
1400 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1401 fprintf (file, "a");
1402 else if (xcode == HIGH)
1403 fprintf (file, "hu");
1404 break;
1405 case IC_FSMBI:
1406 case IC_FSMBI2:
1407 case IC_IL2:
1408 case IC_IL2s:
1409 case IC_POOL:
1410 abort ();
1411 }
1412 }
1413 else
1414 gcc_unreachable ();
1415 return;
1416
1417 case 'T': /* 128 bits, signed */
1418 case 'D': /* 64 bits, signed */
1419 case 'S': /* 32 bits, signed */
1420 if (CONSTANT_P (x))
1421 {
1422 enum immediate_class c = classify_immediate (x, mode);
1423 switch (c)
1424 {
1425 case IC_IL1:
1426 constant_to_array (mode, x, arr);
1427 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1428 val = trunc_int_for_mode (val, SImode);
1429 switch (which_immediate_load (val))
1430 {
1431 case SPU_IL:
1432 case SPU_ILA:
1433 break;
1434 case SPU_ILH:
1435 case SPU_ILHU:
1436 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1437 break;
1438 default:
1439 gcc_unreachable ();
1440 }
1441 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1442 break;
1443 case IC_FSMBI:
1444 constant_to_array (mode, x, arr);
1445 val = 0;
1446 for (i = 0; i < 16; i++)
1447 {
1448 val <<= 1;
1449 val |= arr[i] & 1;
1450 }
1451 print_operand (file, GEN_INT (val), 0);
1452 break;
1453 case IC_CPAT:
1454 constant_to_array (mode, x, arr);
1455 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1456 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1457 break;
1458 case IC_IL1s:
1459 if (xcode == HIGH)
1460 x = XEXP (x, 0);
1461 if (GET_CODE (x) == CONST_VECTOR)
1462 x = CONST_VECTOR_ELT (x, 0);
1463 output_addr_const (file, x);
1464 if (xcode == HIGH)
1465 fprintf (file, "@h");
1466 break;
1467 case IC_IL2:
1468 case IC_IL2s:
1469 case IC_FSMBI2:
1470 case IC_POOL:
1471 abort ();
1472 }
1473 }
1474 else
1475 gcc_unreachable ();
1476 return;
1477
1478 case 'C':
1479 if (xcode == CONST_INT)
1480 {
1481 /* Only the 4 least significant bits are relevant for generating
1482 control word instructions. */
1483 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1484 return;
1485 }
1486 break;
1487
1488 case 'M': /* print code for c*d */
1489 if (GET_CODE (x) == CONST_INT)
1490 switch (INTVAL (x))
1491 {
1492 case 1:
1493 fprintf (file, "b");
1494 break;
1495 case 2:
1496 fprintf (file, "h");
1497 break;
1498 case 4:
1499 fprintf (file, "w");
1500 break;
1501 case 8:
1502 fprintf (file, "d");
1503 break;
1504 default:
1505 gcc_unreachable();
1506 }
1507 else
1508 gcc_unreachable();
1509 return;
1510
1511 case 'N': /* Negate the operand */
1512 if (xcode == CONST_INT)
1513 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1514 else if (xcode == CONST_VECTOR)
1515 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1516 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1517 return;
1518
1519 case 'I': /* enable/disable interrupts */
1520 if (xcode == CONST_INT)
1521 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1522 return;
1523
1524 case 'b': /* branch modifiers */
1525 if (xcode == REG)
1526 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1527 else if (COMPARISON_P (x))
1528 fprintf (file, "%s", xcode == NE ? "n" : "");
1529 return;
1530
1531 case 'i': /* indirect call */
1532 if (xcode == MEM)
1533 {
1534 if (GET_CODE (XEXP (x, 0)) == REG)
1535 /* Used in indirect function calls. */
1536 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1537 else
1538 output_address (XEXP (x, 0));
1539 }
1540 return;
1541
1542 case 'p': /* load/store */
1543 if (xcode == MEM)
1544 {
1545 x = XEXP (x, 0);
1546 xcode = GET_CODE (x);
1547 }
1548 if (xcode == AND)
1549 {
1550 x = XEXP (x, 0);
1551 xcode = GET_CODE (x);
1552 }
1553 if (xcode == REG)
1554 fprintf (file, "d");
1555 else if (xcode == CONST_INT)
1556 fprintf (file, "a");
1557 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1558 fprintf (file, "r");
1559 else if (xcode == PLUS || xcode == LO_SUM)
1560 {
1561 if (GET_CODE (XEXP (x, 1)) == REG)
1562 fprintf (file, "x");
1563 else
1564 fprintf (file, "d");
1565 }
1566 return;
1567
1568 case 'e':
1569 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1570 val &= 0x7;
1571 output_addr_const (file, GEN_INT (val));
1572 return;
1573
1574 case 'f':
1575 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1576 val &= 0x1f;
1577 output_addr_const (file, GEN_INT (val));
1578 return;
1579
1580 case 'g':
1581 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1582 val &= 0x3f;
1583 output_addr_const (file, GEN_INT (val));
1584 return;
1585
1586 case 'h':
1587 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1588 val = (val >> 3) & 0x1f;
1589 output_addr_const (file, GEN_INT (val));
1590 return;
1591
1592 case 'E':
1593 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1594 val = -val;
1595 val &= 0x7;
1596 output_addr_const (file, GEN_INT (val));
1597 return;
1598
1599 case 'F':
1600 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1601 val = -val;
1602 val &= 0x1f;
1603 output_addr_const (file, GEN_INT (val));
1604 return;
1605
1606 case 'G':
1607 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1608 val = -val;
1609 val &= 0x3f;
1610 output_addr_const (file, GEN_INT (val));
1611 return;
1612
1613 case 'H':
1614 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1615 val = -(val & -8ll);
1616 val = (val >> 3) & 0x1f;
1617 output_addr_const (file, GEN_INT (val));
1618 return;
1619
1620 case 'v':
1621 case 'w':
1622 constant_to_array (mode, x, arr);
1623 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1624 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1625 return;
1626
1627 case 0:
1628 if (xcode == REG)
1629 fprintf (file, "%s", reg_names[REGNO (x)]);
1630 else if (xcode == MEM)
1631 output_address (XEXP (x, 0));
1632 else if (xcode == CONST_VECTOR)
1633 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1634 else
1635 output_addr_const (file, x);
1636 return;
1637
1638 /* unused letters
1639 o qr u yz
1640 AB OPQR UVWXYZ */
1641 default:
1642 output_operand_lossage ("invalid %%xn code");
1643 }
1644 gcc_unreachable ();
1645 }
1646
1647 extern char call_used_regs[];
1648
1649 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1650 caller saved register. For leaf functions it is more efficient to
1651 use a volatile register because we won't need to save and restore the
1652 pic register. This routine is only valid after register allocation
1653 is completed, so we can pick an unused register. */
1654 static rtx
1655 get_pic_reg (void)
1656 {
1657 rtx pic_reg = pic_offset_table_rtx;
1658 if (!reload_completed && !reload_in_progress)
1659 abort ();
1660 return pic_reg;
1661 }
1662
1663 /* Split constant addresses to handle cases that are too large.
1664 Add in the pic register when in PIC mode.
1665 Split immediates that require more than 1 instruction. */
1666 int
1667 spu_split_immediate (rtx * ops)
1668 {
1669 enum machine_mode mode = GET_MODE (ops[0]);
1670 enum immediate_class c = classify_immediate (ops[1], mode);
1671
1672 switch (c)
1673 {
1674 case IC_IL2:
1675 {
1676 unsigned char arrhi[16];
1677 unsigned char arrlo[16];
1678 rtx to, temp, hi, lo;
1679 int i;
1680 enum machine_mode imode = mode;
1681 /* We need to do reals as ints because the constant used in the
1682 IOR might not be a legitimate real constant. */
1683 imode = int_mode_for_mode (mode);
1684 constant_to_array (mode, ops[1], arrhi);
1685 if (imode != mode)
1686 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1687 else
1688 to = ops[0];
1689 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1690 for (i = 0; i < 16; i += 4)
1691 {
1692 arrlo[i + 2] = arrhi[i + 2];
1693 arrlo[i + 3] = arrhi[i + 3];
1694 arrlo[i + 0] = arrlo[i + 1] = 0;
1695 arrhi[i + 2] = arrhi[i + 3] = 0;
1696 }
1697 hi = array_to_constant (imode, arrhi);
1698 lo = array_to_constant (imode, arrlo);
1699 emit_move_insn (temp, hi);
1700 emit_insn (gen_rtx_SET
1701 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1702 return 1;
1703 }
1704 case IC_FSMBI2:
1705 {
1706 unsigned char arr_fsmbi[16];
1707 unsigned char arr_andbi[16];
1708 rtx to, reg_fsmbi, reg_and;
1709 int i;
1710 enum machine_mode imode = mode;
1711 /* We need to do reals as ints because the constant used in the
1712 * AND might not be a legitimate real constant. */
1713 imode = int_mode_for_mode (mode);
1714 constant_to_array (mode, ops[1], arr_fsmbi);
1715 if (imode != mode)
1716 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1717 else
1718 to = ops[0];
1719 for (i = 0; i < 16; i++)
1720 if (arr_fsmbi[i] != 0)
1721 {
1722 arr_andbi[0] = arr_fsmbi[i];
1723 arr_fsmbi[i] = 0xff;
1724 }
1725 for (i = 1; i < 16; i++)
1726 arr_andbi[i] = arr_andbi[0];
1727 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1728 reg_and = array_to_constant (imode, arr_andbi);
1729 emit_move_insn (to, reg_fsmbi);
1730 emit_insn (gen_rtx_SET
1731 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1732 return 1;
1733 }
1734 case IC_POOL:
1735 if (reload_in_progress || reload_completed)
1736 {
1737 rtx mem = force_const_mem (mode, ops[1]);
1738 if (TARGET_LARGE_MEM)
1739 {
1740 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1741 emit_move_insn (addr, XEXP (mem, 0));
1742 mem = replace_equiv_address (mem, addr);
1743 }
1744 emit_move_insn (ops[0], mem);
1745 return 1;
1746 }
1747 break;
1748 case IC_IL1s:
1749 case IC_IL2s:
1750 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1751 {
1752 if (c == IC_IL2s)
1753 {
1754 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1755 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1756 }
1757 else if (flag_pic)
1758 emit_insn (gen_pic (ops[0], ops[1]));
1759 if (flag_pic)
1760 {
1761 rtx pic_reg = get_pic_reg ();
1762 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1763 crtl->uses_pic_offset_table = 1;
1764 }
1765 return flag_pic || c == IC_IL2s;
1766 }
1767 break;
1768 case IC_IL1:
1769 case IC_FSMBI:
1770 case IC_CPAT:
1771 break;
1772 }
1773 return 0;
1774 }
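/* A worked example of the IC_IL2 case above, assuming a V4SI constant
   whose elements are all 0x12345678: constant_to_array gives bytes
   12 34 56 78 in each word, arrhi keeps the high halfwords (12 34 00 00)
   and arrlo the low ones (00 00 56 78), so the split ends up as roughly

       ilhu    rt, 0x1234
       iohl    rt, 0x5678

   (rt standing for the target register), matching the IC_IL2
   description above ("both ilhu and iohl instructions").  */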
1775
1776 /* SAVING is TRUE when we are generating the actual load and store
1777 instructions for REGNO. When determining the size of the stack
1778 needed for saving registers we must allocate enough space for the
1779 worst case, because we don't always have the information early enough
1780 to not allocate it. But we can at least eliminate the actual loads
1781 and stores during the prologue/epilogue. */
1782 static int
1783 need_to_save_reg (int regno, int saving)
1784 {
1785 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1786 return 1;
1787 if (flag_pic
1788 && regno == PIC_OFFSET_TABLE_REGNUM
1789 && (!saving || crtl->uses_pic_offset_table)
1790 && (!saving
1791 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1792 return 1;
1793 return 0;
1794 }
1795
1796 /* This function is only correct starting with local register
1797 allocation */
1798 int
1799 spu_saved_regs_size (void)
1800 {
1801 int reg_save_size = 0;
1802 int regno;
1803
1804 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1805 if (need_to_save_reg (regno, 0))
1806 reg_save_size += 0x10;
1807 return reg_save_size;
1808 }
1809
1810 static rtx
1811 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1812 {
1813 rtx reg = gen_rtx_REG (V4SImode, regno);
1814 rtx mem =
1815 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1816 return emit_insn (gen_movv4si (mem, reg));
1817 }
1818
1819 static rtx
1820 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1821 {
1822 rtx reg = gen_rtx_REG (V4SImode, regno);
1823 rtx mem =
1824 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1825 return emit_insn (gen_movv4si (reg, mem));
1826 }
1827
1828 /* This happens after reload, so we need to expand it. */
1829 static rtx
1830 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1831 {
1832 rtx insn;
1833 if (satisfies_constraint_K (GEN_INT (imm)))
1834 {
1835 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1836 }
1837 else
1838 {
1839 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1840 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1841 if (REGNO (src) == REGNO (scratch))
1842 abort ();
1843 }
1844 return insn;
1845 }
1846
1847 /* Return nonzero if this function is known to have a null epilogue. */
1848
1849 int
1850 direct_return (void)
1851 {
1852 if (reload_completed)
1853 {
1854 if (cfun->static_chain_decl == 0
1855 && (spu_saved_regs_size ()
1856 + get_frame_size ()
1857 + crtl->outgoing_args_size
1858 + crtl->args.pretend_args_size == 0)
1859 && current_function_is_leaf)
1860 return 1;
1861 }
1862 return 0;
1863 }
1864
1865 /*
1866 The stack frame looks like this:
1867 +-------------+
1868 | incoming |
1869 | args |
1870 AP -> +-------------+
1871 | $lr save |
1872 +-------------+
1873 prev SP | back chain |
1874 +-------------+
1875 | var args |
1876 | reg save | crtl->args.pretend_args_size bytes
1877 +-------------+
1878 | ... |
1879 | saved regs | spu_saved_regs_size() bytes
1880 FP -> +-------------+
1881 | ... |
1882 | vars | get_frame_size() bytes
1883 HFP -> +-------------+
1884 | ... |
1885 | outgoing |
1886 | args | crtl->outgoing_args_size bytes
1887 +-------------+
1888 | $lr of next |
1889 | frame |
1890 +-------------+
1891 | back chain |
1892 SP -> +-------------+
1893
1894 */
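/* A small numeric example of the sizing done below: with
   get_frame_size () == 48, two saved registers (spu_saved_regs_size ()
   == 32), no outgoing or pretend args, and a non-leaf function,
   total_size is 48 + 32 + 0 + 0 plus STACK_POINTER_OFFSET.  Since that
   is well under 2000, the back chain is stored at sp - total_size
   first, and the stack pointer is then lowered by total_size in a
   single add (a scratch register is used instead for larger frames).  */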
1895 void
1896 spu_expand_prologue (void)
1897 {
1898 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1899 HOST_WIDE_INT total_size;
1900 HOST_WIDE_INT saved_regs_size;
1901 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1902 rtx scratch_reg_0, scratch_reg_1;
1903 rtx insn, real;
1904
1905 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1906 the "toplevel" insn chain. */
1907 emit_note (NOTE_INSN_DELETED);
1908
1909 if (flag_pic && optimize == 0)
1910 crtl->uses_pic_offset_table = 1;
1911
1912 if (spu_naked_function_p (current_function_decl))
1913 return;
1914
1915 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1916 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1917
1918 saved_regs_size = spu_saved_regs_size ();
1919 total_size = size + saved_regs_size
1920 + crtl->outgoing_args_size
1921 + crtl->args.pretend_args_size;
1922
1923 if (!current_function_is_leaf
1924 || cfun->calls_alloca || total_size > 0)
1925 total_size += STACK_POINTER_OFFSET;
1926
1927 /* Save this first because code after this might use the link
1928 register as a scratch register. */
1929 if (!current_function_is_leaf)
1930 {
1931 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1932 RTX_FRAME_RELATED_P (insn) = 1;
1933 }
1934
1935 if (total_size > 0)
1936 {
1937 offset = -crtl->args.pretend_args_size;
1938 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1939 if (need_to_save_reg (regno, 1))
1940 {
1941 offset -= 16;
1942 insn = frame_emit_store (regno, sp_reg, offset);
1943 RTX_FRAME_RELATED_P (insn) = 1;
1944 }
1945 }
1946
1947 if (flag_pic && crtl->uses_pic_offset_table)
1948 {
1949 rtx pic_reg = get_pic_reg ();
1950 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1951 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1952 }
1953
1954 if (total_size > 0)
1955 {
1956 if (flag_stack_check)
1957 {
1958 /* We compare against total_size-1 because
1959 ($sp >= total_size) <=> ($sp > total_size-1) */
1960 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1961 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1962 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1963 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1964 {
1965 emit_move_insn (scratch_v4si, size_v4si);
1966 size_v4si = scratch_v4si;
1967 }
1968 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1969 emit_insn (gen_vec_extractv4si
1970 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1971 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1972 }
1973
1974 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1975 the value of the previous $sp because we save it as the back
1976 chain. */
1977 if (total_size <= 2000)
1978 {
1979 /* In this case we save the back chain first. */
1980 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1981 insn =
1982 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1983 }
1984 else
1985 {
1986 insn = emit_move_insn (scratch_reg_0, sp_reg);
1987 insn =
1988 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1989 }
1990 RTX_FRAME_RELATED_P (insn) = 1;
1991 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1992 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
1993
1994 if (total_size > 2000)
1995 {
1996 /* Save the back chain ptr */
1997 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1998 }
1999
2000 if (frame_pointer_needed)
2001 {
2002 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2003 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2004 + crtl->outgoing_args_size;
2005 /* Set the new frame_pointer */
2006 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2007 RTX_FRAME_RELATED_P (insn) = 1;
2008 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2009 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2010 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2011 }
2012 }
2013
2014 emit_note (NOTE_INSN_DELETED);
2015 }
2016
2017 void
2018 spu_expand_epilogue (bool sibcall_p)
2019 {
2020 int size = get_frame_size (), offset, regno;
2021 HOST_WIDE_INT saved_regs_size, total_size;
2022 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2023 rtx jump, scratch_reg_0;
2024
2025 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
2026 the "toplevel" insn chain. */
2027 emit_note (NOTE_INSN_DELETED);
2028
2029 if (spu_naked_function_p (current_function_decl))
2030 return;
2031
2032 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2033
2034 saved_regs_size = spu_saved_regs_size ();
2035 total_size = size + saved_regs_size
2036 + crtl->outgoing_args_size
2037 + crtl->args.pretend_args_size;
2038
2039 if (!current_function_is_leaf
2040 || cfun->calls_alloca || total_size > 0)
2041 total_size += STACK_POINTER_OFFSET;
2042
2043 if (total_size > 0)
2044 {
2045 if (cfun->calls_alloca)
2046 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2047 else
2048 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2049
2050
2051 if (saved_regs_size > 0)
2052 {
2053 offset = -crtl->args.pretend_args_size;
2054 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2055 if (need_to_save_reg (regno, 1))
2056 {
2057 offset -= 0x10;
2058 frame_emit_load (regno, sp_reg, offset);
2059 }
2060 }
2061 }
2062
2063 if (!current_function_is_leaf)
2064 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2065
2066 if (!sibcall_p)
2067 {
2068 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2069 jump = emit_jump_insn (gen__return ());
2070 emit_barrier_after (jump);
2071 }
2072
2073 emit_note (NOTE_INSN_DELETED);
2074 }
2075
2076 rtx
2077 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2078 {
2079 if (count != 0)
2080 return 0;
2081 /* This is inefficient because it ends up copying to a save-register
2082 which then gets saved even though $lr has already been saved. But
2083 it does generate better code for leaf functions and we don't need
2084 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2085 used for __builtin_return_address anyway, so maybe we don't care if
2086 it's inefficient. */
2087 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2088 }
2089 \f
2090
2091 /* Given VAL, generate a constant appropriate for MODE.
2092 If MODE is a vector mode, every element will be VAL.
2093 For TImode, VAL will be zero extended to 128 bits. */
2094 rtx
2095 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2096 {
2097 rtx inner;
2098 rtvec v;
2099 int units, i;
2100
2101 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2102 || GET_MODE_CLASS (mode) == MODE_FLOAT
2103 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2104 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2105
2106 if (GET_MODE_CLASS (mode) == MODE_INT)
2107 return immed_double_const (val, 0, mode);
2108
2109 /* val is the bit representation of the float */
2110 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2111 return hwint_to_const_double (mode, val);
2112
2113 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2114 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2115 else
2116 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2117
2118 units = GET_MODE_NUNITS (mode);
2119
2120 v = rtvec_alloc (units);
2121
2122 for (i = 0; i < units; ++i)
2123 RTVEC_ELT (v, i) = inner;
2124
2125 return gen_rtx_CONST_VECTOR (mode, v);
2126 }
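/* For example, spu_const (SImode, 7) returns (const_int 7), while
   spu_const (V4SImode, 7) returns a CONST_VECTOR whose four SImode
   elements are each (const_int 7).  */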
2127
2128 /* Create a MODE vector constant from 4 ints. */
2129 rtx
2130 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2131 {
2132 unsigned char arr[16];
2133 arr[0] = (a >> 24) & 0xff;
2134 arr[1] = (a >> 16) & 0xff;
2135 arr[2] = (a >> 8) & 0xff;
2136 arr[3] = (a >> 0) & 0xff;
2137 arr[4] = (b >> 24) & 0xff;
2138 arr[5] = (b >> 16) & 0xff;
2139 arr[6] = (b >> 8) & 0xff;
2140 arr[7] = (b >> 0) & 0xff;
2141 arr[8] = (c >> 24) & 0xff;
2142 arr[9] = (c >> 16) & 0xff;
2143 arr[10] = (c >> 8) & 0xff;
2144 arr[11] = (c >> 0) & 0xff;
2145 arr[12] = (d >> 24) & 0xff;
2146 arr[13] = (d >> 16) & 0xff;
2147 arr[14] = (d >> 8) & 0xff;
2148 arr[15] = (d >> 0) & 0xff;
2149 return array_to_constant(mode, arr);
2150 }
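/* For example, spu_const_from_ints (V4SImode, 0x00010203, 0x04050607,
   0x08090a0b, 0x0c0d0e0f) fills the array with the bytes 0x00 through
   0x0f in order; each int is split most-significant byte first.  */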
2151 \f
2152 /* Branch hint support. */
2153
2154 /* An array of these is used to propagate hints to predecessor blocks. */
2155 struct spu_bb_info
2156 {
2157 rtx prop_jump; /* propagated from another block */
2158 int bb_index; /* the original block. */
2159 };
2160 static struct spu_bb_info *spu_bb_info;
2161
2162 #define STOP_HINT_P(INSN) \
2163 (GET_CODE(INSN) == CALL_INSN \
2164 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2165 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2166
2167 /* 1 when RTX is a hinted branch or its target. We keep track of
2168 what has been hinted so the safe-hint code can test it easily. */
2169 #define HINTED_P(RTX) \
2170 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2171
2172 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2173 #define SCHED_ON_EVEN_P(RTX) \
2174 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2175
2176 /* Emit a nop for INSN such that the two will dual issue. This assumes
2177 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2178 We check for TImode to handle a MULTI1 insn which has dual issued its
2179 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2180 ADDR_VEC insns. */
2181 static void
2182 emit_nop_for_insn (rtx insn)
2183 {
2184 int p;
2185 rtx new_insn;
2186 p = get_pipe (insn);
2187 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2188 new_insn = emit_insn_after (gen_lnop (), insn);
2189 else if (p == 1 && GET_MODE (insn) == TImode)
2190 {
2191 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2192 PUT_MODE (new_insn, TImode);
2193 PUT_MODE (insn, VOIDmode);
2194 }
2195 else
2196 new_insn = emit_insn_after (gen_lnop (), insn);
2197 recog_memoized (new_insn);
2198 }
2199
2200 /* Insert nops in basic blocks to meet dual issue alignment
2201 requirements. Also make sure hbrp and hint instructions are at least
2202 one cycle apart, possibly inserting a nop. */
2203 static void
2204 pad_bb(void)
2205 {
2206 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2207 int length;
2208 int addr;
2209
2210 /* This sets up INSN_ADDRESSES. */
2211 shorten_branches (get_insns ());
2212
2213 /* Keep track of length added by nops. */
2214 length = 0;
2215
2216 prev_insn = 0;
2217 insn = get_insns ();
2218 if (!active_insn_p (insn))
2219 insn = next_active_insn (insn);
2220 for (; insn; insn = next_insn)
2221 {
2222 next_insn = next_active_insn (insn);
2223 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2224 || INSN_CODE (insn) == CODE_FOR_hbr)
2225 {
2226 if (hbr_insn)
2227 {
2228 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2229 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2230 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2231 || (a1 - a0 == 4))
2232 {
2233 prev_insn = emit_insn_before (gen_lnop (), insn);
2234 PUT_MODE (prev_insn, GET_MODE (insn));
2235 PUT_MODE (insn, TImode);
2236 length += 4;
2237 }
2238 }
2239 hbr_insn = insn;
2240 }
2241 if (INSN_CODE (insn) == CODE_FOR_blockage)
2242 {
2243 if (GET_MODE (insn) == TImode)
2244 PUT_MODE (next_insn, TImode);
2245 insn = next_insn;
2246 next_insn = next_active_insn (insn);
2247 }
2248 addr = INSN_ADDRESSES (INSN_UID (insn));
2249 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2250 {
2251 if (((addr + length) & 7) != 0)
2252 {
2253 emit_nop_for_insn (prev_insn);
2254 length += 4;
2255 }
2256 }
2257 else if (GET_MODE (insn) == TImode
2258 && ((next_insn && GET_MODE (next_insn) != TImode)
2259 || get_attr_type (insn) == TYPE_MULTI0)
2260 && ((addr + length) & 7) != 0)
2261 {
2262 /* prev_insn will always be set because the first insn is
2263 always 8-byte aligned. */
2264 emit_nop_for_insn (prev_insn);
2265 length += 4;
2266 }
2267 prev_insn = insn;
2268 }
2269 }
2270
2271 \f
2272 /* Routines for branch hints. */
2273
2274 static void
2275 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2276 int distance, sbitmap blocks)
2277 {
2278 rtx branch_label = 0;
2279 rtx hint;
2280 rtx insn;
2281 rtx table;
2282
2283 if (before == 0 || branch == 0 || target == 0)
2284 return;
2285
2286 /* While scheduling we require hints to be no further than 600 bytes
2287 from the branch, so we need to enforce that here too. */
2288 if (distance > 600)
2289 return;
2290
2291 /* If BEFORE is a basic block note, emit the hint after it. */
2292 if (NOTE_KIND (before) == NOTE_INSN_BASIC_BLOCK)
2293 before = NEXT_INSN (before);
2294
2295 branch_label = gen_label_rtx ();
2296 LABEL_NUSES (branch_label)++;
2297 LABEL_PRESERVE_P (branch_label) = 1;
2298 insn = emit_label_before (branch_label, branch);
2299 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2300 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2301
2302 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2303 recog_memoized (hint);
2304 HINTED_P (branch) = 1;
2305
2306 if (GET_CODE (target) == LABEL_REF)
2307 HINTED_P (XEXP (target, 0)) = 1;
2308 else if (tablejump_p (branch, 0, &table))
2309 {
2310 rtvec vec;
2311 int j;
2312 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2313 vec = XVEC (PATTERN (table), 0);
2314 else
2315 vec = XVEC (PATTERN (table), 1);
2316 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2317 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2318 }
2319
2320 if (distance >= 588)
2321 {
2322 /* Make sure the hint isn't scheduled any earlier than this point,
2323 which could make it too far for the branch offset to fit. */
2324 recog_memoized (emit_insn_before (gen_blockage (), hint));
2325 }
2326 else if (distance <= 8 * 4)
2327 {
2328 /* To guarantee at least 8 insns between the hint and branch we
2329 insert nops. */
2330 int d;
2331 for (d = distance; d < 8 * 4; d += 4)
2332 {
2333 insn =
2334 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2335 recog_memoized (insn);
2336 }
2337
2338 /* Make sure any nops inserted aren't scheduled before the hint. */
2339 recog_memoized (emit_insn_after (gen_blockage (), hint));
2340
2341 /* Make sure any nops inserted aren't scheduled after the call. */
2342 if (CALL_P (branch) && distance < 8 * 4)
2343 recog_memoized (emit_insn_before (gen_blockage (), branch));
2344 }
2345 }
2346
2347 /* Return 0 if we don't want a hint for this branch. Otherwise return
2348 the rtx for the branch target. */
2349 static rtx
2350 get_branch_target (rtx branch)
2351 {
2352 if (GET_CODE (branch) == JUMP_INSN)
2353 {
2354 rtx set, src;
2355
2356 /* Return statements */
2357 if (GET_CODE (PATTERN (branch)) == RETURN)
2358 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2359
2360 /* jump table */
2361 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2362 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2363 return 0;
2364
2365 set = single_set (branch);
2366 src = SET_SRC (set);
2367 if (GET_CODE (SET_DEST (set)) != PC)
2368 abort ();
2369
2370 if (GET_CODE (src) == IF_THEN_ELSE)
2371 {
2372 rtx lab = 0;
2373 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2374 if (note)
2375 {
2376 /* If the more probable case is not a fall through, then
2377 try a branch hint. */
2378 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2379 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2380 && GET_CODE (XEXP (src, 1)) != PC)
2381 lab = XEXP (src, 1);
2382 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2383 && GET_CODE (XEXP (src, 2)) != PC)
2384 lab = XEXP (src, 2);
2385 }
2386 if (lab)
2387 {
2388 if (GET_CODE (lab) == RETURN)
2389 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2390 return lab;
2391 }
2392 return 0;
2393 }
2394
2395 return src;
2396 }
2397 else if (GET_CODE (branch) == CALL_INSN)
2398 {
2399 rtx call;
2400 /* All of our call patterns are in a PARALLEL and the CALL is
2401 the first pattern in the PARALLEL. */
2402 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2403 abort ();
2404 call = XVECEXP (PATTERN (branch), 0, 0);
2405 if (GET_CODE (call) == SET)
2406 call = SET_SRC (call);
2407 if (GET_CODE (call) != CALL)
2408 abort ();
2409 return XEXP (XEXP (call, 0), 0);
2410 }
2411 return 0;
2412 }
2413
2414 /* The special $hbr register is used to prevent the insn scheduler from
2415 moving hbr insns across instructions which invalidate them. It
2416 should only be used in a clobber, and this function searches for
2417 insns which clobber it. */
2418 static bool
2419 insn_clobbers_hbr (rtx insn)
2420 {
2421 if (INSN_P (insn)
2422 && GET_CODE (PATTERN (insn)) == PARALLEL)
2423 {
2424 rtx parallel = PATTERN (insn);
2425 rtx clobber;
2426 int j;
2427 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2428 {
2429 clobber = XVECEXP (parallel, 0, j);
2430 if (GET_CODE (clobber) == CLOBBER
2431 && GET_CODE (XEXP (clobber, 0)) == REG
2432 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2433 return 1;
2434 }
2435 }
2436 return 0;
2437 }
2438
2439 /* Search up to 32 insns starting at FIRST:
2440 - at any kind of hinted branch, just return
2441 - at any unconditional branch in the first 15 insns, just return
2442 - at a call or indirect branch, after the first 15 insns, force it to
2443 an even address and return
2444 - at any unconditional branch, after the first 15 insns, force it to
2445 an even address.
2446 At the end of the search, insert an hbrp within 4 insns of FIRST,
2447 and an hbrp within 16 instructions of FIRST.
2448 */
2449 static void
2450 insert_hbrp_for_ilb_runout (rtx first)
2451 {
2452 rtx insn, before_4 = 0, before_16 = 0;
2453 int addr = 0, length, first_addr = -1;
2454 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2455 int insert_lnop_after = 0;
2456 for (insn = first; insn; insn = NEXT_INSN (insn))
2457 if (INSN_P (insn))
2458 {
2459 if (first_addr == -1)
2460 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2461 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2462 length = get_attr_length (insn);
2463
2464 if (before_4 == 0 && addr + length >= 4 * 4)
2465 before_4 = insn;
2466 /* We test for 14 instructions because the first hbrp will add
2467 up to 2 instructions. */
2468 if (before_16 == 0 && addr + length >= 14 * 4)
2469 before_16 = insn;
2470
2471 if (INSN_CODE (insn) == CODE_FOR_hbr)
2472 {
2473 /* Make sure an hbrp is at least 2 cycles away from a hint.
2474 Insert an lnop after the hbrp when necessary. */
2475 if (before_4 == 0 && addr > 0)
2476 {
2477 before_4 = insn;
2478 insert_lnop_after |= 1;
2479 }
2480 else if (before_4 && addr <= 4 * 4)
2481 insert_lnop_after |= 1;
2482 if (before_16 == 0 && addr > 10 * 4)
2483 {
2484 before_16 = insn;
2485 insert_lnop_after |= 2;
2486 }
2487 else if (before_16 && addr <= 14 * 4)
2488 insert_lnop_after |= 2;
2489 }
2490
2491 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2492 {
2493 if (addr < hbrp_addr0)
2494 hbrp_addr0 = addr;
2495 else if (addr < hbrp_addr1)
2496 hbrp_addr1 = addr;
2497 }
2498
2499 if (CALL_P (insn) || JUMP_P (insn))
2500 {
2501 if (HINTED_P (insn))
2502 return;
2503
2504 /* Any branch after the first 15 insns should be on an even
2505 address to avoid a special case branch. There might be
2506 some nops and/or hbrps inserted, so we test after 10
2507 insns. */
2508 if (addr > 10 * 4)
2509 SCHED_ON_EVEN_P (insn) = 1;
2510 }
2511
2512 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2513 return;
2514
2515
2516 if (addr + length >= 32 * 4)
2517 {
2518 gcc_assert (before_4 && before_16);
2519 if (hbrp_addr0 > 4 * 4)
2520 {
2521 insn =
2522 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2523 recog_memoized (insn);
2524 INSN_ADDRESSES_NEW (insn,
2525 INSN_ADDRESSES (INSN_UID (before_4)));
2526 PUT_MODE (insn, GET_MODE (before_4));
2527 PUT_MODE (before_4, TImode);
2528 if (insert_lnop_after & 1)
2529 {
2530 insn = emit_insn_before (gen_lnop (), before_4);
2531 recog_memoized (insn);
2532 INSN_ADDRESSES_NEW (insn,
2533 INSN_ADDRESSES (INSN_UID (before_4)));
2534 PUT_MODE (insn, TImode);
2535 }
2536 }
2537 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2538 && hbrp_addr1 > 16 * 4)
2539 {
2540 insn =
2541 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2542 recog_memoized (insn);
2543 INSN_ADDRESSES_NEW (insn,
2544 INSN_ADDRESSES (INSN_UID (before_16)));
2545 PUT_MODE (insn, GET_MODE (before_16));
2546 PUT_MODE (before_16, TImode);
2547 if (insert_lnop_after & 2)
2548 {
2549 insn = emit_insn_before (gen_lnop (), before_16);
2550 recog_memoized (insn);
2551 INSN_ADDRESSES_NEW (insn,
2552 INSN_ADDRESSES (INSN_UID
2553 (before_16)));
2554 PUT_MODE (insn, TImode);
2555 }
2556 }
2557 return;
2558 }
2559 }
2560 else if (BARRIER_P (insn))
2561 return;
2562
2563 }
2564
2565 /* The SPU might hang when it executes 48 inline instructions after a
2566 hinted branch jumps to its hinted target. The beginning of a
2567 function and the return from a call might have been hinted, and must
2568 be handled as well. To prevent a hang we insert 2 hbrps. The first
2569 should be within 6 insns of the branch target. The second should be
2570 within 22 insns of the branch target. When determining if hbrps are
2571 necessary, we look for only 32 inline instructions, because up to
2572 12 nops and 4 hbrps could be inserted. Similarly, when inserting
2573 new hbrps, we insert them within 4 and 16 insns of the target. */
2574 static void
2575 insert_hbrp (void)
2576 {
2577 rtx insn;
2578 if (TARGET_SAFE_HINTS)
2579 {
2580 shorten_branches (get_insns ());
2581 /* Insert hbrp at beginning of function */
2582 insn = next_active_insn (get_insns ());
2583 if (insn)
2584 insert_hbrp_for_ilb_runout (insn);
2585 /* Insert hbrp after hinted targets. */
2586 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2587 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2588 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2589 }
2590 }
2591
2592 static int in_spu_reorg;
2593
2594 /* Insert branch hints. There are no branch optimizations after this
2595 pass, so it's safe to set our branch hints now. */
2596 static void
2597 spu_machine_dependent_reorg (void)
2598 {
2599 sbitmap blocks;
2600 basic_block bb;
2601 rtx branch, insn;
2602 rtx branch_target = 0;
2603 int branch_addr = 0, insn_addr, required_dist = 0;
2604 int i;
2605 unsigned int j;
2606
2607 if (!TARGET_BRANCH_HINTS || optimize == 0)
2608 {
2609 /* We still do it for unoptimized code because an external
2610 function might have hinted a call or return. */
2611 insert_hbrp ();
2612 pad_bb ();
2613 return;
2614 }
2615
2616 blocks = sbitmap_alloc (last_basic_block);
2617 sbitmap_zero (blocks);
2618
2619 in_spu_reorg = 1;
2620 compute_bb_for_insn ();
2621
2622 compact_blocks ();
2623
2624 spu_bb_info =
2625 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2626 sizeof (struct spu_bb_info));
2627
2628 /* We need exact insn addresses and lengths. */
2629 shorten_branches (get_insns ());
2630
2631 for (i = n_basic_blocks - 1; i >= 0; i--)
2632 {
2633 bb = BASIC_BLOCK (i);
2634 branch = 0;
2635 if (spu_bb_info[i].prop_jump)
2636 {
2637 branch = spu_bb_info[i].prop_jump;
2638 branch_target = get_branch_target (branch);
2639 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2640 required_dist = spu_hint_dist;
2641 }
2642 /* Search from end of a block to beginning. In this loop, find
2643 jumps which need a branch hint and emit the hint only when:
2644 - it's an indirect branch and we're at the insn which sets
2645 the register
2646 - we're at an insn that will invalidate the hint. e.g., a
2647 call, another hint insn, inline asm that clobbers $hbr, and
2648 some inlined operations (divmodsi4). Don't consider jumps
2649 because they are only at the end of a block and are
2650 considered when we are deciding whether to propagate
2651 - we're getting too far away from the branch. The hbr insns
2652 only have a signed 10-bit offset.
2653 We go back as far as possible so the branch will be considered
2654 for propagation when we get to the beginning of the block. */
2655 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2656 {
2657 if (INSN_P (insn))
2658 {
2659 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2660 if (branch
2661 && ((GET_CODE (branch_target) == REG
2662 && set_of (branch_target, insn) != NULL_RTX)
2663 || insn_clobbers_hbr (insn)
2664 || branch_addr - insn_addr > 600))
2665 {
2666 rtx next = NEXT_INSN (insn);
2667 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2668 if (insn != BB_END (bb)
2669 && branch_addr - next_addr >= required_dist)
2670 {
2671 if (dump_file)
2672 fprintf (dump_file,
2673 "hint for %i in block %i before %i\n",
2674 INSN_UID (branch), bb->index,
2675 INSN_UID (next));
2676 spu_emit_branch_hint (next, branch, branch_target,
2677 branch_addr - next_addr, blocks);
2678 }
2679 branch = 0;
2680 }
2681
2682 /* JUMP_P will only be true at the end of a block. When
2683 branch is already set it means we've previously decided
2684 to propagate a hint for that branch into this block. */
2685 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2686 {
2687 branch = 0;
2688 if ((branch_target = get_branch_target (insn)))
2689 {
2690 branch = insn;
2691 branch_addr = insn_addr;
2692 required_dist = spu_hint_dist;
2693 }
2694 }
2695 }
2696 if (insn == BB_HEAD (bb))
2697 break;
2698 }
2699
2700 if (branch)
2701 {
2702 /* If we haven't emitted a hint for this branch yet, it might
2703 be profitable to emit it in one of the predecessor blocks,
2704 especially for loops. */
2705 rtx bbend;
2706 basic_block prev = 0, prop = 0, prev2 = 0;
2707 int loop_exit = 0, simple_loop = 0;
2708 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2709
2710 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2711 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2712 prev = EDGE_PRED (bb, j)->src;
2713 else
2714 prev2 = EDGE_PRED (bb, j)->src;
2715
2716 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2717 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2718 loop_exit = 1;
2719 else if (EDGE_SUCC (bb, j)->dest == bb)
2720 simple_loop = 1;
2721
2722 /* If this branch is a loop exit then propagate to previous
2723 fallthru block. This catches the cases when it is a simple
2724 loop or when there is an initial branch into the loop. */
2725 if (prev && (loop_exit || simple_loop)
2726 && prev->loop_depth <= bb->loop_depth)
2727 prop = prev;
2728
2729 /* If there is only one adjacent predecessor, don't propagate
2730 outside this loop. This loop_depth test isn't perfect, but
2731 I'm not sure the loop_father member is valid at this point. */
2732 else if (prev && single_pred_p (bb)
2733 && prev->loop_depth == bb->loop_depth)
2734 prop = prev;
2735
2736 /* If this is the JOIN block of a simple IF-THEN then
2737 propagate the hint to the HEADER block. */
2738 else if (prev && prev2
2739 && EDGE_COUNT (bb->preds) == 2
2740 && EDGE_COUNT (prev->preds) == 1
2741 && EDGE_PRED (prev, 0)->src == prev2
2742 && prev2->loop_depth == bb->loop_depth
2743 && GET_CODE (branch_target) != REG)
2744 prop = prev;
2745
2746 /* Don't propagate when:
2747 - this is a simple loop and the hint would be too far
2748 - this is not a simple loop and there are 16 insns in
2749 this block already
2750 - the predecessor block ends in a branch that will be
2751 hinted
2752 - the predecessor block ends in an insn that invalidates
2753 the hint */
2754 if (prop
2755 && prop->index >= 0
2756 && (bbend = BB_END (prop))
2757 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2758 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2759 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2760 {
2761 if (dump_file)
2762 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2763 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2764 bb->index, prop->index, bb->loop_depth,
2765 INSN_UID (branch), loop_exit, simple_loop,
2766 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2767
2768 spu_bb_info[prop->index].prop_jump = branch;
2769 spu_bb_info[prop->index].bb_index = i;
2770 }
2771 else if (branch_addr - next_addr >= required_dist)
2772 {
2773 if (dump_file)
2774 fprintf (dump_file, "hint for %i in block %i before %i\n",
2775 INSN_UID (branch), bb->index,
2776 INSN_UID (NEXT_INSN (insn)));
2777 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2778 branch_addr - next_addr, blocks);
2779 }
2780 branch = 0;
2781 }
2782 }
2783 free (spu_bb_info);
2784
2785 if (!sbitmap_empty_p (blocks))
2786 find_many_sub_basic_blocks (blocks);
2787
2788 /* We have to schedule to make sure alignment is ok. */
2789 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2790
2791 /* The hints need to be scheduled, so call it again. */
2792 schedule_insns ();
2793
2794 insert_hbrp ();
2795
2796 pad_bb ();
2797
2798 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2799 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2800 {
2801 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2802 between its branch label and the branch. We don't move the
2803 label because GCC expects it at the beginning of the block. */
2804 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2805 rtx label_ref = XVECEXP (unspec, 0, 0);
2806 rtx label = XEXP (label_ref, 0);
2807 rtx branch;
2808 int offset = 0;
2809 for (branch = NEXT_INSN (label);
2810 !JUMP_P (branch) && !CALL_P (branch);
2811 branch = NEXT_INSN (branch))
2812 if (NONJUMP_INSN_P (branch))
2813 offset += get_attr_length (branch);
2814 if (offset > 0)
2815 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2816 }
2817
2818 if (spu_flag_var_tracking)
2819 {
2820 df_analyze ();
2821 timevar_push (TV_VAR_TRACKING);
2822 variable_tracking_main ();
2823 timevar_pop (TV_VAR_TRACKING);
2824 df_finish_pass (false);
2825 }
2826
2827 free_bb_for_insn ();
2828
2829 in_spu_reorg = 0;
2830 }
2831 \f
2832
2833 /* Insn scheduling routines, primarily for dual issue. */
2834 static int
2835 spu_sched_issue_rate (void)
2836 {
2837 return 2;
2838 }
2839
2840 static int
2841 uses_ls_unit(rtx insn)
2842 {
2843 rtx set = single_set (insn);
2844 if (set != 0
2845 && (GET_CODE (SET_DEST (set)) == MEM
2846 || GET_CODE (SET_SRC (set)) == MEM))
2847 return 1;
2848 return 0;
2849 }
2850
2851 static int
2852 get_pipe (rtx insn)
2853 {
2854 enum attr_type t;
2855 /* Handle inline asm */
2856 if (INSN_CODE (insn) == -1)
2857 return -1;
2858 t = get_attr_type (insn);
2859 switch (t)
2860 {
2861 case TYPE_CONVERT:
2862 return -2;
2863 case TYPE_MULTI0:
2864 return -1;
2865
2866 case TYPE_FX2:
2867 case TYPE_FX3:
2868 case TYPE_SPR:
2869 case TYPE_NOP:
2870 case TYPE_FXB:
2871 case TYPE_FPD:
2872 case TYPE_FP6:
2873 case TYPE_FP7:
2874 return 0;
2875
2876 case TYPE_LNOP:
2877 case TYPE_SHUF:
2878 case TYPE_LOAD:
2879 case TYPE_STORE:
2880 case TYPE_BR:
2881 case TYPE_MULTI1:
2882 case TYPE_HBR:
2883 case TYPE_IPREFETCH:
2884 return 1;
2885 default:
2886 abort ();
2887 }
2888 }
2889
2890
2891 /* haifa-sched.c has a static variable that keeps track of the current
2892 cycle. It is passed to spu_sched_reorder, and we record it here for
2893 use by spu_sched_variable_issue. It won't be accurate if the
2894 scheduler updates its clock_var between the two calls. */
2895 static int clock_var;
2896
2897 /* This is used to keep track of insn alignment. Set to 0 at the
2898 beginning of each block and increased by the "length" attr of each
2899 insn scheduled. */
2900 static int spu_sched_length;
2901
2902 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2903 ready list appropriately in spu_sched_reorder(). */
2904 static int pipe0_clock;
2905 static int pipe1_clock;
2906
2907 static int prev_clock_var;
2908
2909 static int prev_priority;
2910
2911 /* The SPU needs to load the next ilb sometime during the execution of
2912 the previous ilb. There is a potential conflict if every cycle has a
2913 load or store. To avoid the conflict we make sure the load/store
2914 unit is free for at least one cycle during the execution of insns in
2915 the previous ilb. */
2916 static int spu_ls_first;
2917 static int prev_ls_clock;
2918
2919 static void
2920 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2921 int max_ready ATTRIBUTE_UNUSED)
2922 {
2923 spu_sched_length = 0;
2924 }
2925
2926 static void
2927 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2928 int max_ready ATTRIBUTE_UNUSED)
2929 {
2930 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
2931 {
2932 /* When any block might be at least 8-byte aligned, assume they
2933 will all be at least 8-byte aligned to make sure dual issue
2934 works out correctly. */
2935 spu_sched_length = 0;
2936 }
2937 spu_ls_first = INT_MAX;
2938 clock_var = -1;
2939 prev_ls_clock = -1;
2940 pipe0_clock = -1;
2941 pipe1_clock = -1;
2942 prev_clock_var = -1;
2943 prev_priority = -1;
2944 }
2945
2946 static int
2947 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
2948 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
2949 {
2950 int len;
2951 int p;
2952 if (GET_CODE (PATTERN (insn)) == USE
2953 || GET_CODE (PATTERN (insn)) == CLOBBER
2954 || (len = get_attr_length (insn)) == 0)
2955 return more;
2956
2957 spu_sched_length += len;
2958
2959 /* Reset on inline asm */
2960 if (INSN_CODE (insn) == -1)
2961 {
2962 spu_ls_first = INT_MAX;
2963 pipe0_clock = -1;
2964 pipe1_clock = -1;
2965 return 0;
2966 }
2967 p = get_pipe (insn);
2968 if (p == 0)
2969 pipe0_clock = clock_var;
2970 else
2971 pipe1_clock = clock_var;
2972
2973 if (in_spu_reorg)
2974 {
2975 if (clock_var - prev_ls_clock > 1
2976 || INSN_CODE (insn) == CODE_FOR_iprefetch)
2977 spu_ls_first = INT_MAX;
2978 if (uses_ls_unit (insn))
2979 {
2980 if (spu_ls_first == INT_MAX)
2981 spu_ls_first = spu_sched_length;
2982 prev_ls_clock = clock_var;
2983 }
2984
2985 /* The scheduler hasn't inserted the nop, but we will later on.
2986 Include those nops in spu_sched_length. */
2987 if (prev_clock_var == clock_var && (spu_sched_length & 7))
2988 spu_sched_length += 4;
2989 prev_clock_var = clock_var;
2990
2991 /* more is -1 when called from spu_sched_reorder for new insns
2992 that don't have INSN_PRIORITY */
2993 if (more >= 0)
2994 prev_priority = INSN_PRIORITY (insn);
2995 }
2996
2997 /* Always try issuing more insns. spu_sched_reorder will decide
2998 when the cycle should be advanced. */
2999 return 1;
3000 }
3001
3002 /* This function is called for both TARGET_SCHED_REORDER and
3003 TARGET_SCHED_REORDER2. */
3004 static int
3005 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3006 rtx *ready, int *nreadyp, int clock)
3007 {
3008 int i, nready = *nreadyp;
3009 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3010 rtx insn;
3011
3012 clock_var = clock;
3013
3014 if (nready <= 0 || pipe1_clock >= clock)
3015 return 0;
3016
3017 /* Find any rtl insns that don't generate assembly insns and schedule
3018 them first. */
3019 for (i = nready - 1; i >= 0; i--)
3020 {
3021 insn = ready[i];
3022 if (INSN_CODE (insn) == -1
3023 || INSN_CODE (insn) == CODE_FOR_blockage
3024 || (INSN_P (insn) && get_attr_length (insn) == 0))
3025 {
3026 ready[i] = ready[nready - 1];
3027 ready[nready - 1] = insn;
3028 return 1;
3029 }
3030 }
3031
3032 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3033 for (i = 0; i < nready; i++)
3034 if (INSN_CODE (ready[i]) != -1)
3035 {
3036 insn = ready[i];
3037 switch (get_attr_type (insn))
3038 {
3039 default:
3040 case TYPE_MULTI0:
3041 case TYPE_CONVERT:
3042 case TYPE_FX2:
3043 case TYPE_FX3:
3044 case TYPE_SPR:
3045 case TYPE_NOP:
3046 case TYPE_FXB:
3047 case TYPE_FPD:
3048 case TYPE_FP6:
3049 case TYPE_FP7:
3050 pipe_0 = i;
3051 break;
3052 case TYPE_LOAD:
3053 case TYPE_STORE:
3054 pipe_ls = i;
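/* Fall through: loads and stores also issue on pipe 1. */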
3055 case TYPE_LNOP:
3056 case TYPE_SHUF:
3057 case TYPE_BR:
3058 case TYPE_MULTI1:
3059 case TYPE_HBR:
3060 pipe_1 = i;
3061 break;
3062 case TYPE_IPREFETCH:
3063 pipe_hbrp = i;
3064 break;
3065 }
3066 }
3067
3068 /* In the first scheduling phase, schedule loads and stores together
3069 to increase the chance they will get merged during postreload CSE. */
3070 if (!reload_completed && pipe_ls >= 0)
3071 {
3072 insn = ready[pipe_ls];
3073 ready[pipe_ls] = ready[nready - 1];
3074 ready[nready - 1] = insn;
3075 return 1;
3076 }
3077
3078 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3079 if (pipe_hbrp >= 0)
3080 pipe_1 = pipe_hbrp;
3081
3082 /* When we have loads/stores in every cycle of the last 15 insns and
3083 we are about to schedule another load/store, emit an hbrp insn
3084 instead. */
3085 if (in_spu_reorg
3086 && spu_sched_length - spu_ls_first >= 4 * 15
3087 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3088 {
3089 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3090 recog_memoized (insn);
3091 if (pipe0_clock < clock)
3092 PUT_MODE (insn, TImode);
3093 spu_sched_variable_issue (file, verbose, insn, -1);
3094 return 0;
3095 }
3096
3097 /* In general, we want to emit nops to increase dual issue, but dual
3098 issue isn't faster when one of the insns could be scheduled later
3099 without affecting the critical path. We look at INSN_PRIORITY to
3100 make a good guess, but it isn't perfect, so -mdual-nops=n can be
3101 used to tune the behavior. */
3102 if (in_spu_reorg && spu_dual_nops < 10)
3103 {
3104 /* When we are at an even address and we are not issuing nops to
3105 improve scheduling, we need to advance the cycle. */
3106 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3107 && (spu_dual_nops == 0
3108 || (pipe_1 != -1
3109 && prev_priority >
3110 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3111 return 0;
3112
3113 /* When at an odd address, schedule the highest priority insn
3114 without considering pipeline. */
3115 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3116 && (spu_dual_nops == 0
3117 || (prev_priority >
3118 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3119 return 1;
3120 }
3121
3122
3123 /* If we haven't issued a pipe0 insn yet this cycle and there is a
3124 pipe0 insn in the ready list, schedule it. */
3125 if (pipe0_clock < clock && pipe_0 >= 0)
3126 schedule_i = pipe_0;
3127
3128 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3129 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3130 else
3131 schedule_i = pipe_1;
3132
3133 if (schedule_i > -1)
3134 {
3135 insn = ready[schedule_i];
3136 ready[schedule_i] = ready[nready - 1];
3137 ready[nready - 1] = insn;
3138 return 1;
3139 }
3140 return 0;
3141 }
3142
3143 /* INSN is dependent on DEP_INSN. */
3144 static int
3145 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3146 {
3147 rtx set;
3148
3149 /* The blockage pattern is used to prevent instructions from being
3150 moved across it and has no cost. */
3151 if (INSN_CODE (insn) == CODE_FOR_blockage
3152 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3153 return 0;
3154
3155 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3156 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3157 return 0;
3158
3159 /* Make sure hbrps are spread out. */
3160 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3161 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3162 return 8;
3163
3164 /* Make sure hints and hbrps are 2 cycles apart. */
3165 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3166 || INSN_CODE (insn) == CODE_FOR_hbr)
3167 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3168 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3169 return 2;
3170
3171 /* An hbrp has no real dependency on other insns. */
3172 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3173 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3174 return 0;
3175
3176 /* Assuming that it is unlikely an argument register will be used in
3177 the first cycle of the called function, we reduce the cost for
3178 slightly better scheduling of dep_insn. When not hinted, the
3179 mispredicted branch would hide the cost as well. */
3180 if (CALL_P (insn))
3181 {
3182 rtx target = get_branch_target (insn);
3183 if (GET_CODE (target) != REG || !set_of (target, insn))
3184 return cost - 2;
3185 return cost;
3186 }
3187
3188 /* And when returning from a function, let's assume the return values
3189 are completed sooner too. */
3190 if (CALL_P (dep_insn))
3191 return cost - 2;
3192
3193 /* Make sure an instruction that loads from the back chain is scheduled
3194 away from the return instruction so a hint is more likely to get
3195 issued. */
3196 if (INSN_CODE (insn) == CODE_FOR__return
3197 && (set = single_set (dep_insn))
3198 && GET_CODE (SET_DEST (set)) == REG
3199 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3200 return 20;
3201
3202 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3203 scheduler makes every insn in a block anti-dependent on the final
3204 jump_insn. We adjust here so higher cost insns will get scheduled
3205 earlier. */
3206 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3207 return insn_cost (dep_insn) - 3;
3208
3209 return cost;
3210 }
3211 \f
3212 /* Create a CONST_DOUBLE from a string. */
3213 struct rtx_def *
3214 spu_float_const (const char *string, enum machine_mode mode)
3215 {
3216 REAL_VALUE_TYPE value;
3217 value = REAL_VALUE_ATOF (string, mode);
3218 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3219 }
3220
3221 int
3222 spu_constant_address_p (rtx x)
3223 {
3224 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3225 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3226 || GET_CODE (x) == HIGH);
3227 }
3228
3229 static enum spu_immediate
3230 which_immediate_load (HOST_WIDE_INT val)
3231 {
3232 gcc_assert (val == trunc_int_for_mode (val, SImode));
3233
3234 if (val >= -0x8000 && val <= 0x7fff)
3235 return SPU_IL;
3236 if (val >= 0 && val <= 0x3ffff)
3237 return SPU_ILA;
3238 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3239 return SPU_ILH;
3240 if ((val & 0xffff) == 0)
3241 return SPU_ILHU;
3242
3243 return SPU_NONE;
3244 }
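/* Illustrative values: 0x1234 -> SPU_IL, 0x12345 -> SPU_ILA,
   0x00560056 -> SPU_ILH, 0x12340000 -> SPU_ILHU, and 0x12345678 is
   SPU_NONE because no single il-family instruction can produce it.  */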
3245
3246 /* Return true when OP can be loaded by one of the il instructions, or
3247 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3248 int
3249 immediate_load_p (rtx op, enum machine_mode mode)
3250 {
3251 if (CONSTANT_P (op))
3252 {
3253 enum immediate_class c = classify_immediate (op, mode);
3254 return c == IC_IL1 || c == IC_IL1s
3255 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3256 }
3257 return 0;
3258 }
3259
3260 /* Return true if the first SIZE bytes of ARR are a constant that can be
3261 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3262 receive the run length and starting offset for the instruction to use. */
3263 static int
3264 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3265 {
3266 int cpat, run, i, start;
3267 cpat = 1;
3268 run = 0;
3269 start = -1;
3270 for (i = 0; i < size && cpat; i++)
3271 if (arr[i] != i+16)
3272 {
3273 if (!run)
3274 {
3275 start = i;
3276 if (arr[i] == 3)
3277 run = 1;
3278 else if (arr[i] == 2 && arr[i+1] == 3)
3279 run = 2;
3280 else if (arr[i] == 0)
3281 {
3282 while (arr[i+run] == run && i+run < 16)
3283 run++;
3284 if (run != 4 && run != 8)
3285 cpat = 0;
3286 }
3287 else
3288 cpat = 0;
3289 if ((i & (run-1)) != 0)
3290 cpat = 0;
3291 i += run;
3292 }
3293 else
3294 cpat = 0;
3295 }
3296 if (cpat && (run || size < 16))
3297 {
3298 if (run == 0)
3299 run = 1;
3300 if (prun)
3301 *prun = run;
3302 if (pstart)
3303 *pstart = start == -1 ? 16-run : start;
3304 return 1;
3305 }
3306 return 0;
3307 }
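/* Illustrative example: the 16-byte constant
   { 0x10,0x11,0x12,0x13, 0x00,0x01,0x02,0x03, 0x18,...,0x1f }
   differs from the i+16 identity pattern only in a 4-byte run of
   0,1,2,3 starting at offset 4, so cpat_info returns 1 with
   *prun == 4 and *pstart == 4 (the shape of a cwd-style word
   insertion mask).  */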
3308
3309 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3310 it into a register. MODE is only valid when OP is a CONST_INT. */
3311 static enum immediate_class
3312 classify_immediate (rtx op, enum machine_mode mode)
3313 {
3314 HOST_WIDE_INT val;
3315 unsigned char arr[16];
3316 int i, j, repeated, fsmbi, repeat;
3317
3318 gcc_assert (CONSTANT_P (op));
3319
3320 if (GET_MODE (op) != VOIDmode)
3321 mode = GET_MODE (op);
3322
3323 /* A V4SI const_vector with all identical symbols is ok. */
3324 if (!flag_pic
3325 && mode == V4SImode
3326 && GET_CODE (op) == CONST_VECTOR
3327 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3328 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3329 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3330 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3331 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3332 op = CONST_VECTOR_ELT (op, 0);
3333
3334 switch (GET_CODE (op))
3335 {
3336 case SYMBOL_REF:
3337 case LABEL_REF:
3338 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3339
3340 case CONST:
3341 /* We can never know if the resulting address fits in 18 bits and can be
3342 loaded with ila. For now, assume the address will not overflow if
3343 the displacement is "small" (fits 'K' constraint). */
3344 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3345 {
3346 rtx sym = XEXP (XEXP (op, 0), 0);
3347 rtx cst = XEXP (XEXP (op, 0), 1);
3348
3349 if (GET_CODE (sym) == SYMBOL_REF
3350 && GET_CODE (cst) == CONST_INT
3351 && satisfies_constraint_K (cst))
3352 return IC_IL1s;
3353 }
3354 return IC_IL2s;
3355
3356 case HIGH:
3357 return IC_IL1s;
3358
3359 case CONST_VECTOR:
3360 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3361 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3362 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3363 return IC_POOL;
3364 /* Fall through. */
3365
3366 case CONST_INT:
3367 case CONST_DOUBLE:
3368 constant_to_array (mode, op, arr);
3369
3370 /* Check that each 4-byte slot is identical. */
3371 repeated = 1;
3372 for (i = 4; i < 16; i += 4)
3373 for (j = 0; j < 4; j++)
3374 if (arr[j] != arr[i + j])
3375 repeated = 0;
3376
3377 if (repeated)
3378 {
3379 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3380 val = trunc_int_for_mode (val, SImode);
3381
3382 if (which_immediate_load (val) != SPU_NONE)
3383 return IC_IL1;
3384 }
3385
3386 /* Any mode of 2 bytes or smaller can be loaded with an il
3387 instruction. */
3388 gcc_assert (GET_MODE_SIZE (mode) > 2);
3389
3390 fsmbi = 1;
3391 repeat = 0;
3392 for (i = 0; i < 16 && fsmbi; i++)
3393 if (arr[i] != 0 && repeat == 0)
3394 repeat = arr[i];
3395 else if (arr[i] != 0 && arr[i] != repeat)
3396 fsmbi = 0;
3397 if (fsmbi)
3398 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3399
3400 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3401 return IC_CPAT;
3402
3403 if (repeated)
3404 return IC_IL2;
3405
3406 return IC_POOL;
3407 default:
3408 break;
3409 }
3410 gcc_unreachable ();
3411 }
3412
3413 static enum spu_immediate
3414 which_logical_immediate (HOST_WIDE_INT val)
3415 {
3416 gcc_assert (val == trunc_int_for_mode (val, SImode));
3417
3418 if (val >= -0x200 && val <= 0x1ff)
3419 return SPU_ORI;
3420 if (val >= 0 && val <= 0xffff)
3421 return SPU_IOHL;
3422 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3423 {
3424 val = trunc_int_for_mode (val, HImode);
3425 if (val >= -0x200 && val <= 0x1ff)
3426 return SPU_ORHI;
3427 if ((val & 0xff) == ((val >> 8) & 0xff))
3428 {
3429 val = trunc_int_for_mode (val, QImode);
3430 if (val >= -0x200 && val <= 0x1ff)
3431 return SPU_ORBI;
3432 }
3433 }
3434 return SPU_NONE;
3435 }
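/* Illustrative values: 0x1ff -> SPU_ORI, 0x1234 -> SPU_IOHL,
   0x00030003 -> SPU_ORHI, 0x05050505 -> SPU_ORBI, and
   0x12345678 -> SPU_NONE.  */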
3436
3437 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3438 CONST_DOUBLEs. */
3439 static int
3440 const_vector_immediate_p (rtx x)
3441 {
3442 int i;
3443 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3444 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3445 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3446 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3447 return 0;
3448 return 1;
3449 }
3450
3451 int
3452 logical_immediate_p (rtx op, enum machine_mode mode)
3453 {
3454 HOST_WIDE_INT val;
3455 unsigned char arr[16];
3456 int i, j;
3457
3458 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3459 || GET_CODE (op) == CONST_VECTOR);
3460
3461 if (GET_CODE (op) == CONST_VECTOR
3462 && !const_vector_immediate_p (op))
3463 return 0;
3464
3465 if (GET_MODE (op) != VOIDmode)
3466 mode = GET_MODE (op);
3467
3468 constant_to_array (mode, op, arr);
3469
3470 /* Check that bytes are repeated. */
3471 for (i = 4; i < 16; i += 4)
3472 for (j = 0; j < 4; j++)
3473 if (arr[j] != arr[i + j])
3474 return 0;
3475
3476 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3477 val = trunc_int_for_mode (val, SImode);
3478
3479 i = which_logical_immediate (val);
3480 return i != SPU_NONE && i != SPU_IOHL;
3481 }
3482
3483 int
3484 iohl_immediate_p (rtx op, enum machine_mode mode)
3485 {
3486 HOST_WIDE_INT val;
3487 unsigned char arr[16];
3488 int i, j;
3489
3490 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3491 || GET_CODE (op) == CONST_VECTOR);
3492
3493 if (GET_CODE (op) == CONST_VECTOR
3494 && !const_vector_immediate_p (op))
3495 return 0;
3496
3497 if (GET_MODE (op) != VOIDmode)
3498 mode = GET_MODE (op);
3499
3500 constant_to_array (mode, op, arr);
3501
3502 /* Check that bytes are repeated. */
3503 for (i = 4; i < 16; i += 4)
3504 for (j = 0; j < 4; j++)
3505 if (arr[j] != arr[i + j])
3506 return 0;
3507
3508 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3509 val = trunc_int_for_mode (val, SImode);
3510
3511 return val >= 0 && val <= 0xffff;
3512 }
3513
3514 int
3515 arith_immediate_p (rtx op, enum machine_mode mode,
3516 HOST_WIDE_INT low, HOST_WIDE_INT high)
3517 {
3518 HOST_WIDE_INT val;
3519 unsigned char arr[16];
3520 int bytes, i, j;
3521
3522 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3523 || GET_CODE (op) == CONST_VECTOR);
3524
3525 if (GET_CODE (op) == CONST_VECTOR
3526 && !const_vector_immediate_p (op))
3527 return 0;
3528
3529 if (GET_MODE (op) != VOIDmode)
3530 mode = GET_MODE (op);
3531
3532 constant_to_array (mode, op, arr);
3533
3534 if (VECTOR_MODE_P (mode))
3535 mode = GET_MODE_INNER (mode);
3536
3537 bytes = GET_MODE_SIZE (mode);
3538 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3539
3540 /* Check that bytes are repeated. */
3541 for (i = bytes; i < 16; i += bytes)
3542 for (j = 0; j < bytes; j++)
3543 if (arr[j] != arr[i + j])
3544 return 0;
3545
3546 val = arr[0];
3547 for (j = 1; j < bytes; j++)
3548 val = (val << 8) | arr[j];
3549
3550 val = trunc_int_for_mode (val, mode);
3551
3552 return val >= low && val <= high;
3553 }
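/* Illustrative example: a V8HI vector whose eight elements are all
   (const_int 12) reduces to the repeated 2-byte value 0x000c, so
   arith_immediate_p returns true whenever 12 falls within
   [LOW, HIGH].  */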
3554
3555 /* TRUE when op is an immediate and an exact power of 2, and given that
3556 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3557 all entries must be the same. */
3558 bool
3559 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3560 {
3561 enum machine_mode int_mode;
3562 HOST_WIDE_INT val;
3563 unsigned char arr[16];
3564 int bytes, i, j;
3565
3566 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3567 || GET_CODE (op) == CONST_VECTOR);
3568
3569 if (GET_CODE (op) == CONST_VECTOR
3570 && !const_vector_immediate_p (op))
3571 return 0;
3572
3573 if (GET_MODE (op) != VOIDmode)
3574 mode = GET_MODE (op);
3575
3576 constant_to_array (mode, op, arr);
3577
3578 if (VECTOR_MODE_P (mode))
3579 mode = GET_MODE_INNER (mode);
3580
3581 bytes = GET_MODE_SIZE (mode);
3582 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3583
3584 /* Check that bytes are repeated. */
3585 for (i = bytes; i < 16; i += bytes)
3586 for (j = 0; j < bytes; j++)
3587 if (arr[j] != arr[i + j])
3588 return 0;
3589
3590 val = arr[0];
3591 for (j = 1; j < bytes; j++)
3592 val = (val << 8) | arr[j];
3593
3594 val = trunc_int_for_mode (val, int_mode);
3595
3596 /* Currently, we only handle SFmode */
3597 gcc_assert (mode == SFmode);
3598 if (mode == SFmode)
3599 {
3600 int exp = (val >> 23) - 127;
3601 return val > 0 && (val & 0x007fffff) == 0
3602 && exp >= low && exp <= high;
3603 }
3604 return FALSE;
3605 }
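/* Illustrative example: the SFmode constant 8.0f has the bit pattern
   0x41000000, giving exp = (0x41000000 >> 23) - 127 = 3, so
   exp2_immediate_p is true exactly when 3 falls within [LOW, HIGH].  */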
3606
3607 /* We accept:
3608 - any 32-bit constant (SImode, SFmode)
3609 - any constant that can be generated with fsmbi (any mode)
3610 - a 64-bit constant where the high and low bits are identical
3611 (DImode, DFmode)
3612 - a 128-bit constant where the four 32-bit words match. */
3613 int
3614 spu_legitimate_constant_p (rtx x)
3615 {
3616 if (GET_CODE (x) == HIGH)
3617 x = XEXP (x, 0);
3618 /* V4SI with all identical symbols is valid. */
3619 if (!flag_pic
3620 && GET_MODE (x) == V4SImode
3621 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3622 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3623 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3624 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3625 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3626 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3627
3628 if (GET_CODE (x) == CONST_VECTOR
3629 && !const_vector_immediate_p (x))
3630 return 0;
3631 return 1;
3632 }
3633
3634 /* Valid addresses are:
3635 - symbol_ref, label_ref, const
3636 - reg
3637 - reg + const_int, where const_int is 16 byte aligned
3638 - reg + reg, alignment doesn't matter
3639 The alignment matters in the reg+const case because lqd and stqd
3640 ignore the 4 least significant bits of the const. We only care about
3641 16 byte modes because the expand phase will change all smaller MEM
3642 references to TImode. */
3643 static bool
3644 spu_legitimate_address_p (enum machine_mode mode,
3645 rtx x, bool reg_ok_strict)
3646 {
3647 int aligned = GET_MODE_SIZE (mode) >= 16;
3648 if (aligned
3649 && GET_CODE (x) == AND
3650 && GET_CODE (XEXP (x, 1)) == CONST_INT
3651 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3652 x = XEXP (x, 0);
3653 switch (GET_CODE (x))
3654 {
3655 case LABEL_REF:
3656 case SYMBOL_REF:
3657 case CONST:
3658 return !TARGET_LARGE_MEM;
3659
3660 case CONST_INT:
3661 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3662
3663 case SUBREG:
3664 x = XEXP (x, 0);
3665 if (REG_P (x))
3666 return 0;
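/* Fall through. */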
3667
3668 case REG:
3669 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3670
3671 case PLUS:
3672 case LO_SUM:
3673 {
3674 rtx op0 = XEXP (x, 0);
3675 rtx op1 = XEXP (x, 1);
3676 if (GET_CODE (op0) == SUBREG)
3677 op0 = XEXP (op0, 0);
3678 if (GET_CODE (op1) == SUBREG)
3679 op1 = XEXP (op1, 0);
3680 if (GET_CODE (op0) == REG
3681 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3682 && GET_CODE (op1) == CONST_INT
3683 && INTVAL (op1) >= -0x2000
3684 && INTVAL (op1) <= 0x1fff
3685 && (!aligned || (INTVAL (op1) & 15) == 0))
3686 return TRUE;
3687 if (GET_CODE (op0) == REG
3688 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3689 && GET_CODE (op1) == REG
3690 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3691 return TRUE;
3692 }
3693 break;
3694
3695 default:
3696 break;
3697 }
3698 return FALSE;
3699 }
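/* Illustrative examples, assuming suitable base registers: for a TImode
   access, (plus (reg) (const_int 32)) is accepted because the offset is
   a multiple of 16, while (plus (reg) (const_int 4)) is rejected; a
   reg+reg address is accepted for any mode.  */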
3700
3701 /* When the address is reg + const_int, force the const_int into a
3702 register. */
3703 rtx
3704 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3705 enum machine_mode mode ATTRIBUTE_UNUSED)
3706 {
3707 rtx op0, op1;
3708 /* Make sure both operands are registers. */
3709 if (GET_CODE (x) == PLUS)
3710 {
3711 op0 = XEXP (x, 0);
3712 op1 = XEXP (x, 1);
3713 if (ALIGNED_SYMBOL_REF_P (op0))
3714 {
3715 op0 = force_reg (Pmode, op0);
3716 mark_reg_pointer (op0, 128);
3717 }
3718 else if (GET_CODE (op0) != REG)
3719 op0 = force_reg (Pmode, op0);
3720 if (ALIGNED_SYMBOL_REF_P (op1))
3721 {
3722 op1 = force_reg (Pmode, op1);
3723 mark_reg_pointer (op1, 128);
3724 }
3725 else if (GET_CODE (op1) != REG)
3726 op1 = force_reg (Pmode, op1);
3727 x = gen_rtx_PLUS (Pmode, op0, op1);
3728 }
3729 return x;
3730 }
3731
3732 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3733 struct attribute_spec.handler. */
3734 static tree
3735 spu_handle_fndecl_attribute (tree * node,
3736 tree name,
3737 tree args ATTRIBUTE_UNUSED,
3738 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3739 {
3740 if (TREE_CODE (*node) != FUNCTION_DECL)
3741 {
3742 warning (0, "%qE attribute only applies to functions",
3743 name);
3744 *no_add_attrs = true;
3745 }
3746
3747 return NULL_TREE;
3748 }
3749
3750 /* Handle the "vector" attribute. */
3751 static tree
3752 spu_handle_vector_attribute (tree * node, tree name,
3753 tree args ATTRIBUTE_UNUSED,
3754 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3755 {
3756 tree type = *node, result = NULL_TREE;
3757 enum machine_mode mode;
3758 int unsigned_p;
3759
3760 while (POINTER_TYPE_P (type)
3761 || TREE_CODE (type) == FUNCTION_TYPE
3762 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3763 type = TREE_TYPE (type);
3764
3765 mode = TYPE_MODE (type);
3766
3767 unsigned_p = TYPE_UNSIGNED (type);
3768 switch (mode)
3769 {
3770 case DImode:
3771 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3772 break;
3773 case SImode:
3774 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3775 break;
3776 case HImode:
3777 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3778 break;
3779 case QImode:
3780 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3781 break;
3782 case SFmode:
3783 result = V4SF_type_node;
3784 break;
3785 case DFmode:
3786 result = V2DF_type_node;
3787 break;
3788 default:
3789 break;
3790 }
3791
3792 /* Propagate qualifiers attached to the element type
3793 onto the vector type. */
3794 if (result && result != type && TYPE_QUALS (type))
3795 result = build_qualified_type (result, TYPE_QUALS (type));
3796
3797 *no_add_attrs = true; /* No need to hang on to the attribute. */
3798
3799 if (!result)
3800 warning (0, "%qE attribute ignored", name);
3801 else
3802 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3803
3804 return NULL_TREE;
3805 }
3806
3807 /* Return nonzero if FUNC is a naked function. */
3808 static int
3809 spu_naked_function_p (tree func)
3810 {
3811 tree a;
3812
3813 if (TREE_CODE (func) != FUNCTION_DECL)
3814 abort ();
3815
3816 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3817 return a != NULL_TREE;
3818 }
3819
3820 int
3821 spu_initial_elimination_offset (int from, int to)
3822 {
3823 int saved_regs_size = spu_saved_regs_size ();
3824 int sp_offset = 0;
3825 if (!current_function_is_leaf || crtl->outgoing_args_size
3826 || get_frame_size () || saved_regs_size)
3827 sp_offset = STACK_POINTER_OFFSET;
3828 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3829 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3830 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3831 return get_frame_size ();
3832 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3833 return sp_offset + crtl->outgoing_args_size
3834 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3835 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3836 return get_frame_size () + saved_regs_size + sp_offset;
3837 else
3838 gcc_unreachable ();
3839 }
3840
3841 rtx
3842 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3843 {
3844 enum machine_mode mode = TYPE_MODE (type);
3845 int byte_size = ((mode == BLKmode)
3846 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3847
3848 /* Make sure small structs are left justified in a register. */
3849 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3850 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3851 {
3852 enum machine_mode smode;
3853 rtvec v;
3854 int i;
3855 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3856 int n = byte_size / UNITS_PER_WORD;
3857 v = rtvec_alloc (nregs);
3858 for (i = 0; i < n; i++)
3859 {
3860 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3861 gen_rtx_REG (TImode,
3862 FIRST_RETURN_REGNUM
3863 + i),
3864 GEN_INT (UNITS_PER_WORD * i));
3865 byte_size -= UNITS_PER_WORD;
3866 }
3867
3868 if (n < nregs)
3869 {
3870 if (byte_size < 4)
3871 byte_size = 4;
3872 smode =
3873 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3874 RTVEC_ELT (v, n) =
3875 gen_rtx_EXPR_LIST (VOIDmode,
3876 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3877 GEN_INT (UNITS_PER_WORD * n));
3878 }
3879 return gen_rtx_PARALLEL (mode, v);
3880 }
3881 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3882 }
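/* Illustrative example (added for exposition, not part of the original
   code): assuming UNITS_PER_WORD is 16 here, a 20-byte aggregate
   returned by value gives nregs = 2 and n = 1, so the PARALLEL built
   above looks roughly like

     (parallel:BLK
        [(expr_list (reg:TI FIRST_RETURN_REGNUM)     (const_int 0))
         (expr_list (reg:SI FIRST_RETURN_REGNUM + 1) (const_int 16))])

   The first 16 bytes fill a whole TImode register and the remaining 4
   bytes are left justified in the next register, in the smallest
   integer mode that holds them.  */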
3883
3884 rtx
3885 spu_function_arg (CUMULATIVE_ARGS cum,
3886 enum machine_mode mode,
3887 tree type, int named ATTRIBUTE_UNUSED)
3888 {
3889 int byte_size;
3890
3891 if (cum >= MAX_REGISTER_ARGS)
3892 return 0;
3893
3894 byte_size = ((mode == BLKmode)
3895 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3896
3897   /* The ABI does not allow parameters to be passed partially in a
3898      register and partially on the stack.  */
3899 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3900 return 0;
3901
3902 /* Make sure small structs are left justified in a register. */
3903 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3904 && byte_size < UNITS_PER_WORD && byte_size > 0)
3905 {
3906 enum machine_mode smode;
3907 rtx gr_reg;
3908 if (byte_size < 4)
3909 byte_size = 4;
3910 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3911 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3912 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3913 const0_rtx);
3914 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3915 }
3916 else
3917 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3918 }
3919
3920 /* Variable sized types are passed by reference. */
3921 static bool
3922 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3923 enum machine_mode mode ATTRIBUTE_UNUSED,
3924 const_tree type, bool named ATTRIBUTE_UNUSED)
3925 {
3926 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3927 }
3928 \f
3929
3930 /* Var args. */
3931
3932 /* Create and return the va_list datatype.
3933
3934 On SPU, va_list is an array type equivalent to
3935
3936 typedef struct __va_list_tag
3937 {
3938 void *__args __attribute__((__aligned(16)));
3939 void *__skip __attribute__((__aligned(16)));
3940
3941 } va_list[1];
3942
3943 where __args points to the arg that will be returned by the next
3944 va_arg(), and __skip points to the previous stack frame such that
3945 when __args == __skip we should advance __args by 32 bytes. */
3946 static tree
3947 spu_build_builtin_va_list (void)
3948 {
3949 tree f_args, f_skip, record, type_decl;
3950 bool owp;
3951
3952 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3953
3954 type_decl =
3955 build_decl (BUILTINS_LOCATION,
3956 TYPE_DECL, get_identifier ("__va_list_tag"), record);
3957
3958 f_args = build_decl (BUILTINS_LOCATION,
3959 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3960 f_skip = build_decl (BUILTINS_LOCATION,
3961 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3962
3963 DECL_FIELD_CONTEXT (f_args) = record;
3964 DECL_ALIGN (f_args) = 128;
3965 DECL_USER_ALIGN (f_args) = 1;
3966
3967 DECL_FIELD_CONTEXT (f_skip) = record;
3968 DECL_ALIGN (f_skip) = 128;
3969 DECL_USER_ALIGN (f_skip) = 1;
3970
3971 TREE_CHAIN (record) = type_decl;
3972 TYPE_NAME (record) = type_decl;
3973 TYPE_FIELDS (record) = f_args;
3974 TREE_CHAIN (f_args) = f_skip;
3975
3976   /* We know this is being padded and we want it that way.  It is an
3977      internal type so hide the warnings from the user.  */
3978 owp = warn_padded;
3979 warn_padded = false;
3980
3981 layout_type (record);
3982
3983 warn_padded = owp;
3984
3985 /* The correct type is an array type of one element. */
3986 return build_array_type (record, build_index_type (size_zero_node));
3987 }
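/* Illustrative usage sketch (added for exposition, not part of the
   original code): ordinary varargs C code such as

     int
     sum (int n, ...)
     {
       va_list ap;
       int i, total = 0;
       va_start (ap, n);
       for (i = 0; i < n; i++)
         total += va_arg (ap, int);
       va_end (ap);
       return total;
     }

   is expanded in terms of the __args/__skip pair described above:
   __args walks the 16-byte argument slots, and __skip marks where the
   32-byte adjustment must be applied.  See spu_va_start and
   spu_gimplify_va_arg_expr below.  */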
3988
3989 /* Implement va_start by filling the va_list structure VALIST.
3990 NEXTARG points to the first anonymous stack argument.
3991
3992 The following global variables are used to initialize
3993 the va_list structure:
3994
3995 crtl->args.info;
3996 the CUMULATIVE_ARGS for this function
3997
3998 crtl->args.arg_offset_rtx:
3999 holds the offset of the first anonymous stack argument
4000 (relative to the virtual arg pointer). */
4001
4002 static void
4003 spu_va_start (tree valist, rtx nextarg)
4004 {
4005 tree f_args, f_skip;
4006 tree args, skip, t;
4007
4008 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4009 f_skip = TREE_CHAIN (f_args);
4010
4011 valist = build_va_arg_indirect_ref (valist);
4012 args =
4013 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4014 skip =
4015 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4016
4017 /* Find the __args area. */
4018 t = make_tree (TREE_TYPE (args), nextarg);
4019 if (crtl->args.pretend_args_size > 0)
4020 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4021 size_int (-STACK_POINTER_OFFSET));
4022 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4023 TREE_SIDE_EFFECTS (t) = 1;
4024 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4025
4026 /* Find the __skip area. */
4027 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4028 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4029 size_int (crtl->args.pretend_args_size
4030 - STACK_POINTER_OFFSET));
4031 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4032 TREE_SIDE_EFFECTS (t) = 1;
4033 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4034 }
4035
4036 /* Gimplify va_arg by updating the va_list structure
4037 VALIST as required to retrieve an argument of type
4038 TYPE, and returning that argument.
4039
4040 ret = va_arg(VALIST, TYPE);
4041
4042 generates code equivalent to:
4043
4044 paddedsize = (sizeof(TYPE) + 15) & -16;
4045 if (VALIST.__args + paddedsize > VALIST.__skip
4046 && VALIST.__args <= VALIST.__skip)
4047 addr = VALIST.__skip + 32;
4048 else
4049 addr = VALIST.__args;
4050 VALIST.__args = addr + paddedsize;
4051 ret = *(TYPE *)addr;
4052 */
4053 static tree
4054 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4055 gimple_seq * post_p ATTRIBUTE_UNUSED)
4056 {
4057 tree f_args, f_skip;
4058 tree args, skip;
4059 HOST_WIDE_INT size, rsize;
4060 tree paddedsize, addr, tmp;
4061 bool pass_by_reference_p;
4062
4063 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4064 f_skip = TREE_CHAIN (f_args);
4065
4066 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4067 args =
4068 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4069 skip =
4070 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4071
4072 addr = create_tmp_var (ptr_type_node, "va_arg");
4073 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4074
4075   /* If an object is dynamically sized, a pointer to it is passed
4076      instead of the object itself.  */
4077 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4078 false);
4079 if (pass_by_reference_p)
4080 type = build_pointer_type (type);
4081 size = int_size_in_bytes (type);
4082 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4083
4084 /* build conditional expression to calculate addr. The expression
4085 will be gimplified later. */
4086 paddedsize = size_int (rsize);
4087 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4088 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4089 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4090 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4091 unshare_expr (skip)));
4092
4093 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4094 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4095 size_int (32)), unshare_expr (args));
4096
4097 gimplify_assign (addr, tmp, pre_p);
4098
4099   /* Update VALIST.__args.  */
4100 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4101 gimplify_assign (unshare_expr (args), tmp, pre_p);
4102
4103 addr = fold_convert (build_pointer_type (type), addr);
4104
4105 if (pass_by_reference_p)
4106 addr = build_va_arg_indirect_ref (addr);
4107
4108 return build_va_arg_indirect_ref (addr);
4109 }
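/* Illustrative walk-through (added for exposition, not part of the
   original code): for va_arg (ap, int), size is 4 and rsize (the
   paddedsize above) is 16.  When the 16-byte slot starting at __args
   would reach or pass __skip, addr is instead __skip + 32 as described
   in the comment above; otherwise addr is simply the current __args.
   Either way __args then advances by 16, so successive va_arg calls
   consume one quadword slot each.  */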
4110
4111 /* Save parameter registers starting with the register that corresponds
4112    to the first unnamed parameter.  If the first unnamed parameter is
4113    on the stack then save no registers.  Set pretend_args_size to the
4114 amount of space needed to save the registers. */
4115 void
4116 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4117 tree type, int *pretend_size, int no_rtl)
4118 {
4119 if (!no_rtl)
4120 {
4121 rtx tmp;
4122 int regno;
4123 int offset;
4124 int ncum = *cum;
4125
4126       /* cum currently points to the last named argument; we want to
4127          start at the next argument.  */
4128 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
4129
4130 offset = -STACK_POINTER_OFFSET;
4131 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4132 {
4133 tmp = gen_frame_mem (V4SImode,
4134 plus_constant (virtual_incoming_args_rtx,
4135 offset));
4136 emit_move_insn (tmp,
4137 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4138 offset += 16;
4139 }
4140 *pretend_size = offset + STACK_POINTER_OFFSET;
4141 }
4142 }
4143 \f
4144 void
4145 spu_conditional_register_usage (void)
4146 {
4147 if (flag_pic)
4148 {
4149 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4150 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4151 }
4152 }
4153
4154 /* This is called any time we inspect the alignment of a register for
4155 addresses. */
4156 static int
4157 reg_aligned_for_addr (rtx x)
4158 {
4159 int regno =
4160 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4161 return REGNO_POINTER_ALIGN (regno) >= 128;
4162 }
4163
4164 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4165 into its SYMBOL_REF_FLAGS. */
4166 static void
4167 spu_encode_section_info (tree decl, rtx rtl, int first)
4168 {
4169 default_encode_section_info (decl, rtl, first);
4170
4171 /* If a variable has a forced alignment to < 16 bytes, mark it with
4172 SYMBOL_FLAG_ALIGN1. */
4173 if (TREE_CODE (decl) == VAR_DECL
4174 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4175 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4176 }
4177
4178 /* Return TRUE if we are certain the mem refers to a complete object
4179 which is both 16-byte aligned and padded to a 16-byte boundary. This
4180 would make it safe to store with a single instruction.
4181 We guarantee the alignment and padding for static objects by aligning
4182    all of them to 16 bytes.  (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4183 FIXME: We currently cannot guarantee this for objects on the stack
4184 because assign_parm_setup_stack calls assign_stack_local with the
4185 alignment of the parameter mode and in that case the alignment never
4186 gets adjusted by LOCAL_ALIGNMENT. */
4187 static int
4188 store_with_one_insn_p (rtx mem)
4189 {
4190 enum machine_mode mode = GET_MODE (mem);
4191 rtx addr = XEXP (mem, 0);
4192 if (mode == BLKmode)
4193 return 0;
4194 if (GET_MODE_SIZE (mode) >= 16)
4195 return 1;
4196 /* Only static objects. */
4197 if (GET_CODE (addr) == SYMBOL_REF)
4198 {
4199 /* We use the associated declaration to make sure the access is
4200 referring to the whole object.
4201          We check both MEM_EXPR and SYMBOL_REF_DECL.  I'm not sure
4202 if it is necessary. Will there be cases where one exists, and
4203 the other does not? Will there be cases where both exist, but
4204 have different types? */
4205 tree decl = MEM_EXPR (mem);
4206 if (decl
4207 && TREE_CODE (decl) == VAR_DECL
4208 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4209 return 1;
4210 decl = SYMBOL_REF_DECL (addr);
4211 if (decl
4212 && TREE_CODE (decl) == VAR_DECL
4213 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4214 return 1;
4215 }
4216 return 0;
4217 }
4218
4219 /* Return 1 when the address is not valid for a simple load and store as
4220 required by the '_mov*' patterns. We could make this less strict
4221    for loads, but we prefer MEMs to look the same so they are more
4222 likely to be merged. */
4223 static int
4224 address_needs_split (rtx mem)
4225 {
4226 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4227 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4228 || !(store_with_one_insn_p (mem)
4229 || mem_is_padded_component_ref (mem))))
4230 return 1;
4231
4232 return 0;
4233 }
4234
4235 int
4236 spu_expand_mov (rtx * ops, enum machine_mode mode)
4237 {
4238 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4239 abort ();
4240
4241 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4242 {
4243 rtx from = SUBREG_REG (ops[1]);
4244 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4245
4246 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4247 && GET_MODE_CLASS (imode) == MODE_INT
4248 && subreg_lowpart_p (ops[1]));
4249
4250 if (GET_MODE_SIZE (imode) < 4)
4251 imode = SImode;
4252 if (imode != GET_MODE (from))
4253 from = gen_rtx_SUBREG (imode, from, 0);
4254
4255 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4256 {
4257 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
4258 emit_insn (GEN_FCN (icode) (ops[0], from));
4259 }
4260 else
4261 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4262 return 1;
4263 }
4264
4265 /* At least one of the operands needs to be a register. */
4266 if ((reload_in_progress | reload_completed) == 0
4267 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4268 {
4269 rtx temp = force_reg (mode, ops[1]);
4270 emit_move_insn (ops[0], temp);
4271 return 1;
4272 }
4273 if (reload_in_progress || reload_completed)
4274 {
4275 if (CONSTANT_P (ops[1]))
4276 return spu_split_immediate (ops);
4277 return 0;
4278 }
4279
4280 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4281 extend them. */
4282 if (GET_CODE (ops[1]) == CONST_INT)
4283 {
4284 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4285 if (val != INTVAL (ops[1]))
4286 {
4287 emit_move_insn (ops[0], GEN_INT (val));
4288 return 1;
4289 }
4290 }
4291 if (MEM_P (ops[0]))
4292 return spu_split_store (ops);
4293 if (MEM_P (ops[1]))
4294 return spu_split_load (ops);
4295
4296 return 0;
4297 }
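/* Illustrative example (added for exposition, not part of the original
   code): the CONST_INT case above means a move of the SImode constant
   0x80000000 is re-emitted as a move of GEN_INT (-0x80000000), i.e.
   the canonical sign-extended form of the same 32-bit pattern, and the
   expander is done.  */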
4298
4299 static void
4300 spu_convert_move (rtx dst, rtx src)
4301 {
4302 enum machine_mode mode = GET_MODE (dst);
4303 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4304 rtx reg;
4305 gcc_assert (GET_MODE (src) == TImode);
4306 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4307 emit_insn (gen_rtx_SET (VOIDmode, reg,
4308 gen_rtx_TRUNCATE (int_mode,
4309 gen_rtx_LSHIFTRT (TImode, src,
4310 GEN_INT (int_mode == DImode ? 64 : 96)))));
4311 if (int_mode != mode)
4312 {
4313 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4314 emit_move_insn (dst, reg);
4315 }
4316 }
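/* Illustrative example (added for exposition, not part of the original
   code): to extract an SImode value from a TImode register the code
   above shifts right by 96 bits and truncates, so the 4 bytes of the
   preferred slot (bytes 0-3 of the quadword) become the SImode result;
   for DImode the shift is 64 bits and bytes 0-7 are kept.  */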
4317
4318 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4319 the address from SRC and SRC+16. Return a REG or CONST_INT that
4320 specifies how many bytes to rotate the loaded registers, plus any
4321 extra from EXTRA_ROTQBY. The address and rotate amounts are
4322 normalized to improve merging of loads and rotate computations. */
4323 static rtx
4324 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4325 {
4326 rtx addr = XEXP (src, 0);
4327 rtx p0, p1, rot, addr0, addr1;
4328 int rot_amt;
4329
4330 rot = 0;
4331 rot_amt = 0;
4332
4333 if (MEM_ALIGN (src) >= 128)
4334 /* Address is already aligned; simply perform a TImode load. */ ;
4335 else if (GET_CODE (addr) == PLUS)
4336 {
4337 /* 8 cases:
4338 aligned reg + aligned reg => lqx
4339 aligned reg + unaligned reg => lqx, rotqby
4340 aligned reg + aligned const => lqd
4341 aligned reg + unaligned const => lqd, rotqbyi
4342 unaligned reg + aligned reg => lqx, rotqby
4343 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4344 unaligned reg + aligned const => lqd, rotqby
4345 unaligned reg + unaligned const -> not allowed by legitimate address
4346 */
4347 p0 = XEXP (addr, 0);
4348 p1 = XEXP (addr, 1);
4349 if (!reg_aligned_for_addr (p0))
4350 {
4351 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4352 {
4353 rot = gen_reg_rtx (SImode);
4354 emit_insn (gen_addsi3 (rot, p0, p1));
4355 }
4356 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4357 {
4358 if (INTVAL (p1) > 0
4359 && REG_POINTER (p0)
4360 && INTVAL (p1) * BITS_PER_UNIT
4361 < REGNO_POINTER_ALIGN (REGNO (p0)))
4362 {
4363 rot = gen_reg_rtx (SImode);
4364 emit_insn (gen_addsi3 (rot, p0, p1));
4365 addr = p0;
4366 }
4367 else
4368 {
4369 rtx x = gen_reg_rtx (SImode);
4370 emit_move_insn (x, p1);
4371 if (!spu_arith_operand (p1, SImode))
4372 p1 = x;
4373 rot = gen_reg_rtx (SImode);
4374 emit_insn (gen_addsi3 (rot, p0, p1));
4375 addr = gen_rtx_PLUS (Pmode, p0, x);
4376 }
4377 }
4378 else
4379 rot = p0;
4380 }
4381 else
4382 {
4383 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4384 {
4385 rot_amt = INTVAL (p1) & 15;
4386 if (INTVAL (p1) & -16)
4387 {
4388 p1 = GEN_INT (INTVAL (p1) & -16);
4389 addr = gen_rtx_PLUS (SImode, p0, p1);
4390 }
4391 else
4392 addr = p0;
4393 }
4394 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4395 rot = p1;
4396 }
4397 }
4398 else if (REG_P (addr))
4399 {
4400 if (!reg_aligned_for_addr (addr))
4401 rot = addr;
4402 }
4403 else if (GET_CODE (addr) == CONST)
4404 {
4405 if (GET_CODE (XEXP (addr, 0)) == PLUS
4406 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4407 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4408 {
4409 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4410 if (rot_amt & -16)
4411 addr = gen_rtx_CONST (Pmode,
4412 gen_rtx_PLUS (Pmode,
4413 XEXP (XEXP (addr, 0), 0),
4414 GEN_INT (rot_amt & -16)));
4415 else
4416 addr = XEXP (XEXP (addr, 0), 0);
4417 }
4418 else
4419 {
4420 rot = gen_reg_rtx (Pmode);
4421 emit_move_insn (rot, addr);
4422 }
4423 }
4424 else if (GET_CODE (addr) == CONST_INT)
4425 {
4426 rot_amt = INTVAL (addr);
4427 addr = GEN_INT (rot_amt & -16);
4428 }
4429 else if (!ALIGNED_SYMBOL_REF_P (addr))
4430 {
4431 rot = gen_reg_rtx (Pmode);
4432 emit_move_insn (rot, addr);
4433 }
4434
4435 rot_amt += extra_rotby;
4436
4437 rot_amt &= 15;
4438
4439 if (rot && rot_amt)
4440 {
4441 rtx x = gen_reg_rtx (SImode);
4442 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4443 rot = x;
4444 rot_amt = 0;
4445 }
4446 if (!rot && rot_amt)
4447 rot = GEN_INT (rot_amt);
4448
4449 addr0 = copy_rtx (addr);
4450 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4451 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4452
4453 if (dst1)
4454 {
4455 addr1 = plus_constant (copy_rtx (addr), 16);
4456 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4457 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4458 }
4459
4460 return rot;
4461 }
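/* Illustrative example (added for exposition, not part of the original
   code): for a load from (plus (reg) (const_int 18)) where the
   register is known to be 16-byte aligned, the code above splits the
   offset into 16 + 2: the quadword is fetched from the address masked
   with -16 (reg + 16), and the low bits (2, plus any EXTRA_ROTBY) are
   returned as a constant rotate amount for the caller to apply with a
   quadword byte rotate.  */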
4462
4463 int
4464 spu_split_load (rtx * ops)
4465 {
4466 enum machine_mode mode = GET_MODE (ops[0]);
4467 rtx addr, load, rot;
4468 int rot_amt;
4469
4470 if (GET_MODE_SIZE (mode) >= 16)
4471 return 0;
4472
4473 addr = XEXP (ops[1], 0);
4474 gcc_assert (GET_CODE (addr) != AND);
4475
4476 if (!address_needs_split (ops[1]))
4477 {
4478 ops[1] = change_address (ops[1], TImode, addr);
4479 load = gen_reg_rtx (TImode);
4480 emit_insn (gen__movti (load, ops[1]));
4481 spu_convert_move (ops[0], load);
4482 return 1;
4483 }
4484
4485 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4486
4487 load = gen_reg_rtx (TImode);
4488 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4489
4490 if (rot)
4491 emit_insn (gen_rotqby_ti (load, load, rot));
4492
4493 spu_convert_move (ops[0], load);
4494 return 1;
4495 }
4496
4497 int
4498 spu_split_store (rtx * ops)
4499 {
4500 enum machine_mode mode = GET_MODE (ops[0]);
4501 rtx reg;
4502 rtx addr, p0, p1, p1_lo, smem;
4503 int aform;
4504 int scalar;
4505
4506 if (GET_MODE_SIZE (mode) >= 16)
4507 return 0;
4508
4509 addr = XEXP (ops[0], 0);
4510 gcc_assert (GET_CODE (addr) != AND);
4511
4512 if (!address_needs_split (ops[0]))
4513 {
4514 reg = gen_reg_rtx (TImode);
4515 emit_insn (gen_spu_convert (reg, ops[1]));
4516 ops[0] = change_address (ops[0], TImode, addr);
4517 emit_move_insn (ops[0], reg);
4518 return 1;
4519 }
4520
4521 if (GET_CODE (addr) == PLUS)
4522 {
4523 /* 8 cases:
4524 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4525 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4526 aligned reg + aligned const => lqd, c?d, shuf, stqx
4527 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4528 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4529 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4530 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4531        unaligned reg + unaligned const => lqx, c?d, shuf, stqx
4532 */
4533 aform = 0;
4534 p0 = XEXP (addr, 0);
4535 p1 = p1_lo = XEXP (addr, 1);
4536 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4537 {
4538 p1_lo = GEN_INT (INTVAL (p1) & 15);
4539 if (reg_aligned_for_addr (p0))
4540 {
4541 p1 = GEN_INT (INTVAL (p1) & -16);
4542 if (p1 == const0_rtx)
4543 addr = p0;
4544 else
4545 addr = gen_rtx_PLUS (SImode, p0, p1);
4546 }
4547 else
4548 {
4549 rtx x = gen_reg_rtx (SImode);
4550 emit_move_insn (x, p1);
4551 addr = gen_rtx_PLUS (SImode, p0, x);
4552 }
4553 }
4554 }
4555 else if (REG_P (addr))
4556 {
4557 aform = 0;
4558 p0 = addr;
4559 p1 = p1_lo = const0_rtx;
4560 }
4561 else
4562 {
4563 aform = 1;
4564 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4565 p1 = 0; /* aform doesn't use p1 */
4566 p1_lo = addr;
4567 if (ALIGNED_SYMBOL_REF_P (addr))
4568 p1_lo = const0_rtx;
4569 else if (GET_CODE (addr) == CONST
4570 && GET_CODE (XEXP (addr, 0)) == PLUS
4571 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4572 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4573 {
4574 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4575 if ((v & -16) != 0)
4576 addr = gen_rtx_CONST (Pmode,
4577 gen_rtx_PLUS (Pmode,
4578 XEXP (XEXP (addr, 0), 0),
4579 GEN_INT (v & -16)));
4580 else
4581 addr = XEXP (XEXP (addr, 0), 0);
4582 p1_lo = GEN_INT (v & 15);
4583 }
4584 else if (GET_CODE (addr) == CONST_INT)
4585 {
4586 p1_lo = GEN_INT (INTVAL (addr) & 15);
4587 addr = GEN_INT (INTVAL (addr) & -16);
4588 }
4589 else
4590 {
4591 p1_lo = gen_reg_rtx (SImode);
4592 emit_move_insn (p1_lo, addr);
4593 }
4594 }
4595
4596 reg = gen_reg_rtx (TImode);
4597
4598 scalar = store_with_one_insn_p (ops[0]);
4599 if (!scalar)
4600 {
4601       /* We could copy the flags from the ops[0] MEM to lmem here.
4602          We don't, because we want this load to be optimized away if
4603          possible, and copying the flags will prevent that in certain
4604          cases, e.g. consider the volatile flag.  */
4605
4606 rtx pat = gen_reg_rtx (TImode);
4607 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4608 set_mem_alias_set (lmem, 0);
4609 emit_insn (gen_movti (reg, lmem));
4610
4611 if (!p0 || reg_aligned_for_addr (p0))
4612 p0 = stack_pointer_rtx;
4613 if (!p1_lo)
4614 p1_lo = const0_rtx;
4615
4616 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4617 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4618 }
4619 else
4620 {
4621 if (GET_CODE (ops[1]) == REG)
4622 emit_insn (gen_spu_convert (reg, ops[1]));
4623 else if (GET_CODE (ops[1]) == SUBREG)
4624 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
4625 else
4626 abort ();
4627 }
4628
4629 if (GET_MODE_SIZE (mode) < 4 && scalar)
4630 emit_insn (gen_ashlti3
4631 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
4632
4633 smem = change_address (ops[0], TImode, copy_rtx (addr));
4634 /* We can't use the previous alias set because the memory has changed
4635 size and can potentially overlap objects of other types. */
4636 set_mem_alias_set (smem, 0);
4637
4638 emit_insn (gen_movti (smem, reg));
4639 return 1;
4640 }
4641
4642 /* Return TRUE if X is a MEM which is a struct member reference
4643 and the member can safely be loaded and stored with a single
4644 instruction because it is padded. */
4645 static int
4646 mem_is_padded_component_ref (rtx x)
4647 {
4648 tree t = MEM_EXPR (x);
4649 tree r;
4650 if (!t || TREE_CODE (t) != COMPONENT_REF)
4651 return 0;
4652 t = TREE_OPERAND (t, 1);
4653 if (!t || TREE_CODE (t) != FIELD_DECL
4654 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
4655 return 0;
4656 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
4657 r = DECL_FIELD_CONTEXT (t);
4658 if (!r || TREE_CODE (r) != RECORD_TYPE)
4659 return 0;
4660 /* Make sure they are the same mode */
4661 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
4662 return 0;
4663   /* If there are no following fields then the field alignment assures
4664      the structure is padded to that alignment, which means this field
4665      is padded too.  */
4666 if (TREE_CHAIN (t) == 0)
4667 return 1;
4668 /* If the following field is also aligned then this field will be
4669 padded. */
4670 t = TREE_CHAIN (t);
4671 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
4672 return 1;
4673 return 0;
4674 }
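/* Illustrative example (added for exposition, not part of the original
   code): a field this predicate accepts looks roughly like

     struct padded
     {
       int a __attribute__ ((aligned (16)));
       int b __attribute__ ((aligned (16)));
     };

   Field a is 16-byte aligned and, because the following field is also
   16-byte aligned (or because a is the last field), the 12 bytes after
   it are guaranteed padding, so a quadword store to a cannot clobber
   anything else.  */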
4675
4676 /* Parse the -mfixed-range= option string. */
4677 static void
4678 fix_range (const char *const_str)
4679 {
4680 int i, first, last;
4681 char *str, *dash, *comma;
4682
4683   /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
4684 REG2 are either register names or register numbers. The effect
4685 of this option is to mark the registers in the range from REG1 to
4686 REG2 as ``fixed'' so they won't be used by the compiler. */
4687
4688 i = strlen (const_str);
4689 str = (char *) alloca (i + 1);
4690 memcpy (str, const_str, i + 1);
4691
4692 while (1)
4693 {
4694 dash = strchr (str, '-');
4695 if (!dash)
4696 {
4697 warning (0, "value of -mfixed-range must have form REG1-REG2");
4698 return;
4699 }
4700 *dash = '\0';
4701 comma = strchr (dash + 1, ',');
4702 if (comma)
4703 *comma = '\0';
4704
4705 first = decode_reg_name (str);
4706 if (first < 0)
4707 {
4708 warning (0, "unknown register name: %s", str);
4709 return;
4710 }
4711
4712 last = decode_reg_name (dash + 1);
4713 if (last < 0)
4714 {
4715 warning (0, "unknown register name: %s", dash + 1);
4716 return;
4717 }
4718
4719 *dash = '-';
4720
4721 if (first > last)
4722 {
4723 warning (0, "%s-%s is an empty range", str, dash + 1);
4724 return;
4725 }
4726
4727 for (i = first; i <= last; ++i)
4728 fixed_regs[i] = call_used_regs[i] = 1;
4729
4730 if (!comma)
4731 break;
4732
4733 *comma = ',';
4734 str = comma + 1;
4735 }
4736 }
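/* Illustrative usage sketch (added for exposition, not part of the
   original code; the exact register spelling is whatever
   decode_reg_name accepts for this target): an option such as

     -mfixed-range=$80-$86,$100-$101

   marks registers 80-86 and 100-101 as fixed and call-used, so the
   register allocator never uses them; several comma-separated ranges
   may be given.  */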
4737
4738 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4739 can be generated using the fsmbi instruction. */
4740 int
4741 fsmbi_const_p (rtx x)
4742 {
4743 if (CONSTANT_P (x))
4744 {
4745 /* We can always choose TImode for CONST_INT because the high bits
4746 of an SImode will always be all 1s, i.e., valid for fsmbi. */
4747 enum immediate_class c = classify_immediate (x, TImode);
4748 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
4749 }
4750 return 0;
4751 }
4752
4753 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
4754 can be generated using the cbd, chd, cwd or cdd instruction. */
4755 int
4756 cpat_const_p (rtx x, enum machine_mode mode)
4757 {
4758 if (CONSTANT_P (x))
4759 {
4760 enum immediate_class c = classify_immediate (x, mode);
4761 return c == IC_CPAT;
4762 }
4763 return 0;
4764 }
4765
4766 rtx
4767 gen_cpat_const (rtx * ops)
4768 {
4769 unsigned char dst[16];
4770 int i, offset, shift, isize;
4771 if (GET_CODE (ops[3]) != CONST_INT
4772 || GET_CODE (ops[2]) != CONST_INT
4773 || (GET_CODE (ops[1]) != CONST_INT
4774 && GET_CODE (ops[1]) != REG))
4775 return 0;
4776 if (GET_CODE (ops[1]) == REG
4777 && (!REG_POINTER (ops[1])
4778 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
4779 return 0;
4780
4781 for (i = 0; i < 16; i++)
4782 dst[i] = i + 16;
4783 isize = INTVAL (ops[3]);
4784 if (isize == 1)
4785 shift = 3;
4786 else if (isize == 2)
4787 shift = 2;
4788 else
4789 shift = 0;
4790 offset = (INTVAL (ops[2]) +
4791 (GET_CODE (ops[1]) ==
4792 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
4793 for (i = 0; i < isize; i++)
4794 dst[offset + i] = i + shift;
4795 return array_to_constant (TImode, dst);
4796 }
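/* Illustrative example (added for exposition, not part of the original
   code): for a 4-byte (word) insert at a constant offset of 4 the loop
   above produces the shuffle-control bytes

     { 16, 17, 18, 19,  0, 1, 2, 3,  24, 25, 26, 27,  28, 29, 30, 31 }

   Every byte selects the old quadword (the second shufb operand,
   selectors 16..31) except the four bytes at offset 4, which select
   bytes 0..3 of the new value; this is the same kind of control word a
   cwd instruction generates at run time.  */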
4797
4798 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
4799 array. Use MODE for CONST_INT's. When the constant's mode is smaller
4800 than 16 bytes, the value is repeated across the rest of the array. */
4801 void
4802 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4803 {
4804 HOST_WIDE_INT val;
4805 int i, j, first;
4806
4807 memset (arr, 0, 16);
4808 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4809 if (GET_CODE (x) == CONST_INT
4810 || (GET_CODE (x) == CONST_DOUBLE
4811 && (mode == SFmode || mode == DFmode)))
4812 {
4813 gcc_assert (mode != VOIDmode && mode != BLKmode);
4814
4815 if (GET_CODE (x) == CONST_DOUBLE)
4816 val = const_double_to_hwint (x);
4817 else
4818 val = INTVAL (x);
4819 first = GET_MODE_SIZE (mode) - 1;
4820 for (i = first; i >= 0; i--)
4821 {
4822 arr[i] = val & 0xff;
4823 val >>= 8;
4824 }
4825 /* Splat the constant across the whole array. */
4826 for (j = 0, i = first + 1; i < 16; i++)
4827 {
4828 arr[i] = arr[j];
4829 j = (j == first) ? 0 : j + 1;
4830 }
4831 }
4832 else if (GET_CODE (x) == CONST_DOUBLE)
4833 {
4834 val = CONST_DOUBLE_LOW (x);
4835 for (i = 15; i >= 8; i--)
4836 {
4837 arr[i] = val & 0xff;
4838 val >>= 8;
4839 }
4840 val = CONST_DOUBLE_HIGH (x);
4841 for (i = 7; i >= 0; i--)
4842 {
4843 arr[i] = val & 0xff;
4844 val >>= 8;
4845 }
4846 }
4847 else if (GET_CODE (x) == CONST_VECTOR)
4848 {
4849 int units;
4850 rtx elt;
4851 mode = GET_MODE_INNER (mode);
4852 units = CONST_VECTOR_NUNITS (x);
4853 for (i = 0; i < units; i++)
4854 {
4855 elt = CONST_VECTOR_ELT (x, i);
4856 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4857 {
4858 if (GET_CODE (elt) == CONST_DOUBLE)
4859 val = const_double_to_hwint (elt);
4860 else
4861 val = INTVAL (elt);
4862 first = GET_MODE_SIZE (mode) - 1;
4863 if (first + i * GET_MODE_SIZE (mode) > 16)
4864 abort ();
4865 for (j = first; j >= 0; j--)
4866 {
4867 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4868 val >>= 8;
4869 }
4870 }
4871 }
4872 }
4873 else
4874 gcc_unreachable();
4875 }
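/* Illustrative example (added for exposition, not part of the original
   code): for the SImode constant 0x01020304 the loop above first
   stores the bytes 01 02 03 04 into arr[0..3] (most significant byte
   first) and then splats them, giving

     { 01 02 03 04  01 02 03 04  01 02 03 04  01 02 03 04 }

   i.e. the byte image of the value replicated across the whole
   quadword.  */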
4876
4877 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4878 smaller than 16 bytes, use the bytes that would represent that value
4879 in a register, e.g., for QImode return the value of arr[3]. */
4880 rtx
4881 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4882 {
4883 enum machine_mode inner_mode;
4884 rtvec v;
4885 int units, size, i, j, k;
4886 HOST_WIDE_INT val;
4887
4888 if (GET_MODE_CLASS (mode) == MODE_INT
4889 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4890 {
4891 j = GET_MODE_SIZE (mode);
4892 i = j < 4 ? 4 - j : 0;
4893 for (val = 0; i < j; i++)
4894 val = (val << 8) | arr[i];
4895 val = trunc_int_for_mode (val, mode);
4896 return GEN_INT (val);
4897 }
4898
4899 if (mode == TImode)
4900 {
4901 HOST_WIDE_INT high;
4902 for (i = high = 0; i < 8; i++)
4903 high = (high << 8) | arr[i];
4904 for (i = 8, val = 0; i < 16; i++)
4905 val = (val << 8) | arr[i];
4906 return immed_double_const (val, high, TImode);
4907 }
4908 if (mode == SFmode)
4909 {
4910 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4911 val = trunc_int_for_mode (val, SImode);
4912 return hwint_to_const_double (SFmode, val);
4913 }
4914 if (mode == DFmode)
4915 {
4916 for (i = 0, val = 0; i < 8; i++)
4917 val = (val << 8) | arr[i];
4918 return hwint_to_const_double (DFmode, val);
4919 }
4920
4921 if (!VECTOR_MODE_P (mode))
4922 abort ();
4923
4924 units = GET_MODE_NUNITS (mode);
4925 size = GET_MODE_UNIT_SIZE (mode);
4926 inner_mode = GET_MODE_INNER (mode);
4927 v = rtvec_alloc (units);
4928
4929 for (k = i = 0; i < units; ++i)
4930 {
4931 val = 0;
4932 for (j = 0; j < size; j++, k++)
4933 val = (val << 8) | arr[k];
4934
4935 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4936 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4937 else
4938 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4939 }
4940 if (k > 16)
4941 abort ();
4942
4943 return gen_rtx_CONST_VECTOR (mode, v);
4944 }
4945
4946 static void
4947 reloc_diagnostic (rtx x)
4948 {
4949 tree loc_decl, decl = 0;
4950 const char *msg;
4951 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4952 return;
4953
4954 if (GET_CODE (x) == SYMBOL_REF)
4955 decl = SYMBOL_REF_DECL (x);
4956 else if (GET_CODE (x) == CONST
4957 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4958 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4959
4960 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4961 if (decl && !DECL_P (decl))
4962 decl = 0;
4963
4964 /* We use last_assemble_variable_decl to get line information. It's
4965 not always going to be right and might not even be close, but will
4966 be right for the more common cases. */
4967 if (!last_assemble_variable_decl || in_section == ctors_section)
4968 loc_decl = decl;
4969 else
4970 loc_decl = last_assemble_variable_decl;
4971
4972 /* The decl could be a string constant. */
4973 if (decl && DECL_P (decl))
4974 msg = "%Jcreating run-time relocation for %qD";
4975 else
4976 msg = "creating run-time relocation";
4977
4978 if (TARGET_WARN_RELOC)
4979 warning (0, msg, loc_decl, decl);
4980 else
4981 error (msg, loc_decl, decl);
4982 }
4983
4984 /* Hook into assemble_integer so we can generate an error for run-time
4985 relocations. The SPU ABI disallows them. */
4986 static bool
4987 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4988 {
4989   /* By default run-time relocations aren't supported, but we allow them
4990      in case users support them in their own run-time loader, and we
4991      provide a warning for those users that don't.  */
4992 if ((GET_CODE (x) == SYMBOL_REF)
4993 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4994 reloc_diagnostic (x);
4995
4996 return default_assemble_integer (x, size, aligned_p);
4997 }
4998
4999 static void
5000 spu_asm_globalize_label (FILE * file, const char *name)
5001 {
5002 fputs ("\t.global\t", file);
5003 assemble_name (file, name);
5004 fputs ("\n", file);
5005 }
5006
5007 static bool
5008 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5009 bool speed ATTRIBUTE_UNUSED)
5010 {
5011 enum machine_mode mode = GET_MODE (x);
5012 int cost = COSTS_N_INSNS (2);
5013
5014 /* Folding to a CONST_VECTOR will use extra space but there might
5015 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5016 only if it allows us to fold away multiple insns. Changing the cost
5017 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5018 because this cost will only be compared against a single insn.
5019 if (code == CONST_VECTOR)
5020 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5021 */
5022
5023 /* Use defaults for float operations. Not accurate but good enough. */
5024 if (mode == DFmode)
5025 {
5026 *total = COSTS_N_INSNS (13);
5027 return true;
5028 }
5029 if (mode == SFmode)
5030 {
5031 *total = COSTS_N_INSNS (6);
5032 return true;
5033 }
5034 switch (code)
5035 {
5036 case CONST_INT:
5037 if (satisfies_constraint_K (x))
5038 *total = 0;
5039 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5040 *total = COSTS_N_INSNS (1);
5041 else
5042 *total = COSTS_N_INSNS (3);
5043 return true;
5044
5045 case CONST:
5046 *total = COSTS_N_INSNS (3);
5047 return true;
5048
5049 case LABEL_REF:
5050 case SYMBOL_REF:
5051 *total = COSTS_N_INSNS (0);
5052 return true;
5053
5054 case CONST_DOUBLE:
5055 *total = COSTS_N_INSNS (5);
5056 return true;
5057
5058 case FLOAT_EXTEND:
5059 case FLOAT_TRUNCATE:
5060 case FLOAT:
5061 case UNSIGNED_FLOAT:
5062 case FIX:
5063 case UNSIGNED_FIX:
5064 *total = COSTS_N_INSNS (7);
5065 return true;
5066
5067 case PLUS:
5068 if (mode == TImode)
5069 {
5070 *total = COSTS_N_INSNS (9);
5071 return true;
5072 }
5073 break;
5074
5075 case MULT:
5076 cost =
5077 GET_CODE (XEXP (x, 0)) ==
5078 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5079 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5080 {
5081 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5082 {
5083 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5084 cost = COSTS_N_INSNS (14);
5085 if ((val & 0xffff) == 0)
5086 cost = COSTS_N_INSNS (9);
5087 else if (val > 0 && val < 0x10000)
5088 cost = COSTS_N_INSNS (11);
5089 }
5090 }
5091 *total = cost;
5092 return true;
5093 case DIV:
5094 case UDIV:
5095 case MOD:
5096 case UMOD:
5097 *total = COSTS_N_INSNS (20);
5098 return true;
5099 case ROTATE:
5100 case ROTATERT:
5101 case ASHIFT:
5102 case ASHIFTRT:
5103 case LSHIFTRT:
5104 *total = COSTS_N_INSNS (4);
5105 return true;
5106 case UNSPEC:
5107 if (XINT (x, 1) == UNSPEC_CONVERT)
5108 *total = COSTS_N_INSNS (0);
5109 else
5110 *total = COSTS_N_INSNS (4);
5111 return true;
5112 }
5113 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5114 if (GET_MODE_CLASS (mode) == MODE_INT
5115 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5116 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5117 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5118 *total = cost;
5119 return true;
5120 }
5121
5122 static enum machine_mode
5123 spu_unwind_word_mode (void)
5124 {
5125 return SImode;
5126 }
5127
5128 /* Decide whether we can make a sibling call to a function. DECL is the
5129 declaration of the function being targeted by the call and EXP is the
5130 CALL_EXPR representing the call. */
5131 static bool
5132 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5133 {
5134 return decl && !TARGET_LARGE_MEM;
5135 }
5136
5137 /* We need to correctly update the back chain pointer and the Available
5138    Stack Size (which is in the second slot of the sp register).  */
5139 void
5140 spu_allocate_stack (rtx op0, rtx op1)
5141 {
5142 HOST_WIDE_INT v;
5143 rtx chain = gen_reg_rtx (V4SImode);
5144 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5145 rtx sp = gen_reg_rtx (V4SImode);
5146 rtx splatted = gen_reg_rtx (V4SImode);
5147 rtx pat = gen_reg_rtx (TImode);
5148
5149 /* copy the back chain so we can save it back again. */
5150 emit_move_insn (chain, stack_bot);
5151
5152 op1 = force_reg (SImode, op1);
5153
5154 v = 0x1020300010203ll;
5155 emit_move_insn (pat, immed_double_const (v, v, TImode));
5156 emit_insn (gen_shufb (splatted, op1, op1, pat));
5157
5158 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5159 emit_insn (gen_subv4si3 (sp, sp, splatted));
5160
5161 if (flag_stack_check)
5162 {
5163 rtx avail = gen_reg_rtx(SImode);
5164 rtx result = gen_reg_rtx(SImode);
5165 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5166 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
5167 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
5168 }
5169
5170 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5171
5172 emit_move_insn (stack_bot, chain);
5173
5174 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5175 }
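/* Illustrative note (added for exposition, not part of the original
   code): the shuffle constant 0x00010203 repeated above copies the
   preferred-slot word of op1 (the requested size) into every word slot
   of SPLATTED, so the single vector subtract decrements both the stack
   pointer in slot 0 and the Available Stack Size in slot 1 by the
   allocation size; the saved back chain is then stored at the new
   stack bottom.  */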
5176
5177 void
5178 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5179 {
5180 static unsigned char arr[16] =
5181 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5182 rtx temp = gen_reg_rtx (SImode);
5183 rtx temp2 = gen_reg_rtx (SImode);
5184 rtx temp3 = gen_reg_rtx (V4SImode);
5185 rtx temp4 = gen_reg_rtx (V4SImode);
5186 rtx pat = gen_reg_rtx (TImode);
5187 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5188
5189 /* Restore the backchain from the first word, sp from the second. */
5190 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5191 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5192
5193 emit_move_insn (pat, array_to_constant (TImode, arr));
5194
5195 /* Compute Available Stack Size for sp */
5196 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5197 emit_insn (gen_shufb (temp3, temp, temp, pat));
5198
5199 /* Compute Available Stack Size for back chain */
5200 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5201 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5202 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5203
5204 emit_insn (gen_addv4si3 (sp, sp, temp3));
5205 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5206 }
5207
5208 static void
5209 spu_init_libfuncs (void)
5210 {
5211 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5212 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5213 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5214 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5215 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5216 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5217 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5218 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5219 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5220 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5221 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5222
5223 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5224 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5225
5226 set_optab_libfunc (smul_optab, TImode, "__multi3");
5227 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5228 set_optab_libfunc (smod_optab, TImode, "__modti3");
5229 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5230 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5231 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5232 }
5233
5234 /* Make a subreg, stripping any existing subreg. We could possibly just
5235 call simplify_subreg, but in this case we know what we want. */
5236 rtx
5237 spu_gen_subreg (enum machine_mode mode, rtx x)
5238 {
5239 if (GET_CODE (x) == SUBREG)
5240 x = SUBREG_REG (x);
5241 if (GET_MODE (x) == mode)
5242 return x;
5243 return gen_rtx_SUBREG (mode, x, 0);
5244 }
5245
5246 static bool
5247 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5248 {
5249 return (TYPE_MODE (type) == BLKmode
5250 && ((type) == 0
5251 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5252 || int_size_in_bytes (type) >
5253 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5254 }
5255 \f
5256 /* Create the built-in types and functions */
5257
5258 enum spu_function_code
5259 {
5260 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5261 #include "spu-builtins.def"
5262 #undef DEF_BUILTIN
5263 NUM_SPU_BUILTINS
5264 };
5265
5266 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5267
5268 struct spu_builtin_description spu_builtins[] = {
5269 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5270 {fcode, icode, name, type, params, NULL_TREE},
5271 #include "spu-builtins.def"
5272 #undef DEF_BUILTIN
5273 };
5274
5275 static void
5276 spu_init_builtins (void)
5277 {
5278 struct spu_builtin_description *d;
5279 unsigned int i;
5280
5281 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5282 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5283 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5284 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5285 V4SF_type_node = build_vector_type (float_type_node, 4);
5286 V2DF_type_node = build_vector_type (double_type_node, 2);
5287
5288 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5289 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5290 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5291 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5292
5293 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5294
5295 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5296 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5297 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5298 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5299 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5300 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5301 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5302 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5303 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5304 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5305 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5306 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5307
5308 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5309 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5310 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5311 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5312 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5313 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5314 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5315 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5316
5317 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5318 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5319
5320 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5321
5322 spu_builtin_types[SPU_BTI_PTR] =
5323 build_pointer_type (build_qualified_type
5324 (void_type_node,
5325 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5326
5327 /* For each builtin we build a new prototype. The tree code will make
5328 sure nodes are shared. */
5329 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5330 {
5331 tree p;
5332       char name[64];		/* add_builtin_function will make a copy.  */
5333 int parm;
5334
5335 if (d->name == 0)
5336 continue;
5337
5338 /* Find last parm. */
5339 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5340 ;
5341
5342 p = void_list_node;
5343 while (parm > 1)
5344 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5345
5346 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5347
5348 sprintf (name, "__builtin_%s", d->name);
5349 d->fndecl =
5350 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
5351 NULL, NULL_TREE);
5352 if (d->fcode == SPU_MASK_FOR_LOAD)
5353 TREE_READONLY (d->fndecl) = 1;
5354
5355 /* These builtins don't throw. */
5356 TREE_NOTHROW (d->fndecl) = 1;
5357 }
5358 }
5359
5360 void
5361 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5362 {
5363 static unsigned char arr[16] =
5364 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5365
5366 rtx temp = gen_reg_rtx (Pmode);
5367 rtx temp2 = gen_reg_rtx (V4SImode);
5368 rtx temp3 = gen_reg_rtx (V4SImode);
5369 rtx pat = gen_reg_rtx (TImode);
5370 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5371
5372 emit_move_insn (pat, array_to_constant (TImode, arr));
5373
5374 /* Restore the sp. */
5375 emit_move_insn (temp, op1);
5376 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5377
5378 /* Compute available stack size for sp. */
5379 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5380 emit_insn (gen_shufb (temp3, temp, temp, pat));
5381
5382 emit_insn (gen_addv4si3 (sp, sp, temp3));
5383 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5384 }
5385
5386 int
5387 spu_safe_dma (HOST_WIDE_INT channel)
5388 {
5389 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5390 }
5391
5392 void
5393 spu_builtin_splats (rtx ops[])
5394 {
5395 enum machine_mode mode = GET_MODE (ops[0]);
5396 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5397 {
5398 unsigned char arr[16];
5399 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5400 emit_move_insn (ops[0], array_to_constant (mode, arr));
5401 }
5402 else
5403 {
5404 rtx reg = gen_reg_rtx (TImode);
5405 rtx shuf;
5406 if (GET_CODE (ops[1]) != REG
5407 && GET_CODE (ops[1]) != SUBREG)
5408 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
5409 switch (mode)
5410 {
5411 case V2DImode:
5412 case V2DFmode:
5413 shuf =
5414 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5415 TImode);
5416 break;
5417 case V4SImode:
5418 case V4SFmode:
5419 shuf =
5420 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5421 TImode);
5422 break;
5423 case V8HImode:
5424 shuf =
5425 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5426 TImode);
5427 break;
5428 case V16QImode:
5429 shuf =
5430 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5431 TImode);
5432 break;
5433 default:
5434 abort ();
5435 }
5436 emit_move_insn (reg, shuf);
5437 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5438 }
5439 }
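/* Illustrative note (added for exposition, not part of the original
   code): for a V4SI splat of a non-constant value the shuffle constant
   chosen above is the byte pattern 00 01 02 03 repeated four times, so
   shufb copies the 4 bytes of the preferred slot of ops[1] into every
   word of the result.  The V8HI pattern (02 03 repeated) and the V16QI
   pattern (03 repeated) do the same for the halfword and byte
   preferred slots.  */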
5440
5441 void
5442 spu_builtin_extract (rtx ops[])
5443 {
5444 enum machine_mode mode;
5445 rtx rot, from, tmp;
5446
5447 mode = GET_MODE (ops[1]);
5448
5449 if (GET_CODE (ops[2]) == CONST_INT)
5450 {
5451 switch (mode)
5452 {
5453 case V16QImode:
5454 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5455 break;
5456 case V8HImode:
5457 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5458 break;
5459 case V4SFmode:
5460 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5461 break;
5462 case V4SImode:
5463 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5464 break;
5465 case V2DImode:
5466 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5467 break;
5468 case V2DFmode:
5469 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5470 break;
5471 default:
5472 abort ();
5473 }
5474 return;
5475 }
5476
5477 from = spu_gen_subreg (TImode, ops[1]);
5478 rot = gen_reg_rtx (TImode);
5479 tmp = gen_reg_rtx (SImode);
5480
5481 switch (mode)
5482 {
5483 case V16QImode:
5484 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5485 break;
5486 case V8HImode:
5487 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5488 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5489 break;
5490 case V4SFmode:
5491 case V4SImode:
5492 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5493 break;
5494 case V2DImode:
5495 case V2DFmode:
5496 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5497 break;
5498 default:
5499 abort ();
5500 }
5501 emit_insn (gen_rotqby_ti (rot, from, tmp));
5502
5503 emit_insn (gen_spu_convert (ops[0], rot));
5504 }
5505
5506 void
5507 spu_builtin_insert (rtx ops[])
5508 {
5509 enum machine_mode mode = GET_MODE (ops[0]);
5510 enum machine_mode imode = GET_MODE_INNER (mode);
5511 rtx mask = gen_reg_rtx (TImode);
5512 rtx offset;
5513
5514 if (GET_CODE (ops[3]) == CONST_INT)
5515 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5516 else
5517 {
5518 offset = gen_reg_rtx (SImode);
5519 emit_insn (gen_mulsi3
5520 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5521 }
5522 emit_insn (gen_cpat
5523 (mask, stack_pointer_rtx, offset,
5524 GEN_INT (GET_MODE_SIZE (imode))));
5525 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5526 }
5527
5528 void
5529 spu_builtin_promote (rtx ops[])
5530 {
5531 enum machine_mode mode, imode;
5532 rtx rot, from, offset;
5533 HOST_WIDE_INT pos;
5534
5535 mode = GET_MODE (ops[0]);
5536 imode = GET_MODE_INNER (mode);
5537
5538 from = gen_reg_rtx (TImode);
5539 rot = spu_gen_subreg (TImode, ops[0]);
5540
5541 emit_insn (gen_spu_convert (from, ops[1]));
5542
5543 if (GET_CODE (ops[2]) == CONST_INT)
5544 {
5545 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5546 if (GET_MODE_SIZE (imode) < 4)
5547 pos += 4 - GET_MODE_SIZE (imode);
5548 offset = GEN_INT (pos & 15);
5549 }
5550 else
5551 {
5552 offset = gen_reg_rtx (SImode);
5553 switch (mode)
5554 {
5555 case V16QImode:
5556 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5557 break;
5558 case V8HImode:
5559 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5560 emit_insn (gen_addsi3 (offset, offset, offset));
5561 break;
5562 case V4SFmode:
5563 case V4SImode:
5564 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5565 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5566 break;
5567 case V2DImode:
5568 case V2DFmode:
5569 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5570 break;
5571 default:
5572 abort ();
5573 }
5574 }
5575 emit_insn (gen_rotqby_ti (rot, from, offset));
5576 }
5577
5578 void
5579 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
5580 {
5581 rtx shuf = gen_reg_rtx (V4SImode);
5582 rtx insn = gen_reg_rtx (V4SImode);
5583 rtx shufc;
5584 rtx insnc;
5585 rtx mem;
5586
5587 fnaddr = force_reg (SImode, fnaddr);
5588 cxt = force_reg (SImode, cxt);
5589
5590 if (TARGET_LARGE_MEM)
5591 {
5592 rtx rotl = gen_reg_rtx (V4SImode);
5593 rtx mask = gen_reg_rtx (V4SImode);
5594 rtx bi = gen_reg_rtx (SImode);
5595 unsigned char shufa[16] = {
5596 2, 3, 0, 1, 18, 19, 16, 17,
5597 0, 1, 2, 3, 16, 17, 18, 19
5598 };
5599 unsigned char insna[16] = {
5600 0x41, 0, 0, 79,
5601 0x41, 0, 0, STATIC_CHAIN_REGNUM,
5602 0x60, 0x80, 0, 79,
5603 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
5604 };
5605
5606 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
5607 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5608
5609 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
5610 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
5611 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
5612 emit_insn (gen_selb (insn, insnc, rotl, mask));
5613
5614 mem = memory_address (Pmode, tramp);
5615 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5616
5617 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
5618 mem = memory_address (Pmode, plus_constant (tramp, 16));
5619 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
5620 }
5621 else
5622 {
5623 rtx scxt = gen_reg_rtx (SImode);
5624 rtx sfnaddr = gen_reg_rtx (SImode);
5625 unsigned char insna[16] = {
5626 0x42, 0, 0, STATIC_CHAIN_REGNUM,
5627 0x30, 0, 0, 0,
5628 0, 0, 0, 0,
5629 0, 0, 0, 0
5630 };
5631
5632 shufc = gen_reg_rtx (TImode);
5633 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
5634
5635       /* By or'ing all of cxt with the ila opcode we are assuming cxt
5636          fits in 18 bits and the last 4 bits are zeros.  This will be true
5637          if the stack pointer is initialized to 0x3fff0 at program start;
5638          otherwise the ila instruction will be garbage.  */
5639
5640 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
5641 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
5642 emit_insn (gen_cpat
5643 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
5644 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
5645 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
5646
5647 mem = memory_address (Pmode, tramp);
5648 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
5649
5650 }
5651 emit_insn (gen_sync ());
5652 }
5653
5654 void
5655 spu_expand_sign_extend (rtx ops[])
5656 {
5657 unsigned char arr[16];
5658 rtx pat = gen_reg_rtx (TImode);
5659 rtx sign, c;
5660 int i, last;
5661 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
5662 if (GET_MODE (ops[1]) == QImode)
5663 {
5664 sign = gen_reg_rtx (HImode);
5665 emit_insn (gen_extendqihi2 (sign, ops[1]));
5666 for (i = 0; i < 16; i++)
5667 arr[i] = 0x12;
5668 arr[last] = 0x13;
5669 }
5670 else
5671 {
5672 for (i = 0; i < 16; i++)
5673 arr[i] = 0x10;
5674 switch (GET_MODE (ops[1]))
5675 {
5676 case HImode:
5677 sign = gen_reg_rtx (SImode);
5678 emit_insn (gen_extendhisi2 (sign, ops[1]));
5679 arr[last] = 0x03;
5680 arr[last - 1] = 0x02;
5681 break;
5682 case SImode:
5683 sign = gen_reg_rtx (SImode);
5684 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
5685 for (i = 0; i < 4; i++)
5686 arr[last - i] = 3 - i;
5687 break;
5688 case DImode:
5689 sign = gen_reg_rtx (SImode);
5690 c = gen_reg_rtx (SImode);
5691 emit_insn (gen_spu_convert (c, ops[1]));
5692 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
5693 for (i = 0; i < 8; i++)
5694 arr[last - i] = 7 - i;
5695 break;
5696 default:
5697 abort ();
5698 }
5699 }
5700 emit_move_insn (pat, array_to_constant (TImode, arr));
5701 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
5702 }
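/* Illustrative example (added for exposition, not part of the original
   code): when sign-extending an SImode value to DImode the code above
   puts the 32 sign bits in SIGN (arithmetic shift right by 31) and
   builds the shuffle pattern

     { 10 10 10 10 00 01 02 03 ... }   (hex selector bytes)

   so the first four result bytes come from SIGN (selectors 0x10..) and
   the next four from the original value (selectors 0x00..0x03),
   leaving the sign-extended 64-bit value in the DImode preferred
   slot.  */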
5703
5704 /* Expand vector initialization.  If there are any constant parts,
5705    load the constant parts first, then load any non-constant parts.  */
5706 void
5707 spu_expand_vector_init (rtx target, rtx vals)
5708 {
5709 enum machine_mode mode = GET_MODE (target);
5710 int n_elts = GET_MODE_NUNITS (mode);
5711 int n_var = 0;
5712 bool all_same = true;
5713 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
5714 int i;
5715
5716 first = XVECEXP (vals, 0, 0);
5717 for (i = 0; i < n_elts; ++i)
5718 {
5719 x = XVECEXP (vals, 0, i);
5720 if (!(CONST_INT_P (x)
5721 || GET_CODE (x) == CONST_DOUBLE
5722 || GET_CODE (x) == CONST_FIXED))
5723 ++n_var;
5724 else
5725 {
5726 if (first_constant == NULL_RTX)
5727 first_constant = x;
5728 }
5729 if (i > 0 && !rtx_equal_p (x, first))
5730 all_same = false;
5731 }
5732
5733   /* If all elements are the same, use splats to repeat the element.  */
5734 if (all_same)
5735 {
5736 if (!CONSTANT_P (first)
5737 && !register_operand (first, GET_MODE (x)))
5738 first = force_reg (GET_MODE (first), first);
5739 emit_insn (gen_spu_splats (target, first));
5740 return;
5741 }
5742
5743   /* Load the constant parts.  */
5744 if (n_var != n_elts)
5745 {
5746 if (n_var == 0)
5747 {
5748 emit_move_insn (target,
5749 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
5750 }
5751 else
5752 {
5753 rtx constant_parts_rtx = copy_rtx (vals);
5754
5755 gcc_assert (first_constant != NULL_RTX);
5756 /* Fill the empty slots with the first constant; this increases
5757 our chance of using splats in the recursive call below. */
5758 for (i = 0; i < n_elts; ++i)
5759 {
5760 x = XVECEXP (constant_parts_rtx, 0, i);
5761 if (!(CONST_INT_P (x)
5762 || GET_CODE (x) == CONST_DOUBLE
5763 || GET_CODE (x) == CONST_FIXED))
5764 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
5765 }
5766
5767 spu_expand_vector_init (target, constant_parts_rtx);
5768 }
5769 }
5770
5771 /* Load the variable parts. */
5772 if (n_var != 0)
5773 {
5774 rtx insert_operands[4];
5775
5776 insert_operands[0] = target;
5777 insert_operands[2] = target;
5778 for (i = 0; i < n_elts; ++i)
5779 {
5780 x = XVECEXP (vals, 0, i);
5781 if (!(CONST_INT_P (x)
5782 || GET_CODE (x) == CONST_DOUBLE
5783 || GET_CODE (x) == CONST_FIXED))
5784 {
5785 if (!register_operand (x, GET_MODE (x)))
5786 x = force_reg (GET_MODE (x), x);
5787 insert_operands[1] = x;
5788 insert_operands[3] = GEN_INT (i);
5789 spu_builtin_insert (insert_operands);
5790 }
5791 }
5792 }
5793 }
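
/* Illustrative walk-through of spu_expand_vector_init above (a sketch,
   not taken from this file): for a V4SImode initializer { 1, x, 3, y }
   with x and y in registers, the constant pass loads the vector
   { 1, 1, 3, 1 } (variable slots padded with the first constant so the
   recursive call can use a splat when possible), and the variable pass
   then emits two element inserts via spu_builtin_insert, x into slot 1
   and y into slot 3.  An all-equal initializer such as { x, x, x, x }
   is handled entirely by gen_spu_splats.  */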
5794
5795 /* Return the insn index of the vector compare instruction for the given
5796 CODE, DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
5797
5798 static int
5799 get_vec_cmp_insn (enum rtx_code code,
5800 enum machine_mode dest_mode,
5801 enum machine_mode op_mode)
5802
5803 {
5804 switch (code)
5805 {
5806 case EQ:
5807 if (dest_mode == V16QImode && op_mode == V16QImode)
5808 return CODE_FOR_ceq_v16qi;
5809 if (dest_mode == V8HImode && op_mode == V8HImode)
5810 return CODE_FOR_ceq_v8hi;
5811 if (dest_mode == V4SImode && op_mode == V4SImode)
5812 return CODE_FOR_ceq_v4si;
5813 if (dest_mode == V4SImode && op_mode == V4SFmode)
5814 return CODE_FOR_ceq_v4sf;
5815 if (dest_mode == V2DImode && op_mode == V2DFmode)
5816 return CODE_FOR_ceq_v2df;
5817 break;
5818 case GT:
5819 if (dest_mode == V16QImode && op_mode == V16QImode)
5820 return CODE_FOR_cgt_v16qi;
5821 if (dest_mode == V8HImode && op_mode == V8HImode)
5822 return CODE_FOR_cgt_v8hi;
5823 if (dest_mode == V4SImode && op_mode == V4SImode)
5824 return CODE_FOR_cgt_v4si;
5825 if (dest_mode == V4SImode && op_mode == V4SFmode)
5826 return CODE_FOR_cgt_v4sf;
5827 if (dest_mode == V2DImode && op_mode == V2DFmode)
5828 return CODE_FOR_cgt_v2df;
5829 break;
5830 case GTU:
5831 if (dest_mode == V16QImode && op_mode == V16QImode)
5832 return CODE_FOR_clgt_v16qi;
5833 if (dest_mode == V8HImode && op_mode == V8HImode)
5834 return CODE_FOR_clgt_v8hi;
5835 if (dest_mode == V4SImode && op_mode == V4SImode)
5836 return CODE_FOR_clgt_v4si;
5837 break;
5838 default:
5839 break;
5840 }
5841 return -1;
5842 }
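
/* For example (illustrative only): EQ with DEST_MODE V4SImode and
   OP_MODE V4SFmode maps to CODE_FOR_ceq_v4sf, while LE has no entry
   and returns -1, leaving spu_emit_vector_compare below to synthesize
   it from LT and EQ.  */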
5843
5844 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5845 DMODE is expected destination mode. This is a recursive function. */
5846
5847 static rtx
5848 spu_emit_vector_compare (enum rtx_code rcode,
5849 rtx op0, rtx op1,
5850 enum machine_mode dmode)
5851 {
5852 int vec_cmp_insn;
5853 rtx mask;
5854 enum machine_mode dest_mode;
5855 enum machine_mode op_mode = GET_MODE (op1);
5856
5857 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5858
5859 /* Single precision floating point vector compare instructions use a
5860 V4SImode destination; double precision ones use a V2DImode destination.
5861 The result is moved to the appropriate mode later. */
5862 if (dmode == V4SFmode)
5863 dest_mode = V4SImode;
5864 else if (dmode == V2DFmode)
5865 dest_mode = V2DImode;
5866 else
5867 dest_mode = dmode;
5868
5869 mask = gen_reg_rtx (dest_mode);
5870 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5871
5872 if (vec_cmp_insn == -1)
5873 {
5874 bool swap_operands = false;
5875 bool try_again = false;
5876 switch (rcode)
5877 {
5878 case LT:
5879 rcode = GT;
5880 swap_operands = true;
5881 try_again = true;
5882 break;
5883 case LTU:
5884 rcode = GTU;
5885 swap_operands = true;
5886 try_again = true;
5887 break;
5888 case NE:
5889 /* Treat A != B as ~(A==B). */
5890 {
5891 enum insn_code nor_code;
5892 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5893 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5894 gcc_assert (nor_code != CODE_FOR_nothing);
5895 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5896 if (dmode != dest_mode)
5897 {
5898 rtx temp = gen_reg_rtx (dest_mode);
5899 convert_move (temp, mask, 0);
5900 return temp;
5901 }
5902 return mask;
5903 }
5904 break;
5905 case GE:
5906 case GEU:
5907 case LE:
5908 case LEU:
5909 /* Try GT/GTU/LT/LTU OR EQ */
5910 {
5911 rtx c_rtx, eq_rtx;
5912 enum insn_code ior_code;
5913 enum rtx_code new_code;
5914
5915 switch (rcode)
5916 {
5917 case GE: new_code = GT; break;
5918 case GEU: new_code = GTU; break;
5919 case LE: new_code = LT; break;
5920 case LEU: new_code = LTU; break;
5921 default:
5922 gcc_unreachable ();
5923 }
5924
5925 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5926 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5927
5928 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5929 gcc_assert (ior_code != CODE_FOR_nothing);
5930 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5931 if (dmode != dest_mode)
5932 {
5933 rtx temp = gen_reg_rtx (dest_mode);
5934 convert_move (temp, mask, 0);
5935 return temp;
5936 }
5937 return mask;
5938 }
5939 break;
5940 default:
5941 gcc_unreachable ();
5942 }
5943
5944 /* You only get two chances. */
5945 if (try_again)
5946 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5947
5948 gcc_assert (vec_cmp_insn != -1);
5949
5950 if (swap_operands)
5951 {
5952 rtx tmp;
5953 tmp = op0;
5954 op0 = op1;
5955 op1 = tmp;
5956 }
5957 }
5958
5959 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5960 if (dmode != dest_mode)
5961 {
5962 rtx temp = gen_reg_rtx (dest_mode);
5963 convert_move (temp, mask, 0);
5964 return temp;
5965 }
5966 return mask;
5967 }
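
/* Illustrative expansion (a sketch of what the code above emits, not a
   literal RTL dump): a V4SImode GE compare has no direct instruction,
   so it is synthesized as

     t1   = cgt_v4si (op0, op1)          (the GT part)
     t2   = ceq_v4si (op0, op1)          (the EQ part)
     mask = t1 | t2                      (GE is GT | EQ)

   LT and LTU simply retry as GT and GTU with the operands swapped, and
   NE is built as the one's complement of EQ.  */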
5968
5969
5970 /* Emit vector conditional expression.
5971 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5972 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5973
5974 int
5975 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5976 rtx cond, rtx cc_op0, rtx cc_op1)
5977 {
5978 enum machine_mode dest_mode = GET_MODE (dest);
5979 enum rtx_code rcode = GET_CODE (cond);
5980 rtx mask;
5981
5982 /* Get the vector mask for the given relational operation. */
5983 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5984
5985 emit_insn (gen_selb (dest, op2, op1, mask));
5986
5987 return 1;
5988 }
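
/* Illustrative use of spu_emit_vector_cond_expr above (a sketch based
   on selb computing (ra & ~rc) | (rb & rc), which is an assumption
   about the instruction rather than something stated here): for
   dest = (cc_op0 > cc_op1) ? op1 : op2 on V4SImode operands, the mask
   is all-ones in the lanes where the compare holds, so selb takes op1
   in those lanes and op2 elsewhere; this is why op2 and op1 appear
   swapped in the gen_selb call.  */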
5989
5990 static rtx
5991 spu_force_reg (enum machine_mode mode, rtx op)
5992 {
5993 rtx x, r;
5994 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5995 {
5996 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5997 || GET_MODE (op) == BLKmode)
5998 return force_reg (mode, convert_to_mode (mode, op, 0));
5999 abort ();
6000 }
6001
6002 r = force_reg (GET_MODE (op), op);
6003 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6004 {
6005 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6006 if (x)
6007 return x;
6008 }
6009
6010 x = gen_reg_rtx (mode);
6011 emit_insn (gen_spu_convert (x, r));
6012 return x;
6013 }
6014
6015 static void
6016 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6017 {
6018 HOST_WIDE_INT v = 0;
6019 int lsbits;
6020 /* Check the range of immediate operands. */
6021 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6022 {
6023 int range = p - SPU_BTI_7;
6024
6025 if (!CONSTANT_P (op))
6026 error ("%s expects an integer literal in the range [%d, %d].",
6027 d->name,
6028 spu_builtin_range[range].low, spu_builtin_range[range].high);
6029
6030 if (GET_CODE (op) == CONST
6031 && (GET_CODE (XEXP (op, 0)) == PLUS
6032 || GET_CODE (XEXP (op, 0)) == MINUS))
6033 {
6034 v = INTVAL (XEXP (XEXP (op, 0), 1));
6035 op = XEXP (XEXP (op, 0), 0);
6036 }
6037 else if (GET_CODE (op) == CONST_INT)
6038 v = INTVAL (op);
6039 else if (GET_CODE (op) == CONST_VECTOR
6040 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6041 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6042
6043 /* The default for v is 0 which is valid in every range. */
6044 if (v < spu_builtin_range[range].low
6045 || v > spu_builtin_range[range].high)
6046 error ("%s expects an integer literal in the range [%d, %d]. ("
6047 HOST_WIDE_INT_PRINT_DEC ")",
6048 d->name,
6049 spu_builtin_range[range].low, spu_builtin_range[range].high,
6050 v);
6051
6052 switch (p)
6053 {
6054 case SPU_BTI_S10_4:
6055 lsbits = 4;
6056 break;
6057 case SPU_BTI_U16_2:
6058 /* This is only used in lqa and stqa. Even though the insns
6059 encode 16 bits of the address (all but the 2 least
6060 significant), only 14 bits are used because the address is
6061 masked to be 16-byte aligned. */
6062 lsbits = 4;
6063 break;
6064 case SPU_BTI_S16_2:
6065 /* This is used for lqr and stqr. */
6066 lsbits = 2;
6067 break;
6068 default:
6069 lsbits = 0;
6070 }
6071
6072 if (GET_CODE (op) == LABEL_REF
6073 || (GET_CODE (op) == SYMBOL_REF
6074 && SYMBOL_REF_FUNCTION_P (op))
6075 || (v & ((1 << lsbits) - 1)) != 0)
6076 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6077 d->name);
6078 }
6079 }
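
/* Illustrative diagnostics from spu_check_builtin_parm above (a
   sketch; the exact bounds live in spu_builtin_range[] and are not
   repeated here): a non-literal or out-of-range immediate argument to
   an intrinsic such as si_ai() produces the "expects an integer
   literal in the range [low, high]" error, while a literal whose low
   lsbits are nonzero, e.g. an offset to lqr/stqr (SPU_BTI_S16_2) that
   is not a multiple of 4, triggers the "least significant bits are
   ignored" warning.  */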
6080
6081
6082 static int
6083 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6084 rtx target, rtx ops[])
6085 {
6086 enum insn_code icode = (enum insn_code) d->icode;
6087 int i = 0, a;
6088
6089 /* Expand the arguments into rtl. */
6090
6091 if (d->parm[0] != SPU_BTI_VOID)
6092 ops[i++] = target;
6093
6094 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6095 {
6096 tree arg = CALL_EXPR_ARG (exp, a);
6097 if (arg == 0)
6098 abort ();
6099 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6100 }
6101
6102 /* The insn pattern may have additional operands (SCRATCH).
6103 Return the number of actual non-SCRATCH operands. */
6104 gcc_assert (i <= insn_data[icode].n_operands);
6105 return i;
6106 }
6107
6108 static rtx
6109 spu_expand_builtin_1 (struct spu_builtin_description *d,
6110 tree exp, rtx target)
6111 {
6112 rtx pat;
6113 rtx ops[8];
6114 enum insn_code icode = (enum insn_code) d->icode;
6115 enum machine_mode mode, tmode;
6116 int i, p;
6117 int n_operands;
6118 tree return_type;
6119
6120 /* Set up ops[] with values from arglist. */
6121 n_operands = expand_builtin_args (d, exp, target, ops);
6122
6123 /* Handle the target operand which must be operand 0. */
6124 i = 0;
6125 if (d->parm[0] != SPU_BTI_VOID)
6126 {
6127
6128 /* We prefer the mode specified for the match_operand; otherwise
6129 use the mode from the builtin function prototype. */
6130 tmode = insn_data[d->icode].operand[0].mode;
6131 if (tmode == VOIDmode)
6132 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6133
6134 /* Try to use TARGET, because not using it can lead to extra copies,
6135 and when all of the registers are in use those extra copies lead
6136 to extra spills. */
6137 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6138 ops[0] = target;
6139 else
6140 target = ops[0] = gen_reg_rtx (tmode);
6141
6142 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6143 abort ();
6144
6145 i++;
6146 }
6147
6148 if (d->fcode == SPU_MASK_FOR_LOAD)
6149 {
6150 enum machine_mode mode = insn_data[icode].operand[1].mode;
6151 tree arg;
6152 rtx addr, op, pat;
6153
6154 /* Get the address. */
6155 arg = CALL_EXPR_ARG (exp, 0);
6156 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
6157 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6158 addr = memory_address (mode, op);
6159
6160 /* Negate the address. */
6161 op = gen_reg_rtx (GET_MODE (addr));
6162 emit_insn (gen_rtx_SET (VOIDmode, op,
6163 gen_rtx_NEG (GET_MODE (addr), addr)));
6164 op = gen_rtx_MEM (mode, op);
6165
6166 pat = GEN_FCN (icode) (target, op);
6167 if (!pat)
6168 return 0;
6169 emit_insn (pat);
6170 return target;
6171 }
6172
6173 /* Ignore align_hint, but still expand its args in case they have
6174 side effects. */
6175 if (icode == CODE_FOR_spu_align_hint)
6176 return 0;
6177
6178 /* Handle the rest of the operands. */
6179 for (p = 1; i < n_operands; i++, p++)
6180 {
6181 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6182 mode = insn_data[d->icode].operand[i].mode;
6183 else
6184 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6185
6186 /* MODE can be VOIDmode here for labels. */
6187
6188 /* For specific intrinsics with an immediate operand, e.g.,
6189 si_ai(), we sometimes need to convert the scalar argument to a
6190 vector argument by splatting the scalar. */
6191 if (VECTOR_MODE_P (mode)
6192 && (GET_CODE (ops[i]) == CONST_INT
6193 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6194 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6195 {
6196 if (GET_CODE (ops[i]) == CONST_INT)
6197 ops[i] = spu_const (mode, INTVAL (ops[i]));
6198 else
6199 {
6200 rtx reg = gen_reg_rtx (mode);
6201 enum machine_mode imode = GET_MODE_INNER (mode);
6202 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6203 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6204 if (imode != GET_MODE (ops[i]))
6205 ops[i] = convert_to_mode (imode, ops[i],
6206 TYPE_UNSIGNED (spu_builtin_types
6207 [d->parm[i]]));
6208 emit_insn (gen_spu_splats (reg, ops[i]));
6209 ops[i] = reg;
6210 }
6211 }
6212
6213 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6214
6215 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6216 ops[i] = spu_force_reg (mode, ops[i]);
6217 }
6218
6219 switch (n_operands)
6220 {
6221 case 0:
6222 pat = GEN_FCN (icode) (0);
6223 break;
6224 case 1:
6225 pat = GEN_FCN (icode) (ops[0]);
6226 break;
6227 case 2:
6228 pat = GEN_FCN (icode) (ops[0], ops[1]);
6229 break;
6230 case 3:
6231 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6232 break;
6233 case 4:
6234 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6235 break;
6236 case 5:
6237 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6238 break;
6239 case 6:
6240 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6241 break;
6242 default:
6243 abort ();
6244 }
6245
6246 if (!pat)
6247 abort ();
6248
6249 if (d->type == B_CALL || d->type == B_BISLED)
6250 emit_call_insn (pat);
6251 else if (d->type == B_JUMP)
6252 {
6253 emit_jump_insn (pat);
6254 emit_barrier ();
6255 }
6256 else
6257 emit_insn (pat);
6258
6259 return_type = spu_builtin_types[d->parm[0]];
6260 if (d->parm[0] != SPU_BTI_VOID
6261 && GET_MODE (target) != TYPE_MODE (return_type))
6262 {
6263 /* TARGET is the return value. It should always have the mode of
6264 the builtin function prototype. */
6265 target = spu_force_reg (TYPE_MODE (return_type), target);
6266 }
6267
6268 return target;
6269 }
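
/* Illustrative example of the scalar-to-vector splatting in
   spu_expand_builtin_1 above (a sketch, not from this file): when an
   insn pattern wants a V4SImode operand but the intrinsic, e.g.
   si_ai(), was given a scalar, a CONST_INT argument 3 becomes
   spu_const (V4SImode, 3), and a scalar SImode register argument is
   converted to the vector's inner mode if necessary and then broadcast
   with gen_spu_splats before the operand predicate is checked.  */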
6270
6271 rtx
6272 spu_expand_builtin (tree exp,
6273 rtx target,
6274 rtx subtarget ATTRIBUTE_UNUSED,
6275 enum machine_mode mode ATTRIBUTE_UNUSED,
6276 int ignore ATTRIBUTE_UNUSED)
6277 {
6278 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6279 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
6280 struct spu_builtin_description *d;
6281
6282 if (fcode < NUM_SPU_BUILTINS)
6283 {
6284 d = &spu_builtins[fcode];
6285
6286 return spu_expand_builtin_1 (d, exp, target);
6287 }
6288 abort ();
6289 }
6290
6291 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6292 static tree
6293 spu_builtin_mul_widen_even (tree type)
6294 {
6295 switch (TYPE_MODE (type))
6296 {
6297 case V8HImode:
6298 if (TYPE_UNSIGNED (type))
6299 return spu_builtins[SPU_MULE_0].fndecl;
6300 else
6301 return spu_builtins[SPU_MULE_1].fndecl;
6302 break;
6303 default:
6304 return NULL_TREE;
6305 }
6306 }
6307
6308 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6309 static tree
6310 spu_builtin_mul_widen_odd (tree type)
6311 {
6312 switch (TYPE_MODE (type))
6313 {
6314 case V8HImode:
6315 if (TYPE_UNSIGNED (type))
6316 return spu_builtins[SPU_MULO_1].fndecl;
6317 else
6318 return spu_builtins[SPU_MULO_0].fndecl;
6319 break;
6320 default:
6321 return NULL_TREE;
6322 }
6323 }
6324
6325 /* Implement targetm.vectorize.builtin_mask_for_load. */
6326 static tree
6327 spu_builtin_mask_for_load (void)
6328 {
6329 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
6330 gcc_assert (d);
6331 return d->fndecl;
6332 }
6333
6334 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6335 static int
6336 spu_builtin_vectorization_cost (bool runtime_test)
6337 {
6338 /* If the branch of the runtime test is taken, i.e. the vectorized
6339 version is skipped, this incurs a misprediction cost (because the
6340 vectorized version is expected to be the fall-through). So we subtract
6341 the latency of a mispredicted branch from the costs that are incurred
6342 when the vectorized version is executed. */
6343 if (runtime_test)
6344 return -19;
6345 else
6346 return 0;
6347 }
6348
6349 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6350 after applying some number of iterations. This routine does not determine
6351 how many iterations are required to reach the desired alignment. */
6352
6353 static bool
6354 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6355 {
6356 if (is_packed)
6357 return false;
6358
6359 /* All other types are naturally aligned. */
6360 return true;
6361 }
6362
6363 /* Implement targetm.vectorize.builtin_vec_perm. */
6364 tree
6365 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6366 {
6367 struct spu_builtin_description *d;
6368
6369 *mask_element_type = unsigned_char_type_node;
6370
6371 switch (TYPE_MODE (type))
6372 {
6373 case V16QImode:
6374 if (TYPE_UNSIGNED (type))
6375 d = &spu_builtins[SPU_SHUFFLE_0];
6376 else
6377 d = &spu_builtins[SPU_SHUFFLE_1];
6378 break;
6379
6380 case V8HImode:
6381 if (TYPE_UNSIGNED (type))
6382 d = &spu_builtins[SPU_SHUFFLE_2];
6383 else
6384 d = &spu_builtins[SPU_SHUFFLE_3];
6385 break;
6386
6387 case V4SImode:
6388 if (TYPE_UNSIGNED (type))
6389 d = &spu_builtins[SPU_SHUFFLE_4];
6390 else
6391 d = &spu_builtins[SPU_SHUFFLE_5];
6392 break;
6393
6394 case V2DImode:
6395 if (TYPE_UNSIGNED (type))
6396 d = &spu_builtins[SPU_SHUFFLE_6];
6397 else
6398 d = &spu_builtins[SPU_SHUFFLE_7];
6399 break;
6400
6401 case V4SFmode:
6402 d = &spu_builtins[SPU_SHUFFLE_8];
6403 break;
6404
6405 case V2DFmode:
6406 d = &spu_builtins[SPU_SHUFFLE_9];
6407 break;
6408
6409 default:
6410 return NULL_TREE;
6411 }
6412
6413 gcc_assert (d);
6414 return d->fndecl;
6415 }
6416
6417 /* Count the total number of instructions in each pipe and return the
6418 maximum, which is used as the Minimum Iteration Interval (MII)
6419 in the modulo scheduler. get_pipe () returns -2, -1, 0, or 1;
6420 -2 means the instruction can go in either pipe0 or pipe1. */
6421 static int
6422 spu_sms_res_mii (struct ddg *g)
6423 {
6424 int i;
6425 unsigned t[4] = {0, 0, 0, 0};
6426
6427 for (i = 0; i < g->num_nodes; i++)
6428 {
6429 rtx insn = g->nodes[i].insn;
6430 int p = get_pipe (insn) + 2;
6431
6432 assert (p >= 0);
6433 assert (p < 4);
6434
6435 t[p]++;
6436 if (dump_file && INSN_P (insn))
6437 fprintf (dump_file, "i%d %s %d %d\n",
6438 INSN_UID (insn),
6439 insn_data[INSN_CODE(insn)].name,
6440 p, t[p]);
6441 }
6442 if (dump_file)
6443 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6444
6445 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6446 }
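
/* Worked example for the bound above (illustrative numbers only):
   with t = { 3, 0, 4, 2 }, i.e. 3 either-pipe, 4 pipe0-only and
   2 pipe1-only instructions, the dual-issue bound is
   (3 + 4 + 2 + 1) / 2 = 5 and the per-pipe bound is MAX (4, 2) = 4,
   so the MII returned is 5.  */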
6447
6448
6449 void
6450 spu_init_expanders (void)
6451 {
6452 if (cfun)
6453 {
6454 rtx r0, r1;
6455 /* The hard frame pointer register is only 128-bit aligned when
6456 frame_pointer_needed is true. We don't know that until we're
6457 expanding the prologue. */
6458 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6459
6460 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6461 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6462 to be treated as aligned, so generate them here. */
6463 r0 = gen_reg_rtx (SImode);
6464 r1 = gen_reg_rtx (SImode);
6465 mark_reg_pointer (r0, 128);
6466 mark_reg_pointer (r1, 128);
6467 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6468 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6469 }
6470 }
6471
6472 static enum machine_mode
6473 spu_libgcc_cmp_return_mode (void)
6474 {
6475
6476 /* For SPU, word_mode is TImode, so it is better to use SImode
6477 for compare returns. */
6478 return SImode;
6479 }
6480
6481 static enum machine_mode
6482 spu_libgcc_shift_count_mode (void)
6483 {
6484 /* For SPU, word_mode is TImode, so it is better to use SImode
6485 for shift counts. */
6486 return SImode;
6487 }
6488
6489 /* An early place to adjust some flags after GCC has finished processing
6490 them. */
6491 static void
6492 asm_file_start (void)
6493 {
6494 /* Variable tracking should be run after all optimizations which
6495 change order of insns. It also needs a valid CFG. */
6496 spu_flag_var_tracking = flag_var_tracking;
6497 flag_var_tracking = 0;
6498
6499 default_file_start ();
6500 }
6501
6502 /* Implement targetm.section_type_flags. */
6503 static unsigned int
6504 spu_section_type_flags (tree decl, const char *name, int reloc)
6505 {
6506 /* .toe needs to have type @nobits. */
6507 if (strcmp (name, ".toe") == 0)
6508 return SECTION_BSS;
6509 return default_section_type_flags (decl, name, reloc);
6510 }
6511
6512 /* Generate a constant or register which contains 2^SCALE. We assume
6513 the result is valid for MODE. Currently, MODE must be V4SFmode and
6514 SCALE must be SImode. */
6515 rtx
6516 spu_gen_exp2 (enum machine_mode mode, rtx scale)
6517 {
6518 gcc_assert (mode == V4SFmode);
6519 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
6520 if (GET_CODE (scale) != CONST_INT)
6521 {
6522 /* unsigned int exp = (127 + scale) << 23;
6523 __vector float m = (__vector float) spu_splats (exp); */
6524 rtx reg = force_reg (SImode, scale);
6525 rtx exp = gen_reg_rtx (SImode);
6526 rtx mul = gen_reg_rtx (mode);
6527 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
6528 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
6529 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
6530 return mul;
6531 }
6532 else
6533 {
6534 HOST_WIDE_INT exp = 127 + INTVAL (scale);
6535 unsigned char arr[16];
6536 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
6537 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
6538 arr[2] = arr[6] = arr[10] = arr[14] = 0;
6539 arr[3] = arr[7] = arr[11] = arr[15] = 0;
6540 return array_to_constant (mode, arr);
6541 }
6542 }
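
/* Worked example for the constant path of spu_gen_exp2 above
   (illustrative): SCALE = 3 gives exp = 127 + 3 = 130 (0x82), so each
   32-bit lane gets arr[0] = 0x82 >> 1 = 0x41 and
   arr[1] = (0x82 << 7) & 0xff = 0x00, i.e. the lane value 0x41000000,
   which is the single precision bit pattern for 8.0f = 2^3.  */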
6543
6544 /* After reload, just change the convert into a move instruction
6545 or a dead instruction. */
6546 void
6547 spu_split_convert (rtx ops[])
6548 {
6549 if (REGNO (ops[0]) == REGNO (ops[1]))
6550 emit_note (NOTE_INSN_DELETED);
6551 else
6552 {
6553 /* Always use TImode, as this might help hard register copy propagation. */
6554 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
6555 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
6556 emit_insn (gen_move_insn (op0, op1));
6557 }
6558 }
6559
6560 #include "gt-spu.h"