1 /* Copyright (C) 2006, 2007 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "real.h"
25 #include "insn-config.h"
26 #include "conditions.h"
27 #include "insn-attr.h"
28 #include "flags.h"
29 #include "recog.h"
30 #include "obstack.h"
31 #include "tree.h"
32 #include "expr.h"
33 #include "optabs.h"
34 #include "except.h"
35 #include "function.h"
36 #include "output.h"
37 #include "basic-block.h"
38 #include "integrate.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "c-common.h"
52 #include "machmode.h"
53 #include "tree-gimple.h"
54 #include "tm-constrs.h"
55 #include "spu-builtins.h"
56 #include "ddg.h"
57
58 /* Builtin types, data and prototypes. */
59 struct spu_builtin_range
60 {
61 int low, high;
62 };
63
64 static struct spu_builtin_range spu_builtin_range[] = {
65 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
66 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
67 {0ll, 0x7fll}, /* SPU_BTI_U7 */
68 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
69 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
70 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
71 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
72 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
73 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
74 {0ll, 0xffffll}, /* SPU_BTI_U16 */
75 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
76 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
77 };
78
79 \f
80 /* Target specific attribute specifications. */
81 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
82
83 /* Prototypes and external defs. */
84 static void spu_init_builtins (void);
85 static unsigned char spu_scalar_mode_supported_p (enum machine_mode mode);
86 static unsigned char spu_vector_mode_supported_p (enum machine_mode mode);
87 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
88 static rtx get_pic_reg (void);
89 static int need_to_save_reg (int regno, int saving);
90 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
91 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
92 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
93 rtx scratch);
94 static void emit_nop_for_insn (rtx insn);
95 static bool insn_clobbers_hbr (rtx insn);
96 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
97 int distance);
98 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
99 enum machine_mode dmode);
100 static rtx get_branch_target (rtx branch);
101 static void insert_branch_hints (void);
102 static void insert_nops (void);
103 static void spu_machine_dependent_reorg (void);
104 static int spu_sched_issue_rate (void);
105 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
106 int can_issue_more);
107 static int get_pipe (rtx insn);
108 static int spu_sched_adjust_priority (rtx insn, int pri);
109 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
110 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
111 int flags,
112 unsigned char *no_add_attrs);
113 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
114 int flags,
115 unsigned char *no_add_attrs);
116 static int spu_naked_function_p (tree func);
117 static unsigned char spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
118 const_tree type, unsigned char named);
119 static tree spu_build_builtin_va_list (void);
120 static void spu_va_start (tree, rtx);
121 static tree spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
122 tree * post_p);
123 static int regno_aligned_for_load (int regno);
124 static int store_with_one_insn_p (rtx mem);
125 static int reg_align (rtx reg);
126 static int mem_is_padded_component_ref (rtx x);
127 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
128 static void spu_asm_globalize_label (FILE * file, const char *name);
129 static unsigned char spu_rtx_costs (rtx x, int code, int outer_code,
130 int *total);
131 static unsigned char spu_function_ok_for_sibcall (tree decl, tree exp);
132 static void spu_init_libfuncs (void);
133 static bool spu_return_in_memory (const_tree type, const_tree fntype);
134 static void fix_range (const char *);
135 static void spu_encode_section_info (tree, rtx, int);
136 static tree spu_builtin_mul_widen_even (tree);
137 static tree spu_builtin_mul_widen_odd (tree);
138 static tree spu_builtin_mask_for_load (void);
139 static int spu_builtin_vectorization_cost (bool);
140 static bool spu_vector_alignment_reachable (const_tree, bool);
141 static int spu_sms_res_mii (struct ddg *g);
142
143 extern const char *reg_names[];
144 rtx spu_compare_op0, spu_compare_op1;
145
146 /* Which instruction set architecture to use. */
147 int spu_arch;
148 /* Which cpu we are tuning for. */
149 int spu_tune;
150
151 enum spu_immediate {
152 SPU_NONE,
153 SPU_IL,
154 SPU_ILA,
155 SPU_ILH,
156 SPU_ILHU,
157 SPU_ORI,
158 SPU_ORHI,
159 SPU_ORBI,
160 SPU_IOHL
161 };
162 enum immediate_class
163 {
164 IC_POOL, /* constant pool */
165 IC_IL1, /* one il* instruction */
166 IC_IL2, /* both ilhu and iohl instructions */
167 IC_IL1s, /* one il* instruction */
168 IC_IL2s, /* both ilhu and iohl instructions */
169 IC_FSMBI, /* the fsmbi instruction */
170 IC_CPAT, /* one of the c*d instructions */
171 IC_FSMBI2 /* fsmbi plus 1 other instruction */
172 };
173
174 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
175 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
176 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
177 static enum immediate_class classify_immediate (rtx op,
178 enum machine_mode mode);
179
180 static enum machine_mode
181 spu_libgcc_cmp_return_mode (void);
182
183 static enum machine_mode
184 spu_libgcc_shift_count_mode (void);
185
186 /* Built in types. */
187 tree spu_builtin_types[SPU_BTI_MAX];
188 \f
189 /* TARGET overrides. */
190
191 #undef TARGET_INIT_BUILTINS
192 #define TARGET_INIT_BUILTINS spu_init_builtins
193
194 #undef TARGET_EXPAND_BUILTIN
195 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
196
197 #undef TARGET_EH_RETURN_FILTER_MODE
198 #define TARGET_EH_RETURN_FILTER_MODE spu_eh_return_filter_mode
199
200 /* The .8byte directive doesn't seem to work well for a 32 bit
201 architecture. */
202 #undef TARGET_ASM_UNALIGNED_DI_OP
203 #define TARGET_ASM_UNALIGNED_DI_OP NULL
204
205 #undef TARGET_RTX_COSTS
206 #define TARGET_RTX_COSTS spu_rtx_costs
207
208 #undef TARGET_ADDRESS_COST
209 #define TARGET_ADDRESS_COST hook_int_rtx_0
210
211 #undef TARGET_SCHED_ISSUE_RATE
212 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
213
214 #undef TARGET_SCHED_VARIABLE_ISSUE
215 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
216
217 #undef TARGET_SCHED_ADJUST_PRIORITY
218 #define TARGET_SCHED_ADJUST_PRIORITY spu_sched_adjust_priority
219
220 #undef TARGET_SCHED_ADJUST_COST
221 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
222
223 const struct attribute_spec spu_attribute_table[];
224 #undef TARGET_ATTRIBUTE_TABLE
225 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
226
227 #undef TARGET_ASM_INTEGER
228 #define TARGET_ASM_INTEGER spu_assemble_integer
229
230 #undef TARGET_SCALAR_MODE_SUPPORTED_P
231 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
232
233 #undef TARGET_VECTOR_MODE_SUPPORTED_P
234 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
235
236 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
237 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
238
239 #undef TARGET_ASM_GLOBALIZE_LABEL
240 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
241
242 #undef TARGET_PASS_BY_REFERENCE
243 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
244
245 #undef TARGET_MUST_PASS_IN_STACK
246 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
247
248 #undef TARGET_BUILD_BUILTIN_VA_LIST
249 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
250
251 #undef TARGET_EXPAND_BUILTIN_VA_START
252 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
253
254 #undef TARGET_SETUP_INCOMING_VARARGS
255 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
256
257 #undef TARGET_MACHINE_DEPENDENT_REORG
258 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
259
260 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
261 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
262
263 #undef TARGET_DEFAULT_TARGET_FLAGS
264 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
265
266 #undef TARGET_INIT_LIBFUNCS
267 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
268
269 #undef TARGET_RETURN_IN_MEMORY
270 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
271
272 #undef TARGET_ENCODE_SECTION_INFO
273 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
274
275 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
276 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
277
278 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
279 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
280
281 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
282 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
283
284 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
285 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
286
287 #undef TARGET_VECTOR_ALIGNMENT_REACHABLE
288 #define TARGET_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
289
290 #undef TARGET_LIBGCC_CMP_RETURN_MODE
291 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
292
293 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
294 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
295
296 #undef TARGET_SCHED_SMS_RES_MII
297 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
298
299 struct gcc_target targetm = TARGET_INITIALIZER;
300
301 void
302 spu_optimization_options (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
303 {
304 /* Override some of the default param values. With so many registers
305 larger values are better for these params. */
306 MAX_PENDING_LIST_LENGTH = 128;
307
308 /* With so many registers, register renaming is better enabled by default. */
309 flag_rename_registers = 1;
310 }
311
312 /* Sometimes certain combinations of command options do not make sense
313 on a particular target machine. You can define a macro
314 OVERRIDE_OPTIONS to take account of this. This macro, if defined, is
315 executed once just after all the command options have been parsed. */
316 void
317 spu_override_options (void)
318 {
319 /* Small loops will be completely peeled (unrolled) at -O3. For SPU it is more important
320 to keep code small by default. */
321 if (!flag_unroll_loops && !flag_peel_loops
322 && !PARAM_SET_P (PARAM_MAX_COMPLETELY_PEEL_TIMES))
323 PARAM_VALUE (PARAM_MAX_COMPLETELY_PEEL_TIMES) = 1;
324
325 flag_omit_frame_pointer = 1;
326
327 if (align_functions < 8)
328 align_functions = 8;
329
330 if (spu_fixed_range_string)
331 fix_range (spu_fixed_range_string);
332
333 /* Determine processor architectural level. */
334 if (spu_arch_string)
335 {
336 if (strcmp (&spu_arch_string[0], "cell") == 0)
337 spu_arch = PROCESSOR_CELL;
338 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
339 spu_arch = PROCESSOR_CELLEDP;
340 else
341 error ("Unknown architecture '%s'", &spu_arch_string[0]);
342 }
343
344 /* Determine processor to tune for. */
345 if (spu_tune_string)
346 {
347 if (strcmp (&spu_tune_string[0], "cell") == 0)
348 spu_tune = PROCESSOR_CELL;
349 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
350 spu_tune = PROCESSOR_CELLEDP;
351 else
352 error ("Unknown architecture '%s'", &spu_tune_string[0]);
353 }
354 }
355 \f
356 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
357 struct attribute_spec.handler. */
358
359 /* Table of machine attributes. */
360 const struct attribute_spec spu_attribute_table[] =
361 {
362 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
363 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
364 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
365 { NULL, 0, 0, false, false, false, NULL }
366 };
367
368 /* True if MODE is valid for the target. By "valid", we mean able to
369 be manipulated in non-trivial ways. In particular, this means all
370 the arithmetic is supported. */
371 static bool
372 spu_scalar_mode_supported_p (enum machine_mode mode)
373 {
374 switch (mode)
375 {
376 case QImode:
377 case HImode:
378 case SImode:
379 case SFmode:
380 case DImode:
381 case TImode:
382 case DFmode:
383 return true;
384
385 default:
386 return false;
387 }
388 }
389
390 /* Similarly for vector modes. "Supported" here is less strict: at
391 least some operations are supported; callers need to check optabs or
392 builtins for further details. */
393 static bool
394 spu_vector_mode_supported_p (enum machine_mode mode)
395 {
396 switch (mode)
397 {
398 case V16QImode:
399 case V8HImode:
400 case V4SImode:
401 case V2DImode:
402 case V4SFmode:
403 case V2DFmode:
404 return true;
405
406 default:
407 return false;
408 }
409 }
410
411 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
412 least significant bytes of the outer mode. This function returns
413 TRUE for the SUBREG's where this is correct. */
414 int
415 valid_subreg (rtx op)
416 {
417 enum machine_mode om = GET_MODE (op);
418 enum machine_mode im = GET_MODE (SUBREG_REG (op));
419 return om != VOIDmode && im != VOIDmode
420 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
421 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4));
422 }
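/* Illustrative examples (not from the original source): (subreg:SI (reg:QI))
and (subreg:V4SI (reg:TI)) pass this test, while (subreg:SI (reg:DI)) and
(subreg:TI (reg:SI)) do not, because the low bytes of the larger value would
not line up with the smaller mode on SPU. */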
423
424 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
425 and adjust the start offset. */
426 static rtx
427 adjust_operand (rtx op, HOST_WIDE_INT * start)
428 {
429 enum machine_mode mode;
430 int op_size;
431 /* Strip any SUBREG */
432 if (GET_CODE (op) == SUBREG)
433 {
434 if (start)
435 *start -=
436 GET_MODE_BITSIZE (GET_MODE (op)) -
437 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
438 op = SUBREG_REG (op);
439 }
440 /* If it is smaller than SI, widen it to SI (the SUBREG is added below). */
441 op_size = GET_MODE_BITSIZE (GET_MODE (op));
442 if (op_size < 32)
443 {
444 if (start)
445 *start += 32 - op_size;
446 op_size = 32;
447 }
448 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
449 mode = mode_for_size (op_size, MODE_INT, 0);
450 if (mode != GET_MODE (op))
451 op = gen_rtx_SUBREG (mode, op, 0);
452 return op;
453 }
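/* For instance (illustrative): given (subreg:TI (reg:SI x) 0) with *START
== 100, the SUBREG is stripped and *START becomes 100 - (128 - 32) = 4,
i.e. the same bit position counted from the MSB of the SImode register. */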
454
455 void
456 spu_expand_extv (rtx ops[], int unsignedp)
457 {
458 HOST_WIDE_INT width = INTVAL (ops[2]);
459 HOST_WIDE_INT start = INTVAL (ops[3]);
460 HOST_WIDE_INT src_size, dst_size;
461 enum machine_mode src_mode, dst_mode;
462 rtx dst = ops[0], src = ops[1];
463 rtx s;
464
465 dst = adjust_operand (ops[0], 0);
466 dst_mode = GET_MODE (dst);
467 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
468
469 src = adjust_operand (src, &start);
470 src_mode = GET_MODE (src);
471 src_size = GET_MODE_BITSIZE (GET_MODE (src));
472
473 if (start > 0)
474 {
475 s = gen_reg_rtx (src_mode);
476 switch (src_mode)
477 {
478 case SImode:
479 emit_insn (gen_ashlsi3 (s, src, GEN_INT (start)));
480 break;
481 case DImode:
482 emit_insn (gen_ashldi3 (s, src, GEN_INT (start)));
483 break;
484 case TImode:
485 emit_insn (gen_ashlti3 (s, src, GEN_INT (start)));
486 break;
487 default:
488 abort ();
489 }
490 src = s;
491 }
492
493 if (width < src_size)
494 {
495 rtx pat;
496 int icode;
497 switch (src_mode)
498 {
499 case SImode:
500 icode = unsignedp ? CODE_FOR_lshrsi3 : CODE_FOR_ashrsi3;
501 break;
502 case DImode:
503 icode = unsignedp ? CODE_FOR_lshrdi3 : CODE_FOR_ashrdi3;
504 break;
505 case TImode:
506 icode = unsignedp ? CODE_FOR_lshrti3 : CODE_FOR_ashrti3;
507 break;
508 default:
509 abort ();
510 }
511 s = gen_reg_rtx (src_mode);
512 pat = GEN_FCN (icode) (s, src, GEN_INT (src_size - width));
513 emit_insn (pat);
514 src = s;
515 }
516
517 convert_move (dst, src, unsignedp);
518 }
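/* A worked example of the extraction above (illustrative only): pulling an
8-bit field that starts at bit 4 (counted from the MSB) out of an SImode
source first shifts left by 4 to left-justify the field, then shifts right
by 32 - 8 = 24 -- arithmetically for a signed extract, logically for an
unsigned one -- to right-justify and extend it. */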
519
520 void
521 spu_expand_insv (rtx ops[])
522 {
523 HOST_WIDE_INT width = INTVAL (ops[1]);
524 HOST_WIDE_INT start = INTVAL (ops[2]);
525 HOST_WIDE_INT maskbits;
526 enum machine_mode dst_mode, src_mode;
527 rtx dst = ops[0], src = ops[3];
528 int dst_size, src_size;
529 rtx mask;
530 rtx shift_reg;
531 int shift;
532
533
534 if (GET_CODE (ops[0]) == MEM)
535 dst = gen_reg_rtx (TImode);
536 else
537 dst = adjust_operand (dst, &start);
538 dst_mode = GET_MODE (dst);
539 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
540
541 if (CONSTANT_P (src))
542 {
543 enum machine_mode m =
544 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
545 src = force_reg (m, convert_to_mode (m, src, 0));
546 }
547 src = adjust_operand (src, 0);
548 src_mode = GET_MODE (src);
549 src_size = GET_MODE_BITSIZE (GET_MODE (src));
550
551 mask = gen_reg_rtx (dst_mode);
552 shift_reg = gen_reg_rtx (dst_mode);
553 shift = dst_size - start - width;
554
555 /* It's not safe to use subreg here because the compiler assumes
556 that the SUBREG_REG is right justified in the SUBREG. */
557 convert_move (shift_reg, src, 1);
558
559 if (shift > 0)
560 {
561 switch (dst_mode)
562 {
563 case SImode:
564 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
565 break;
566 case DImode:
567 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
568 break;
569 case TImode:
570 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
571 break;
572 default:
573 abort ();
574 }
575 }
576 else if (shift < 0)
577 abort ();
578
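/* Sanity-check example for the mask arithmetic below (illustrative): with
dst_size == 32, start == 4 and width == 8, maskbits is
(-1 << 20) + (1 << 28), i.e. 0x0ff00000 -- bits 27..20, an 8-bit field
beginning 4 bits below the MSB. */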
579 switch (dst_size)
580 {
581 case 32:
582 maskbits = (-1ll << (32 - width - start));
583 if (start)
584 maskbits += (1ll << (32 - start));
585 emit_move_insn (mask, GEN_INT (maskbits));
586 break;
587 case 64:
588 maskbits = (-1ll << (64 - width - start));
589 if (start)
590 maskbits += (1ll << (64 - start));
591 emit_move_insn (mask, GEN_INT (maskbits));
592 break;
593 case 128:
594 {
595 unsigned char arr[16];
596 int i = start / 8;
597 memset (arr, 0, sizeof (arr));
598 arr[i] = 0xff >> (start & 7);
599 for (i++; i <= (start + width - 1) / 8; i++)
600 arr[i] = 0xff;
601 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
602 emit_move_insn (mask, array_to_constant (TImode, arr));
603 }
604 break;
605 default:
606 abort ();
607 }
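/* When the destination is a MEM, the code below does a read-modify-write of
the enclosing aligned quadword: the new value and the mask are
rotated/shifted to the proper byte position, the aligned TImode word is
loaded, merged with selb and stored back; when start + width may run past
the known alignment, the following quadword is patched the same way. */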
608 if (GET_CODE (ops[0]) == MEM)
609 {
610 rtx aligned = gen_reg_rtx (SImode);
611 rtx low = gen_reg_rtx (SImode);
612 rtx addr = gen_reg_rtx (SImode);
613 rtx rotl = gen_reg_rtx (SImode);
614 rtx mask0 = gen_reg_rtx (TImode);
615 rtx mem;
616
617 emit_move_insn (addr, XEXP (ops[0], 0));
618 emit_insn (gen_andsi3 (aligned, addr, GEN_INT (-16)));
619 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
620 emit_insn (gen_negsi2 (rotl, low));
621 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
622 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
623 mem = change_address (ops[0], TImode, aligned);
624 set_mem_alias_set (mem, 0);
625 emit_move_insn (dst, mem);
626 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
627 emit_move_insn (mem, dst);
628 if (start + width > MEM_ALIGN (ops[0]))
629 {
630 rtx shl = gen_reg_rtx (SImode);
631 rtx mask1 = gen_reg_rtx (TImode);
632 rtx dst1 = gen_reg_rtx (TImode);
633 rtx mem1;
634 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
635 emit_insn (gen_shlqby_ti (mask1, mask, shl));
636 mem1 = adjust_address (mem, TImode, 16);
637 set_mem_alias_set (mem1, 0);
638 emit_move_insn (dst1, mem1);
639 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
640 emit_move_insn (mem1, dst1);
641 }
642 }
643 else
644 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
645 }
646
647
648 int
649 spu_expand_block_move (rtx ops[])
650 {
651 HOST_WIDE_INT bytes, align, offset;
652 rtx src, dst, sreg, dreg, target;
653 int i;
654 if (GET_CODE (ops[2]) != CONST_INT
655 || GET_CODE (ops[3]) != CONST_INT
656 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO * 8))
657 return 0;
658
659 bytes = INTVAL (ops[2]);
660 align = INTVAL (ops[3]);
661
662 if (bytes <= 0)
663 return 1;
664
665 dst = ops[0];
666 src = ops[1];
667
668 if (align == 16)
669 {
670 for (offset = 0; offset + 16 <= bytes; offset += 16)
671 {
672 dst = adjust_address (ops[0], V16QImode, offset);
673 src = adjust_address (ops[1], V16QImode, offset);
674 emit_move_insn (dst, src);
675 }
676 if (offset < bytes)
677 {
678 rtx mask;
679 unsigned char arr[16] = { 0 };
680 for (i = 0; i < bytes - offset; i++)
681 arr[i] = 0xff;
682 dst = adjust_address (ops[0], V16QImode, offset);
683 src = adjust_address (ops[1], V16QImode, offset);
684 mask = gen_reg_rtx (V16QImode);
685 sreg = gen_reg_rtx (V16QImode);
686 dreg = gen_reg_rtx (V16QImode);
687 target = gen_reg_rtx (V16QImode);
688 emit_move_insn (mask, array_to_constant (V16QImode, arr));
689 emit_move_insn (dreg, dst);
690 emit_move_insn (sreg, src);
691 emit_insn (gen_selb (target, dreg, sreg, mask));
692 emit_move_insn (dst, target);
693 }
694 return 1;
695 }
696 return 0;
697 }
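/* Example of the 16-byte-aligned path above (illustrative): a 20-byte copy
is done as one full V16QI move followed by a selb merge in which the first
4 bytes of the trailing quadword come from the source and the remaining 12
bytes are preserved from the destination. */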
698
699 enum spu_comp_code
700 { SPU_EQ, SPU_GT, SPU_GTU };
701
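/* Rows are indexed by the operand mode, in the same order as the `index'
computed in spu_emit_branch_or_set (QI, HI, SI, DI, TI, SF, DF, V16QI,
V8HI, V4SI, V4SF, V2DF); columns are indexed by enum spu_comp_code
(SPU_EQ, SPU_GT, SPU_GTU). A zero entry means no such compare insn
exists, e.g. there is no unsigned compare for the float modes. */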
702 int spu_comp_icode[12][3] = {
703 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
704 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
705 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
706 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
707 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
708 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
709 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
710 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
711 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
712 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
713 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
714 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
715 };
716
717 /* Emit a compare for CODE and, depending on IS_SET, branch on the
718 result, use it as a select mask, or store it in operands[0]. GCC can
719 figure this out too if we don't provide all variations of compares, but
720 since GCC always wants to use WORD_MODE, we can generate better code in
721 most cases if we do it ourselves. */
722 void
723 spu_emit_branch_or_set (int is_set, enum rtx_code code, rtx operands[])
724 {
725 int reverse_compare = 0;
726 int reverse_test = 0;
727 rtx compare_result, eq_result;
728 rtx comp_rtx, eq_rtx;
729 rtx target = operands[0];
730 enum machine_mode comp_mode;
731 enum machine_mode op_mode;
732 enum spu_comp_code scode, eq_code, ior_code;
733 int index;
734 int eq_test = 0;
735
736 /* When spu_compare_op1 is a CONST_INT, change (X >= C) to (X > C-1),
737 and so on, to keep the constant in operand 1. */
738 if (GET_CODE (spu_compare_op1) == CONST_INT)
739 {
740 HOST_WIDE_INT val = INTVAL (spu_compare_op1) - 1;
741 if (trunc_int_for_mode (val, GET_MODE (spu_compare_op0)) == val)
742 switch (code)
743 {
744 case GE:
745 spu_compare_op1 = GEN_INT (val);
746 code = GT;
747 break;
748 case LT:
749 spu_compare_op1 = GEN_INT (val);
750 code = LE;
751 break;
752 case GEU:
753 spu_compare_op1 = GEN_INT (val);
754 code = GTU;
755 break;
756 case LTU:
757 spu_compare_op1 = GEN_INT (val);
758 code = LEU;
759 break;
760 default:
761 break;
762 }
763 }
764
765 comp_mode = SImode;
766 op_mode = GET_MODE (spu_compare_op0);
767
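/* Note on the GE/LE handling below: when NaNs must be honored we cannot
implement a >= b as !(b > a), because both (a > b) and (b > a) are false
for an unordered pair while !(b > a) would be true; instead the eq_test
path emits an extra equality compare and ORs it into the result, so
a >= b becomes (a > b) || (a == b). */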
768 switch (code)
769 {
770 case GE:
771 scode = SPU_GT;
772 if (HONOR_NANS (op_mode))
773 {
774 reverse_compare = 0;
775 reverse_test = 0;
776 eq_test = 1;
777 eq_code = SPU_EQ;
778 }
779 else
780 {
781 reverse_compare = 1;
782 reverse_test = 1;
783 }
784 break;
785 case LE:
786 scode = SPU_GT;
787 if (HONOR_NANS (op_mode))
788 {
789 reverse_compare = 1;
790 reverse_test = 0;
791 eq_test = 1;
792 eq_code = SPU_EQ;
793 }
794 else
795 {
796 reverse_compare = 0;
797 reverse_test = 1;
798 }
799 break;
800 case LT:
801 reverse_compare = 1;
802 reverse_test = 0;
803 scode = SPU_GT;
804 break;
805 case GEU:
806 reverse_compare = 1;
807 reverse_test = 1;
808 scode = SPU_GTU;
809 break;
810 case LEU:
811 reverse_compare = 0;
812 reverse_test = 1;
813 scode = SPU_GTU;
814 break;
815 case LTU:
816 reverse_compare = 1;
817 reverse_test = 0;
818 scode = SPU_GTU;
819 break;
820 case NE:
821 reverse_compare = 0;
822 reverse_test = 1;
823 scode = SPU_EQ;
824 break;
825
826 case EQ:
827 scode = SPU_EQ;
828 break;
829 case GT:
830 scode = SPU_GT;
831 break;
832 case GTU:
833 scode = SPU_GTU;
834 break;
835 default:
836 scode = SPU_EQ;
837 break;
838 }
839
840 switch (op_mode)
841 {
842 case QImode:
843 index = 0;
844 comp_mode = QImode;
845 break;
846 case HImode:
847 index = 1;
848 comp_mode = HImode;
849 break;
850 case SImode:
851 index = 2;
852 break;
853 case DImode:
854 index = 3;
855 break;
856 case TImode:
857 index = 4;
858 break;
859 case SFmode:
860 index = 5;
861 break;
862 case DFmode:
863 index = 6;
864 break;
865 case V16QImode:
866 index = 7;
867 comp_mode = op_mode;
868 break;
869 case V8HImode:
870 index = 8;
871 comp_mode = op_mode;
872 break;
873 case V4SImode:
874 index = 9;
875 comp_mode = op_mode;
876 break;
877 case V4SFmode:
878 index = 10;
879 comp_mode = V4SImode;
880 break;
881 case V2DFmode:
882 index = 11;
883 comp_mode = V2DImode;
884 break;
885 case V2DImode:
886 default:
887 abort ();
888 }
889
890 if (GET_MODE (spu_compare_op1) == DFmode
891 && (scode != SPU_GT && scode != SPU_EQ))
892 abort ();
893
894 if (is_set == 0 && spu_compare_op1 == const0_rtx
895 && (GET_MODE (spu_compare_op0) == SImode
896 || GET_MODE (spu_compare_op0) == HImode) && scode == SPU_EQ)
897 {
898 /* Don't need to set a register with the result when we are
899 comparing against zero and branching. */
900 reverse_test = !reverse_test;
901 compare_result = spu_compare_op0;
902 }
903 else
904 {
905 compare_result = gen_reg_rtx (comp_mode);
906
907 if (reverse_compare)
908 {
909 rtx t = spu_compare_op1;
910 spu_compare_op1 = spu_compare_op0;
911 spu_compare_op0 = t;
912 }
913
914 if (spu_comp_icode[index][scode] == 0)
915 abort ();
916
917 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
918 (spu_compare_op0, op_mode))
919 spu_compare_op0 = force_reg (op_mode, spu_compare_op0);
920 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
921 (spu_compare_op1, op_mode))
922 spu_compare_op1 = force_reg (op_mode, spu_compare_op1);
923 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
924 spu_compare_op0,
925 spu_compare_op1);
926 if (comp_rtx == 0)
927 abort ();
928 emit_insn (comp_rtx);
929
930 if (eq_test)
931 {
932 eq_result = gen_reg_rtx (comp_mode);
933 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
934 spu_compare_op0,
935 spu_compare_op1);
936 if (eq_rtx == 0)
937 abort ();
938 emit_insn (eq_rtx);
939 ior_code = ior_optab->handlers[(int)comp_mode].insn_code;
940 gcc_assert (ior_code != CODE_FOR_nothing);
941 emit_insn (GEN_FCN (ior_code)
942 (compare_result, compare_result, eq_result));
943 }
944 }
945
946 if (is_set == 0)
947 {
948 rtx bcomp;
949 rtx loc_ref;
950
951 /* We don't have branch on QI compare insns, so we convert the
952 QI compare result to a HI result. */
953 if (comp_mode == QImode)
954 {
955 rtx old_res = compare_result;
956 compare_result = gen_reg_rtx (HImode);
957 comp_mode = HImode;
958 emit_insn (gen_extendqihi2 (compare_result, old_res));
959 }
960
961 if (reverse_test)
962 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
963 else
964 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
965
966 loc_ref = gen_rtx_LABEL_REF (VOIDmode, target);
967 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
968 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
969 loc_ref, pc_rtx)));
970 }
971 else if (is_set == 2)
972 {
973 int compare_size = GET_MODE_BITSIZE (comp_mode);
974 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
975 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
976 rtx select_mask;
977 rtx op_t = operands[2];
978 rtx op_f = operands[3];
979
980 /* The result of the comparison can be SI, HI or QI mode. Create a
981 mask based on that result. */
982 if (target_size > compare_size)
983 {
984 select_mask = gen_reg_rtx (mode);
985 emit_insn (gen_extend_compare (select_mask, compare_result));
986 }
987 else if (target_size < compare_size)
988 select_mask =
989 gen_rtx_SUBREG (mode, compare_result,
990 (compare_size - target_size) / BITS_PER_UNIT);
991 else if (comp_mode != mode)
992 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
993 else
994 select_mask = compare_result;
995
996 if (GET_MODE (target) != GET_MODE (op_t)
997 || GET_MODE (target) != GET_MODE (op_f))
998 abort ();
999
1000 if (reverse_test)
1001 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1002 else
1003 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1004 }
1005 else
1006 {
1007 if (reverse_test)
1008 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1009 gen_rtx_NOT (comp_mode, compare_result)));
1010 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1011 emit_insn (gen_extendhisi2 (target, compare_result));
1012 else if (GET_MODE (target) == SImode
1013 && GET_MODE (compare_result) == QImode)
1014 emit_insn (gen_extend_compare (target, compare_result));
1015 else
1016 emit_move_insn (target, compare_result);
1017 }
1018 }
1019
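/* Return the target bit pattern of a float CONST_DOUBLE as an integer.
For example (illustrative, assuming IEEE single precision) an SFmode 1.0
yields 0x3f800000; DFmode values are returned with target word 0 in the
upper 32 bits. */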
1020 HOST_WIDE_INT
1021 const_double_to_hwint (rtx x)
1022 {
1023 HOST_WIDE_INT val;
1024 REAL_VALUE_TYPE rv;
1025 if (GET_MODE (x) == SFmode)
1026 {
1027 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1028 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1029 }
1030 else if (GET_MODE (x) == DFmode)
1031 {
1032 long l[2];
1033 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1034 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1035 val = l[0];
1036 val = (val << 32) | (l[1] & 0xffffffff);
1037 }
1038 else
1039 abort ();
1040 return val;
1041 }
1042
1043 rtx
1044 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1045 {
1046 long tv[2];
1047 REAL_VALUE_TYPE rv;
1048 gcc_assert (mode == SFmode || mode == DFmode);
1049
1050 if (mode == SFmode)
1051 tv[0] = (v << 32) >> 32;
1052 else if (mode == DFmode)
1053 {
1054 tv[1] = (v << 32) >> 32;
1055 tv[0] = v >> 32;
1056 }
1057 real_from_target (&rv, tv, mode);
1058 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1059 }
1060
1061 void
1062 print_operand_address (FILE * file, register rtx addr)
1063 {
1064 rtx reg;
1065 rtx offset;
1066
1067 if (GET_CODE (addr) == AND
1068 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1069 && INTVAL (XEXP (addr, 1)) == -16)
1070 addr = XEXP (addr, 0);
1071
1072 switch (GET_CODE (addr))
1073 {
1074 case REG:
1075 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1076 break;
1077
1078 case PLUS:
1079 reg = XEXP (addr, 0);
1080 offset = XEXP (addr, 1);
1081 if (GET_CODE (offset) == REG)
1082 {
1083 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1084 reg_names[REGNO (offset)]);
1085 }
1086 else if (GET_CODE (offset) == CONST_INT)
1087 {
1088 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1089 INTVAL (offset), reg_names[REGNO (reg)]);
1090 }
1091 else
1092 abort ();
1093 break;
1094
1095 case CONST:
1096 case LABEL_REF:
1097 case SYMBOL_REF:
1098 case CONST_INT:
1099 output_addr_const (file, addr);
1100 break;
1101
1102 default:
1103 debug_rtx (addr);
1104 abort ();
1105 }
1106 }
1107
1108 void
1109 print_operand (FILE * file, rtx x, int code)
1110 {
1111 enum machine_mode mode = GET_MODE (x);
1112 HOST_WIDE_INT val;
1113 unsigned char arr[16];
1114 int xcode = GET_CODE (x);
1115 int i, info;
1116 if (GET_MODE (x) == VOIDmode)
1117 switch (code)
1118 {
1119 case 'L': /* 128 bits, signed */
1120 case 'm': /* 128 bits, signed */
1121 case 'T': /* 128 bits, signed */
1122 case 't': /* 128 bits, signed */
1123 mode = TImode;
1124 break;
1125 case 'K': /* 64 bits, signed */
1126 case 'k': /* 64 bits, signed */
1127 case 'D': /* 64 bits, signed */
1128 case 'd': /* 64 bits, signed */
1129 mode = DImode;
1130 break;
1131 case 'J': /* 32 bits, signed */
1132 case 'j': /* 32 bits, signed */
1133 case 's': /* 32 bits, signed */
1134 case 'S': /* 32 bits, signed */
1135 mode = SImode;
1136 break;
1137 }
1138 switch (code)
1139 {
1140
1141 case 'j': /* 32 bits, signed */
1142 case 'k': /* 64 bits, signed */
1143 case 'm': /* 128 bits, signed */
1144 if (xcode == CONST_INT
1145 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1146 {
1147 gcc_assert (logical_immediate_p (x, mode));
1148 constant_to_array (mode, x, arr);
1149 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1150 val = trunc_int_for_mode (val, SImode);
1151 switch (which_logical_immediate (val))
1152 {
1153 case SPU_ORI:
1154 break;
1155 case SPU_ORHI:
1156 fprintf (file, "h");
1157 break;
1158 case SPU_ORBI:
1159 fprintf (file, "b");
1160 break;
1161 default:
1162 gcc_unreachable();
1163 }
1164 }
1165 else
1166 gcc_unreachable();
1167 return;
1168
1169 case 'J': /* 32 bits, signed */
1170 case 'K': /* 64 bits, signed */
1171 case 'L': /* 128 bits, signed */
1172 if (xcode == CONST_INT
1173 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1174 {
1175 gcc_assert (logical_immediate_p (x, mode)
1176 || iohl_immediate_p (x, mode));
1177 constant_to_array (mode, x, arr);
1178 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1179 val = trunc_int_for_mode (val, SImode);
1180 switch (which_logical_immediate (val))
1181 {
1182 case SPU_ORI:
1183 case SPU_IOHL:
1184 break;
1185 case SPU_ORHI:
1186 val = trunc_int_for_mode (val, HImode);
1187 break;
1188 case SPU_ORBI:
1189 val = trunc_int_for_mode (val, QImode);
1190 break;
1191 default:
1192 gcc_unreachable();
1193 }
1194 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1195 }
1196 else
1197 gcc_unreachable();
1198 return;
1199
1200 case 't': /* 128 bits, signed */
1201 case 'd': /* 64 bits, signed */
1202 case 's': /* 32 bits, signed */
1203 if (CONSTANT_P (x))
1204 {
1205 enum immediate_class c = classify_immediate (x, mode);
1206 switch (c)
1207 {
1208 case IC_IL1:
1209 constant_to_array (mode, x, arr);
1210 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1211 val = trunc_int_for_mode (val, SImode);
1212 switch (which_immediate_load (val))
1213 {
1214 case SPU_IL:
1215 break;
1216 case SPU_ILA:
1217 fprintf (file, "a");
1218 break;
1219 case SPU_ILH:
1220 fprintf (file, "h");
1221 break;
1222 case SPU_ILHU:
1223 fprintf (file, "hu");
1224 break;
1225 default:
1226 gcc_unreachable ();
1227 }
1228 break;
1229 case IC_CPAT:
1230 constant_to_array (mode, x, arr);
1231 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1232 if (info == 1)
1233 fprintf (file, "b");
1234 else if (info == 2)
1235 fprintf (file, "h");
1236 else if (info == 4)
1237 fprintf (file, "w");
1238 else if (info == 8)
1239 fprintf (file, "d");
1240 break;
1241 case IC_IL1s:
1242 if (xcode == CONST_VECTOR)
1243 {
1244 x = CONST_VECTOR_ELT (x, 0);
1245 xcode = GET_CODE (x);
1246 }
1247 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1248 fprintf (file, "a");
1249 else if (xcode == HIGH)
1250 fprintf (file, "hu");
1251 break;
1252 case IC_FSMBI:
1253 case IC_FSMBI2:
1254 case IC_IL2:
1255 case IC_IL2s:
1256 case IC_POOL:
1257 abort ();
1258 }
1259 }
1260 else
1261 gcc_unreachable ();
1262 return;
1263
1264 case 'T': /* 128 bits, signed */
1265 case 'D': /* 64 bits, signed */
1266 case 'S': /* 32 bits, signed */
1267 if (CONSTANT_P (x))
1268 {
1269 enum immediate_class c = classify_immediate (x, mode);
1270 switch (c)
1271 {
1272 case IC_IL1:
1273 constant_to_array (mode, x, arr);
1274 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1275 val = trunc_int_for_mode (val, SImode);
1276 switch (which_immediate_load (val))
1277 {
1278 case SPU_IL:
1279 case SPU_ILA:
1280 break;
1281 case SPU_ILH:
1282 case SPU_ILHU:
1283 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1284 break;
1285 default:
1286 gcc_unreachable ();
1287 }
1288 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1289 break;
1290 case IC_FSMBI:
1291 constant_to_array (mode, x, arr);
1292 val = 0;
1293 for (i = 0; i < 16; i++)
1294 {
1295 val <<= 1;
1296 val |= arr[i] & 1;
1297 }
1298 print_operand (file, GEN_INT (val), 0);
1299 break;
1300 case IC_CPAT:
1301 constant_to_array (mode, x, arr);
1302 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1303 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1304 break;
1305 case IC_IL1s:
1306 if (xcode == HIGH)
1307 x = XEXP (x, 0);
1308 if (GET_CODE (x) == CONST_VECTOR)
1309 x = CONST_VECTOR_ELT (x, 0);
1310 output_addr_const (file, x);
1311 if (xcode == HIGH)
1312 fprintf (file, "@h");
1313 break;
1314 case IC_IL2:
1315 case IC_IL2s:
1316 case IC_FSMBI2:
1317 case IC_POOL:
1318 abort ();
1319 }
1320 }
1321 else
1322 gcc_unreachable ();
1323 return;
1324
1325 case 'C':
1326 if (xcode == CONST_INT)
1327 {
1328 /* Only the 4 least significant bits are relevant for generating
1329 control word instructions. */
1330 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1331 return;
1332 }
1333 break;
1334
1335 case 'M': /* print code for c*d */
1336 if (GET_CODE (x) == CONST_INT)
1337 switch (INTVAL (x))
1338 {
1339 case 1:
1340 fprintf (file, "b");
1341 break;
1342 case 2:
1343 fprintf (file, "h");
1344 break;
1345 case 4:
1346 fprintf (file, "w");
1347 break;
1348 case 8:
1349 fprintf (file, "d");
1350 break;
1351 default:
1352 gcc_unreachable();
1353 }
1354 else
1355 gcc_unreachable();
1356 return;
1357
1358 case 'N': /* Negate the operand */
1359 if (xcode == CONST_INT)
1360 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1361 else if (xcode == CONST_VECTOR)
1362 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1363 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1364 return;
1365
1366 case 'I': /* enable/disable interrupts */
1367 if (xcode == CONST_INT)
1368 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1369 return;
1370
1371 case 'b': /* branch modifiers */
1372 if (xcode == REG)
1373 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1374 else if (COMPARISON_P (x))
1375 fprintf (file, "%s", xcode == NE ? "n" : "");
1376 return;
1377
1378 case 'i': /* indirect call */
1379 if (xcode == MEM)
1380 {
1381 if (GET_CODE (XEXP (x, 0)) == REG)
1382 /* Used in indirect function calls. */
1383 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1384 else
1385 output_address (XEXP (x, 0));
1386 }
1387 return;
1388
1389 case 'p': /* load/store */
1390 if (xcode == MEM)
1391 {
1392 x = XEXP (x, 0);
1393 xcode = GET_CODE (x);
1394 }
1395 if (xcode == AND)
1396 {
1397 x = XEXP (x, 0);
1398 xcode = GET_CODE (x);
1399 }
1400 if (xcode == REG)
1401 fprintf (file, "d");
1402 else if (xcode == CONST_INT)
1403 fprintf (file, "a");
1404 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1405 fprintf (file, "r");
1406 else if (xcode == PLUS || xcode == LO_SUM)
1407 {
1408 if (GET_CODE (XEXP (x, 1)) == REG)
1409 fprintf (file, "x");
1410 else
1411 fprintf (file, "d");
1412 }
1413 return;
1414
1415 case 'e':
1416 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1417 val &= 0x7;
1418 output_addr_const (file, GEN_INT (val));
1419 return;
1420
1421 case 'f':
1422 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1423 val &= 0x1f;
1424 output_addr_const (file, GEN_INT (val));
1425 return;
1426
1427 case 'g':
1428 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1429 val &= 0x3f;
1430 output_addr_const (file, GEN_INT (val));
1431 return;
1432
1433 case 'h':
1434 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1435 val = (val >> 3) & 0x1f;
1436 output_addr_const (file, GEN_INT (val));
1437 return;
1438
1439 case 'E':
1440 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1441 val = -val;
1442 val &= 0x7;
1443 output_addr_const (file, GEN_INT (val));
1444 return;
1445
1446 case 'F':
1447 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1448 val = -val;
1449 val &= 0x1f;
1450 output_addr_const (file, GEN_INT (val));
1451 return;
1452
1453 case 'G':
1454 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1455 val = -val;
1456 val &= 0x3f;
1457 output_addr_const (file, GEN_INT (val));
1458 return;
1459
1460 case 'H':
1461 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1462 val = -(val & -8ll);
1463 val = (val >> 3) & 0x1f;
1464 output_addr_const (file, GEN_INT (val));
1465 return;
1466
1467 case 0:
1468 if (xcode == REG)
1469 fprintf (file, "%s", reg_names[REGNO (x)]);
1470 else if (xcode == MEM)
1471 output_address (XEXP (x, 0));
1472 else if (xcode == CONST_VECTOR)
1473 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1474 else
1475 output_addr_const (file, x);
1476 return;
1477
1478 /* unused letters
1479 o qr uvw yz
1480 AB OPQR UVWXYZ */
1481 default:
1482 output_operand_lossage ("invalid %%xn code");
1483 }
1484 gcc_unreachable ();
1485 }
1486
1487 extern char call_used_regs[];
1488
1489 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1490 caller saved register. For leaf functions it is more efficient to
1491 use a volatile register because we won't need to save and restore the
1492 pic register. This routine is only valid after register allocation
1493 is completed, so we can pick an unused register. */
1494 static rtx
1495 get_pic_reg (void)
1496 {
1497 rtx pic_reg = pic_offset_table_rtx;
1498 if (!reload_completed && !reload_in_progress)
1499 abort ();
1500 return pic_reg;
1501 }
1502
1503 /* Split constant addresses to handle cases that are too large.
1504 Add in the pic register when in PIC mode.
1505 Split immediates that require more than 1 instruction. */
1506 int
1507 spu_split_immediate (rtx * ops)
1508 {
1509 enum machine_mode mode = GET_MODE (ops[0]);
1510 enum immediate_class c = classify_immediate (ops[1], mode);
1511
1512 switch (c)
1513 {
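/* IC_IL2: each word of the constant needs two instructions, roughly
"ilhu reg,hi16" followed by "iohl reg,lo16" (sketch; e.g. 0x12345678
would become ilhu 0x1234 then iohl 0x5678). The loop below builds the
two partial constants by zeroing the low halfwords in ARRHI and the
high halfwords in ARRLO. */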
1514 case IC_IL2:
1515 {
1516 unsigned char arrhi[16];
1517 unsigned char arrlo[16];
1518 rtx to, hi, lo;
1519 int i;
1520 constant_to_array (mode, ops[1], arrhi);
1521 to = !can_create_pseudo_p () ? ops[0] : gen_reg_rtx (mode);
1522 for (i = 0; i < 16; i += 4)
1523 {
1524 arrlo[i + 2] = arrhi[i + 2];
1525 arrlo[i + 3] = arrhi[i + 3];
1526 arrlo[i + 0] = arrlo[i + 1] = 0;
1527 arrhi[i + 2] = arrhi[i + 3] = 0;
1528 }
1529 hi = array_to_constant (mode, arrhi);
1530 lo = array_to_constant (mode, arrlo);
1531 emit_move_insn (to, hi);
1532 emit_insn (gen_rtx_SET
1533 (VOIDmode, ops[0], gen_rtx_IOR (mode, to, lo)));
1534 return 1;
1535 }
1536 case IC_FSMBI2:
1537 {
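/* IC_FSMBI2: the constant has a single distinct nonzero byte value, so
it can be built (sketch) as an fsmbi -- which produces 0x00/0xff bytes
selected by a 16-bit mask -- followed by an AND with that byte value
replicated into every byte position. */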
1538 unsigned char arr_fsmbi[16];
1539 unsigned char arr_andbi[16];
1540 rtx to, reg_fsmbi, reg_and;
1541 int i;
1542 enum machine_mode imode = mode;
1543 /* We need to do reals as ints because the constant used in the
1544 * AND might not be a legitimate real constant. */
1545 imode = int_mode_for_mode (mode);
1546 constant_to_array (mode, ops[1], arr_fsmbi);
1547 if (imode != mode)
1548 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1549 else
1550 to = ops[0];
1551 for (i = 0; i < 16; i++)
1552 if (arr_fsmbi[i] != 0)
1553 {
1554 arr_andbi[0] = arr_fsmbi[i];
1555 arr_fsmbi[i] = 0xff;
1556 }
1557 for (i = 1; i < 16; i++)
1558 arr_andbi[i] = arr_andbi[0];
1559 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1560 reg_and = array_to_constant (imode, arr_andbi);
1561 emit_move_insn (to, reg_fsmbi);
1562 emit_insn (gen_rtx_SET
1563 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1564 return 1;
1565 }
1566 case IC_POOL:
1567 if (reload_in_progress || reload_completed)
1568 {
1569 rtx mem = force_const_mem (mode, ops[1]);
1570 if (TARGET_LARGE_MEM)
1571 {
1572 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1573 emit_move_insn (addr, XEXP (mem, 0));
1574 mem = replace_equiv_address (mem, addr);
1575 }
1576 emit_move_insn (ops[0], mem);
1577 return 1;
1578 }
1579 break;
1580 case IC_IL1s:
1581 case IC_IL2s:
1582 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1583 {
1584 if (c == IC_IL2s)
1585 {
1586 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1587 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1588 }
1589 else if (flag_pic)
1590 emit_insn (gen_pic (ops[0], ops[1]));
1591 if (flag_pic)
1592 {
1593 rtx pic_reg = get_pic_reg ();
1594 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1595 crtl->uses_pic_offset_table = 1;
1596 }
1597 return flag_pic || c == IC_IL2s;
1598 }
1599 break;
1600 case IC_IL1:
1601 case IC_FSMBI:
1602 case IC_CPAT:
1603 break;
1604 }
1605 return 0;
1606 }
1607
1608 /* SAVING is TRUE when we are generating the actual load and store
1609 instructions for REGNO. When determining the size of the stack
1610 needed for saving registers we must allocate enough space for the
1611 worst case, because we don't always have the information early enough
1612 to not allocate it. But we can at least eliminate the actual loads
1613 and stores during the prologue/epilogue. */
1614 static int
1615 need_to_save_reg (int regno, int saving)
1616 {
1617 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1618 return 1;
1619 if (flag_pic
1620 && regno == PIC_OFFSET_TABLE_REGNUM
1621 && (!saving || crtl->uses_pic_offset_table)
1622 && (!saving
1623 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1624 return 1;
1625 return 0;
1626 }
1627
1628 /* This function is only correct starting with local register
1629 allocation */
1630 int
1631 spu_saved_regs_size (void)
1632 {
1633 int reg_save_size = 0;
1634 int regno;
1635
1636 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1637 if (need_to_save_reg (regno, 0))
1638 reg_save_size += 0x10;
1639 return reg_save_size;
1640 }
1641
1642 static rtx
1643 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1644 {
1645 rtx reg = gen_rtx_REG (V4SImode, regno);
1646 rtx mem =
1647 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1648 return emit_insn (gen_movv4si (mem, reg));
1649 }
1650
1651 static rtx
1652 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1653 {
1654 rtx reg = gen_rtx_REG (V4SImode, regno);
1655 rtx mem =
1656 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1657 return emit_insn (gen_movv4si (reg, mem));
1658 }
1659
1660 /* This happens after reload, so we need to expand it. */
1661 static rtx
1662 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1663 {
1664 rtx insn;
1665 if (satisfies_constraint_K (GEN_INT (imm)))
1666 {
1667 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1668 }
1669 else
1670 {
1671 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1672 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1673 if (REGNO (src) == REGNO (scratch))
1674 abort ();
1675 }
1676 return insn;
1677 }
1678
1679 /* Return nonzero if this function is known to have a null epilogue. */
1680
1681 int
1682 direct_return (void)
1683 {
1684 if (reload_completed)
1685 {
1686 if (cfun->static_chain_decl == 0
1687 && (spu_saved_regs_size ()
1688 + get_frame_size ()
1689 + crtl->outgoing_args_size
1690 + crtl->args.pretend_args_size == 0)
1691 && current_function_is_leaf)
1692 return 1;
1693 }
1694 return 0;
1695 }
1696
1697 /*
1698 The stack frame looks like this:
1699 +-------------+
1700 | incoming |
1701 AP | args |
1702 +-------------+
1703 | $lr save |
1704 +-------------+
1705 prev SP | back chain |
1706 +-------------+
1707 | var args |
1708 | reg save | crtl->args.pretend_args_size bytes
1709 +-------------+
1710 | ... |
1711 | saved regs | spu_saved_regs_size() bytes
1712 +-------------+
1713 | ... |
1714 FP | vars | get_frame_size() bytes
1715 +-------------+
1716 | ... |
1717 | outgoing |
1718 | args | crtl->outgoing_args_size bytes
1719 +-------------+
1720 | $lr of next |
1721 | frame |
1722 +-------------+
1723 SP | back chain |
1724 +-------------+
1725
1726 */
1727 void
1728 spu_expand_prologue (void)
1729 {
1730 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1731 HOST_WIDE_INT total_size;
1732 HOST_WIDE_INT saved_regs_size;
1733 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1734 rtx scratch_reg_0, scratch_reg_1;
1735 rtx insn, real;
1736
1737 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1738 the "toplevel" insn chain. */
1739 emit_note (NOTE_INSN_DELETED);
1740
1741 if (flag_pic && optimize == 0)
1742 crtl->uses_pic_offset_table = 1;
1743
1744 if (spu_naked_function_p (current_function_decl))
1745 return;
1746
1747 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1748 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1749
1750 saved_regs_size = spu_saved_regs_size ();
1751 total_size = size + saved_regs_size
1752 + crtl->outgoing_args_size
1753 + crtl->args.pretend_args_size;
1754
1755 if (!current_function_is_leaf
1756 || cfun->calls_alloca || total_size > 0)
1757 total_size += STACK_POINTER_OFFSET;
1758
1759 /* Save this first because code after this might use the link
1760 register as a scratch register. */
1761 if (!current_function_is_leaf)
1762 {
1763 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
1764 RTX_FRAME_RELATED_P (insn) = 1;
1765 }
1766
1767 if (total_size > 0)
1768 {
1769 offset = -crtl->args.pretend_args_size;
1770 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1771 if (need_to_save_reg (regno, 1))
1772 {
1773 offset -= 16;
1774 insn = frame_emit_store (regno, sp_reg, offset);
1775 RTX_FRAME_RELATED_P (insn) = 1;
1776 }
1777 }
1778
1779 if (flag_pic && crtl->uses_pic_offset_table)
1780 {
1781 rtx pic_reg = get_pic_reg ();
1782 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
1783 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
1784 }
1785
1786 if (total_size > 0)
1787 {
1788 if (flag_stack_check)
1789 {
1790 /* We compare against total_size-1 because
1791 ($sp >= total_size) <=> ($sp > total_size-1) */
1792 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
1793 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
1794 rtx size_v4si = spu_const (V4SImode, total_size - 1);
1795 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
1796 {
1797 emit_move_insn (scratch_v4si, size_v4si);
1798 size_v4si = scratch_v4si;
1799 }
1800 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
1801 emit_insn (gen_vec_extractv4si
1802 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
1803 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
1804 }
1805
1806 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
1807 the value of the previous $sp because we save it as the back
1808 chain. */
1809 if (total_size <= 2000)
1810 {
1811 /* In this case we save the back chain first. */
1812 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
1813 insn =
1814 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
1815 }
1816 else if (satisfies_constraint_K (GEN_INT (-total_size)))
1817 {
1818 insn = emit_move_insn (scratch_reg_0, sp_reg);
1819 insn =
1820 emit_insn (gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size)));
1821 }
1822 else
1823 {
1824 insn = emit_move_insn (scratch_reg_0, sp_reg);
1825 insn =
1826 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
1827 }
1828 RTX_FRAME_RELATED_P (insn) = 1;
1829 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
1830 REG_NOTES (insn) =
1831 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, real, REG_NOTES (insn));
1832
1833 if (total_size > 2000)
1834 {
1835 /* Save the back chain ptr */
1836 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
1837 }
1838
1839 if (frame_pointer_needed)
1840 {
1841 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
1842 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
1843 + crtl->outgoing_args_size;
1844 /* Set the new frame_pointer */
1845 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
1846 RTX_FRAME_RELATED_P (insn) = 1;
1847 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
1848 REG_NOTES (insn) =
1849 gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
1850 real, REG_NOTES (insn));
1851 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
1852 }
1853 }
1854
1855 emit_note (NOTE_INSN_DELETED);
1856 }
1857
1858 void
1859 spu_expand_epilogue (bool sibcall_p)
1860 {
1861 int size = get_frame_size (), offset, regno;
1862 HOST_WIDE_INT saved_regs_size, total_size;
1863 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1864 rtx jump, scratch_reg_0;
1865
1866 /* A NOTE_INSN_DELETED is supposed to be at the start and end of
1867 the "toplevel" insn chain. */
1868 emit_note (NOTE_INSN_DELETED);
1869
1870 if (spu_naked_function_p (current_function_decl))
1871 return;
1872
1873 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1874
1875 saved_regs_size = spu_saved_regs_size ();
1876 total_size = size + saved_regs_size
1877 + crtl->outgoing_args_size
1878 + crtl->args.pretend_args_size;
1879
1880 if (!current_function_is_leaf
1881 || cfun->calls_alloca || total_size > 0)
1882 total_size += STACK_POINTER_OFFSET;
1883
1884 if (total_size > 0)
1885 {
1886 if (cfun->calls_alloca)
1887 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
1888 else
1889 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
1890
1891
1892 if (saved_regs_size > 0)
1893 {
1894 offset = -crtl->args.pretend_args_size;
1895 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
1896 if (need_to_save_reg (regno, 1))
1897 {
1898 offset -= 0x10;
1899 frame_emit_load (regno, sp_reg, offset);
1900 }
1901 }
1902 }
1903
1904 if (!current_function_is_leaf)
1905 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
1906
1907 if (!sibcall_p)
1908 {
1909 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
1910 jump = emit_jump_insn (gen__return ());
1911 emit_barrier_after (jump);
1912 }
1913
1914 emit_note (NOTE_INSN_DELETED);
1915 }
1916
1917 rtx
1918 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
1919 {
1920 if (count != 0)
1921 return 0;
1922 /* This is inefficient because it ends up copying to a save-register
1923 which then gets saved even though $lr has already been saved. But
1924 it does generate better code for leaf functions and we don't need
1925 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
1926 used for __builtin_return_address anyway, so maybe we don't care if
1927 it's inefficient. */
1928 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
1929 }
1930 \f
1931
1932 /* Given VAL, generate a constant appropriate for MODE.
1933 If MODE is a vector mode, every element will be VAL.
1934 For TImode, VAL will be zero extended to 128 bits. */
1935 rtx
1936 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
1937 {
1938 rtx inner;
1939 rtvec v;
1940 int units, i;
1941
1942 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
1943 || GET_MODE_CLASS (mode) == MODE_FLOAT
1944 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
1945 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
1946
1947 if (GET_MODE_CLASS (mode) == MODE_INT)
1948 return immed_double_const (val, 0, mode);
1949
1950 /* val is the bit representation of the float */
1951 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1952 return hwint_to_const_double (mode, val);
1953
1954 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
1955 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
1956 else
1957 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
1958
1959 units = GET_MODE_NUNITS (mode);
1960
1961 v = rtvec_alloc (units);
1962
1963 for (i = 0; i < units; ++i)
1964 RTVEC_ELT (v, i) = inner;
1965
1966 return gen_rtx_CONST_VECTOR (mode, v);
1967 }
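/* For instance, spu_const (V4SImode, 1) yields (const_vector:V4SI [1 1 1 1]),
and spu_const (SFmode, 0x3f800000) yields the SFmode constant 1.0
(illustrative; for float modes VAL is the raw bit pattern). */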
1968 \f
1969 /* branch hint stuff */
1970
1971 /* The hardware requires 8 insns between a hint and the branch it
1972 affects. This variable describes how many rtl instructions the
1973 compiler needs to see before inserting a hint. (FIXME: We should
1974 accept fewer and insert nops to enforce it, because hinting is always
1975 profitable for performance, but we do need to be careful of code
1976 size.) */
1977 int spu_hint_dist = (8 * 4);
1978
1979 /* Create a MODE vector constant from 4 ints. */
1980 rtx
1981 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
1982 {
1983 unsigned char arr[16];
1984 arr[0] = (a >> 24) & 0xff;
1985 arr[1] = (a >> 16) & 0xff;
1986 arr[2] = (a >> 8) & 0xff;
1987 arr[3] = (a >> 0) & 0xff;
1988 arr[4] = (b >> 24) & 0xff;
1989 arr[5] = (b >> 16) & 0xff;
1990 arr[6] = (b >> 8) & 0xff;
1991 arr[7] = (b >> 0) & 0xff;
1992 arr[8] = (c >> 24) & 0xff;
1993 arr[9] = (c >> 16) & 0xff;
1994 arr[10] = (c >> 8) & 0xff;
1995 arr[11] = (c >> 0) & 0xff;
1996 arr[12] = (d >> 24) & 0xff;
1997 arr[13] = (d >> 16) & 0xff;
1998 arr[14] = (d >> 8) & 0xff;
1999 arr[15] = (d >> 0) & 0xff;
2000 return array_to_constant(mode, arr);
2001 }
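/* For example (illustrative only), spu_const_from_ints (V4SImode,
   0x01020304, 0x05060708, 0x090a0b0c, 0x0d0e0f10) produces the byte
   array 01 02 03 04 05 06 07 08 09 0a 0b 0c 0d 0e 0f 10, i.e. each of
   A, B, C and D is stored most-significant byte first in its own
   4-byte slot. */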
2002
2003 /* An array of these is used to propagate hints to predecessor blocks. */
2004 struct spu_bb_info
2005 {
2006 rtx prop_jump; /* propagated from another block */
2007 basic_block bb; /* the original block. */
2008 };
2009
2010 /* The special $hbr register is used to prevent the insn scheduler from
2011 moving hbr insns across instructions which invalidate them. It
2012 should only be used in a clobber, and this function searches for
2013 insns which clobber it. */
2014 static bool
2015 insn_clobbers_hbr (rtx insn)
2016 {
2017 if (INSN_P (insn) && GET_CODE (PATTERN (insn)) == PARALLEL)
2018 {
2019 rtx parallel = PATTERN (insn);
2020 rtx clobber;
2021 int j;
2022 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2023 {
2024 clobber = XVECEXP (parallel, 0, j);
2025 if (GET_CODE (clobber) == CLOBBER
2026 && GET_CODE (XEXP (clobber, 0)) == REG
2027 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2028 return 1;
2029 }
2030 }
2031 return 0;
2032 }
2033
2034 static void
2035 spu_emit_branch_hint (rtx before, rtx branch, rtx target, int distance)
2036 {
2037 rtx branch_label;
2038 rtx hint, insn, prev, next;
2039
2040 if (before == 0 || branch == 0 || target == 0)
2041 return;
2042
2043 if (distance > 600)
2044 return;
2045
2046
2047 branch_label = gen_label_rtx ();
2048 LABEL_NUSES (branch_label)++;
2049 LABEL_PRESERVE_P (branch_label) = 1;
2050 insn = emit_label_before (branch_label, branch);
2051 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2052
2053 /* If the previous insn is pipe0, make the hbr dual issue with it. If
2054 the current insn is pipe0, dual issue with it. */
2055 prev = prev_active_insn (before);
2056 if (prev && get_pipe (prev) == 0)
2057 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2058 else if (get_pipe (before) == 0 && distance > spu_hint_dist)
2059 {
2060 next = next_active_insn (before);
2061 hint = emit_insn_after (gen_hbr (branch_label, target), before);
2062 if (next)
2063 PUT_MODE (next, TImode);
2064 }
2065 else
2066 {
2067 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2068 PUT_MODE (hint, TImode);
2069 }
2070 recog_memoized (hint);
2071 }
2072
2073 /* Return 0 if we don't want a hint for this branch. Otherwise return
2074 the rtx for the branch target. */
2075 static rtx
2076 get_branch_target (rtx branch)
2077 {
2078 if (GET_CODE (branch) == JUMP_INSN)
2079 {
2080 rtx set, src;
2081
2082 /* Return statements */
2083 if (GET_CODE (PATTERN (branch)) == RETURN)
2084 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2085
2086 /* jump table */
2087 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2088 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2089 return 0;
2090
2091 set = single_set (branch);
2092 src = SET_SRC (set);
2093 if (GET_CODE (SET_DEST (set)) != PC)
2094 abort ();
2095
2096 if (GET_CODE (src) == IF_THEN_ELSE)
2097 {
2098 rtx lab = 0;
2099 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2100 if (note)
2101 {
2102 /* If the more probable case is not a fall through, then
2103 try a branch hint. */
2104 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2105 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2106 && GET_CODE (XEXP (src, 1)) != PC)
2107 lab = XEXP (src, 1);
2108 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2109 && GET_CODE (XEXP (src, 2)) != PC)
2110 lab = XEXP (src, 2);
2111 }
2112 if (lab)
2113 {
2114 if (GET_CODE (lab) == RETURN)
2115 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2116 return lab;
2117 }
2118 return 0;
2119 }
2120
2121 return src;
2122 }
2123 else if (GET_CODE (branch) == CALL_INSN)
2124 {
2125 rtx call;
2126 /* All of our call patterns are in a PARALLEL and the CALL is
2127 the first pattern in the PARALLEL. */
2128 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2129 abort ();
2130 call = XVECEXP (PATTERN (branch), 0, 0);
2131 if (GET_CODE (call) == SET)
2132 call = SET_SRC (call);
2133 if (GET_CODE (call) != CALL)
2134 abort ();
2135 return XEXP (XEXP (call, 0), 0);
2136 }
2137 return 0;
2138 }
2139
2140 static void
2141 insert_branch_hints (void)
2142 {
2143 struct spu_bb_info *spu_bb_info;
2144 rtx branch, insn, next;
2145 rtx branch_target = 0;
2146 int branch_addr = 0, insn_addr, head_addr;
2147 basic_block bb;
2148 unsigned int j;
2149
2150 spu_bb_info =
2151 (struct spu_bb_info *) xcalloc (last_basic_block + 1,
2152 sizeof (struct spu_bb_info));
2153
2154 /* We need exact insn addresses and lengths. */
2155 shorten_branches (get_insns ());
2156
2157 FOR_EACH_BB_REVERSE (bb)
2158 {
2159 head_addr = INSN_ADDRESSES (INSN_UID (BB_HEAD (bb)));
2160 branch = 0;
2161 if (spu_bb_info[bb->index].prop_jump)
2162 {
2163 branch = spu_bb_info[bb->index].prop_jump;
2164 branch_target = get_branch_target (branch);
2165 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2166 }
2167 /* Search from end of a block to beginning. In this loop, find
2168 jumps which need a branch hint and emit them only when:
2169 - it's an indirect branch and we're at the insn which sets
2170 the register
2171 - we're at an insn that will invalidate the hint. e.g., a
2172 call, another hint insn, inline asm that clobbers $hbr, and
2173 some inlined operations (divmodsi4). Don't consider jumps
2174 because they are only at the end of a block and are
2175 considered when we are deciding whether to propagate
2176 - we're getting too far away from the branch. The hbr insns
2177 only have a signed 10-bit offset
2178 We go back as far as possible so the branch will be considered
2179 for propagation when we get to the beginning of the block. */
2180 next = 0;
2181 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2182 {
2183 if (INSN_P (insn))
2184 {
2185 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2186 if (branch && next
2187 && ((GET_CODE (branch_target) == REG
2188 && set_of (branch_target, insn) != NULL_RTX)
2189 || insn_clobbers_hbr (insn)
2190 || branch_addr - insn_addr > 600))
2191 {
2192 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2193 if (insn != BB_END (bb)
2194 && branch_addr - next_addr >= spu_hint_dist)
2195 {
2196 if (dump_file)
2197 fprintf (dump_file,
2198 "hint for %i in block %i before %i\n",
2199 INSN_UID (branch), bb->index, INSN_UID (next));
2200 spu_emit_branch_hint (next, branch, branch_target,
2201 branch_addr - next_addr);
2202 }
2203 branch = 0;
2204 }
2205
2206 /* JUMP_P will only be true at the end of a block. When
2207 branch is already set it means we've previously decided
2208 to propagate a hint for that branch into this block. */
2209 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2210 {
2211 branch = 0;
2212 if ((branch_target = get_branch_target (insn)))
2213 {
2214 branch = insn;
2215 branch_addr = insn_addr;
2216 }
2217 }
2218
2219 /* When a branch hint is emitted it will be inserted
2220 before "next". Make sure next is the beginning of a
2221 cycle to minimize impact on the scheduled insns. */
2222 if (GET_MODE (insn) == TImode)
2223 next = insn;
2224 }
2225 if (insn == BB_HEAD (bb))
2226 break;
2227 }
2228
2229 if (branch)
2230 {
2231 /* If we haven't emitted a hint for this branch yet, it might
2232 be profitable to emit it in one of the predecessor blocks,
2233 especially for loops. */
2234 rtx bbend;
2235 basic_block prev = 0, prop = 0, prev2 = 0;
2236 int loop_exit = 0, simple_loop = 0;
2237 int next_addr = 0;
2238 if (next)
2239 next_addr = INSN_ADDRESSES (INSN_UID (next));
2240
2241 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2242 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2243 prev = EDGE_PRED (bb, j)->src;
2244 else
2245 prev2 = EDGE_PRED (bb, j)->src;
2246
2247 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2248 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2249 loop_exit = 1;
2250 else if (EDGE_SUCC (bb, j)->dest == bb)
2251 simple_loop = 1;
2252
2253 /* If this branch is a loop exit then propagate to previous
2254 fallthru block. This catches the cases when it is a simple
2255 loop or when there is an initial branch into the loop. */
2256 if (prev && loop_exit && prev->loop_depth <= bb->loop_depth)
2257 prop = prev;
2258
2259 /* If there is only one adjacent predecessor, don't propagate
2260 outside this loop. This loop_depth test isn't perfect, but
2261 I'm not sure the loop_father member is valid at this point. */
2262 else if (prev && single_pred_p (bb)
2263 && prev->loop_depth == bb->loop_depth)
2264 prop = prev;
2265
2266 /* If this is the JOIN block of a simple IF-THEN then
2267 propagate the hint to the HEADER block. */
2268 else if (prev && prev2
2269 && EDGE_COUNT (bb->preds) == 2
2270 && EDGE_COUNT (prev->preds) == 1
2271 && EDGE_PRED (prev, 0)->src == prev2
2272 && prev2->loop_depth == bb->loop_depth
2273 && GET_CODE (branch_target) != REG)
2274 prop = prev;
2275
2276 /* Don't propagate when:
2277 - this is a simple loop and the hint would be too far
2278 - this is not a simple loop and there are 16 insns in
2279 this block already
2280 - the predecessor block ends in a branch that will be
2281 hinted
2282 - the predecessor block ends in an insn that invalidates
2283 the hint */
2284 if (prop
2285 && prop->index >= 0
2286 && (bbend = BB_END (prop))
2287 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2288 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2289 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2290 {
2291 if (dump_file)
2292 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2293 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2294 bb->index, prop->index, bb->loop_depth,
2295 INSN_UID (branch), loop_exit, simple_loop,
2296 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2297
2298 spu_bb_info[prop->index].prop_jump = branch;
2299 spu_bb_info[prop->index].bb = bb;
2300 }
2301 else if (next && branch_addr - next_addr >= spu_hint_dist)
2302 {
2303 if (dump_file)
2304 fprintf (dump_file, "hint for %i in block %i before %i\n",
2305 INSN_UID (branch), bb->index, INSN_UID (next));
2306 spu_emit_branch_hint (next, branch, branch_target,
2307 branch_addr - next_addr);
2308 }
2309 branch = 0;
2310 }
2311 }
2312 free (spu_bb_info);
2313 }
2314 \f
2315 /* Emit a nop for INSN such that the two will dual issue. This assumes
2316 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2317 We check for TImode to handle a MULTI1 insn which has dual issued its
2318 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2319 ADDR_VEC insns. */
2320 static void
2321 emit_nop_for_insn (rtx insn)
2322 {
2323 int p;
2324 rtx new_insn;
2325 p = get_pipe (insn);
2326 if (p == 1 && GET_MODE (insn) == TImode)
2327 {
2328 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2329 PUT_MODE (new_insn, TImode);
2330 PUT_MODE (insn, VOIDmode);
2331 }
2332 else
2333 new_insn = emit_insn_after (gen_lnop (), insn);
2334 }
2335
2336 /* Insert nops in basic blocks to meet dual issue alignment
2337 requirements. */
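/* For example (illustrative only), if a TImode-marked insn (the start
   of a dual-issue pair) falls at address 0x104, then (0x104 & 7) != 0,
   so a 4-byte nop is emitted for the previous insn to push the pair
   onto an 8-byte boundary. */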
2338 static void
2339 insert_nops (void)
2340 {
2341 rtx insn, next_insn, prev_insn;
2342 int length;
2343 int addr;
2344
2345 /* This sets up INSN_ADDRESSES. */
2346 shorten_branches (get_insns ());
2347
2348 /* Keep track of length added by nops. */
2349 length = 0;
2350
2351 prev_insn = 0;
2352 for (insn = get_insns (); insn; insn = next_insn)
2353 {
2354 next_insn = next_active_insn (insn);
2355 addr = INSN_ADDRESSES (INSN_UID (insn));
2356 if (GET_MODE (insn) == TImode
2357 && next_insn
2358 && GET_MODE (next_insn) != TImode
2359 && ((addr + length) & 7) != 0)
2360 {
2361 /* prev_insn will always be set because the first insn is
2362 always 8-byte aligned. */
2363 emit_nop_for_insn (prev_insn);
2364 length += 4;
2365 }
2366 prev_insn = insn;
2367 }
2368 }
2369
2370 static void
2371 spu_machine_dependent_reorg (void)
2372 {
2373 if (optimize > 0)
2374 {
2375 if (TARGET_BRANCH_HINTS)
2376 insert_branch_hints ();
2377 insert_nops ();
2378 }
2379 }
2380 \f
2381
2382 /* Insn scheduling routines, primarily for dual issue. */
2383 static int
2384 spu_sched_issue_rate (void)
2385 {
2386 return 2;
2387 }
2388
2389 static int
2390 spu_sched_variable_issue (FILE * dump ATTRIBUTE_UNUSED,
2391 int verbose ATTRIBUTE_UNUSED, rtx insn,
2392 int can_issue_more)
2393 {
2394 if (GET_CODE (PATTERN (insn)) != USE
2395 && GET_CODE (PATTERN (insn)) != CLOBBER
2396 && get_pipe (insn) != -2)
2397 can_issue_more--;
2398 return can_issue_more;
2399 }
2400
2401 static int
2402 get_pipe (rtx insn)
2403 {
2404 enum attr_type t;
2405 /* Handle inline asm */
2406 if (INSN_CODE (insn) == -1)
2407 return -1;
2408 t = get_attr_type (insn);
2409 switch (t)
2410 {
2411 case TYPE_CONVERT:
2412 return -2;
2413 case TYPE_MULTI0:
2414 return -1;
2415
2416 case TYPE_FX2:
2417 case TYPE_FX3:
2418 case TYPE_SPR:
2419 case TYPE_NOP:
2420 case TYPE_FXB:
2421 case TYPE_FPD:
2422 case TYPE_FP6:
2423 case TYPE_FP7:
2424 case TYPE_IPREFETCH:
2425 return 0;
2426
2427 case TYPE_LNOP:
2428 case TYPE_SHUF:
2429 case TYPE_LOAD:
2430 case TYPE_STORE:
2431 case TYPE_BR:
2432 case TYPE_MULTI1:
2433 case TYPE_HBR:
2434 return 1;
2435 default:
2436 abort ();
2437 }
2438 }
2439
2440 static int
2441 spu_sched_adjust_priority (rtx insn, int pri)
2442 {
2443 int p = get_pipe (insn);
2444 /* Schedule UNSPEC_CONVERT's early so they have less effect on
2445 * scheduling. */
2446 if (GET_CODE (PATTERN (insn)) == USE
2447 || GET_CODE (PATTERN (insn)) == CLOBBER
2448 || p == -2)
2449 return pri + 100;
2450 /* Schedule pipe0 insns early for greedier dual issue. */
2451 if (p != 1)
2452 return pri + 50;
2453 return pri;
2454 }
2455
2456 /* INSN is dependent on DEP_INSN. */
2457 static int
2458 spu_sched_adjust_cost (rtx insn, rtx link ATTRIBUTE_UNUSED,
2459 rtx dep_insn ATTRIBUTE_UNUSED, int cost)
2460 {
2461 if (GET_CODE (insn) == CALL_INSN)
2462 return cost - 2;
2463 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
2464 scheduler makes every insn in a block anti-dependent on the final
2465 jump_insn. We adjust here so higher cost insns will get scheduled
2466 earlier. */
2467 if (GET_CODE (insn) == JUMP_INSN && REG_NOTE_KIND (link) == REG_DEP_ANTI)
2468 return insn_cost (dep_insn) - 3;
2469 return cost;
2470 }
2471 \f
2472 /* Create a CONST_DOUBLE from a string. */
2473 struct rtx_def *
2474 spu_float_const (const char *string, enum machine_mode mode)
2475 {
2476 REAL_VALUE_TYPE value;
2477 value = REAL_VALUE_ATOF (string, mode);
2478 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
2479 }
2480
2481 int
2482 spu_constant_address_p (rtx x)
2483 {
2484 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
2485 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
2486 || GET_CODE (x) == HIGH);
2487 }
2488
2489 static enum spu_immediate
2490 which_immediate_load (HOST_WIDE_INT val)
2491 {
2492 gcc_assert (val == trunc_int_for_mode (val, SImode));
2493
2494 if (val >= -0x8000 && val <= 0x7fff)
2495 return SPU_IL;
2496 if (val >= 0 && val <= 0x3ffff)
2497 return SPU_ILA;
2498 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2499 return SPU_ILH;
2500 if ((val & 0xffff) == 0)
2501 return SPU_ILHU;
2502
2503 return SPU_NONE;
2504 }
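/* Worked examples (illustrative only):
   0x00001234 -> SPU_IL   (fits the signed 16-bit il immediate)
   0x00023456 -> SPU_ILA  (fits the 18-bit ila immediate)
   0x12341234 -> SPU_ILH  (both halfwords identical)
   0x56780000 -> SPU_ILHU (low halfword is zero) */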
2505
2506 /* Return true when OP can be loaded by one of the il instructions, or
2507 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
2508 int
2509 immediate_load_p (rtx op, enum machine_mode mode)
2510 {
2511 if (CONSTANT_P (op))
2512 {
2513 enum immediate_class c = classify_immediate (op, mode);
2514 return c == IC_IL1 || c == IC_IL1s
2515 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
2516 }
2517 return 0;
2518 }
2519
2520 /* Return true if the first SIZE bytes of ARR form a constant that can be
2521 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
2522 represent the size and offset of the instruction to use. */
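/* For example (illustrative only), the 16-byte pattern
   10 11 12 13 00 01 02 03 18 19 1a 1b 1c 1d 1e 1f
   is what cwd produces for a 4-byte insert at offset 4, so cpat_info
   returns 1 with *prun == 4 and *pstart == 4. */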
2523 static int
2524 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
2525 {
2526 int cpat, run, i, start;
2527 cpat = 1;
2528 run = 0;
2529 start = -1;
2530 for (i = 0; i < size && cpat; i++)
2531 if (arr[i] != i+16)
2532 {
2533 if (!run)
2534 {
2535 start = i;
2536 if (arr[i] == 3)
2537 run = 1;
2538 else if (arr[i] == 2 && arr[i+1] == 3)
2539 run = 2;
2540 else if (arr[i] == 0)
2541 {
2542 while (i+run < 16 && arr[i+run] == run)
2543 run++;
2544 if (run != 4 && run != 8)
2545 cpat = 0;
2546 }
2547 else
2548 cpat = 0;
2549 if ((i & (run-1)) != 0)
2550 cpat = 0;
2551 i += run;
2552 }
2553 else
2554 cpat = 0;
2555 }
2556 if (cpat && (run || size < 16))
2557 {
2558 if (run == 0)
2559 run = 1;
2560 if (prun)
2561 *prun = run;
2562 if (pstart)
2563 *pstart = start == -1 ? 16-run : start;
2564 return 1;
2565 }
2566 return 0;
2567 }
2568
2569 /* OP is a CONSTANT_P. Determine what instructions can be used to load
2570 it into a register. MODE is only valid when OP is a CONST_INT. */
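/* A few illustrative classifications (not exhaustive): (const_int 0)
   is IC_IL1 (a single il); a SYMBOL_REF without TARGET_LARGE_MEM is
   IC_IL1s (a single ila); a constant whose bytes are all 0x00 or 0xff
   is IC_FSMBI; anything the immediate forms cannot build falls back to
   IC_POOL (the constant pool). */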
2571 static enum immediate_class
2572 classify_immediate (rtx op, enum machine_mode mode)
2573 {
2574 HOST_WIDE_INT val;
2575 unsigned char arr[16];
2576 int i, j, repeated, fsmbi, repeat;
2577
2578 gcc_assert (CONSTANT_P (op));
2579
2580 if (GET_MODE (op) != VOIDmode)
2581 mode = GET_MODE (op);
2582
2583 /* A V4SI const_vector with all identical symbols is ok. */
2584 if (!flag_pic
2585 && mode == V4SImode
2586 && GET_CODE (op) == CONST_VECTOR
2587 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
2588 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
2589 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
2590 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
2591 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
2592 op = CONST_VECTOR_ELT (op, 0);
2593
2594 switch (GET_CODE (op))
2595 {
2596 case SYMBOL_REF:
2597 case LABEL_REF:
2598 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
2599
2600 case CONST:
2601 /* We can never know if the resulting address fits in 18 bits and can be
2602 loaded with ila. For now, assume the address will not overflow if
2603 the displacement is "small" (fits 'K' constraint). */
2604 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
2605 {
2606 rtx sym = XEXP (XEXP (op, 0), 0);
2607 rtx cst = XEXP (XEXP (op, 0), 1);
2608
2609 if (GET_CODE (sym) == SYMBOL_REF
2610 && GET_CODE (cst) == CONST_INT
2611 && satisfies_constraint_K (cst))
2612 return IC_IL1s;
2613 }
2614 return IC_IL2s;
2615
2616 case HIGH:
2617 return IC_IL1s;
2618
2619 case CONST_VECTOR:
2620 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
2621 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
2622 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
2623 return IC_POOL;
2624 /* Fall through. */
2625
2626 case CONST_INT:
2627 case CONST_DOUBLE:
2628 constant_to_array (mode, op, arr);
2629
2630 /* Check that each 4-byte slot is identical. */
2631 repeated = 1;
2632 for (i = 4; i < 16; i += 4)
2633 for (j = 0; j < 4; j++)
2634 if (arr[j] != arr[i + j])
2635 repeated = 0;
2636
2637 if (repeated)
2638 {
2639 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2640 val = trunc_int_for_mode (val, SImode);
2641
2642 if (which_immediate_load (val) != SPU_NONE)
2643 return IC_IL1;
2644 }
2645
2646 /* Any mode of 2 bytes or smaller can be loaded with an il
2647 instruction. */
2648 gcc_assert (GET_MODE_SIZE (mode) > 2);
2649
2650 fsmbi = 1;
2651 repeat = 0;
2652 for (i = 0; i < 16 && fsmbi; i++)
2653 if (arr[i] != 0 && repeat == 0)
2654 repeat = arr[i];
2655 else if (arr[i] != 0 && arr[i] != repeat)
2656 fsmbi = 0;
2657 if (fsmbi)
2658 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
2659
2660 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
2661 return IC_CPAT;
2662
2663 if (repeated)
2664 return IC_IL2;
2665
2666 return IC_POOL;
2667 default:
2668 break;
2669 }
2670 gcc_unreachable ();
2671 }
2672
2673 static enum spu_immediate
2674 which_logical_immediate (HOST_WIDE_INT val)
2675 {
2676 gcc_assert (val == trunc_int_for_mode (val, SImode));
2677
2678 if (val >= -0x200 && val <= 0x1ff)
2679 return SPU_ORI;
2680 if (val >= 0 && val <= 0xffff)
2681 return SPU_IOHL;
2682 if ((val & 0xffff) == ((val >> 16) & 0xffff))
2683 {
2684 val = trunc_int_for_mode (val, HImode);
2685 if (val >= -0x200 && val <= 0x1ff)
2686 return SPU_ORHI;
2687 if ((val & 0xff) == ((val >> 8) & 0xff))
2688 {
2689 val = trunc_int_for_mode (val, QImode);
2690 if (val >= -0x200 && val <= 0x1ff)
2691 return SPU_ORBI;
2692 }
2693 }
2694 return SPU_NONE;
2695 }
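/* For example (illustrative only), 0x01010101 classifies as SPU_ORBI:
   the halfwords match, the bytes match, and the byte value 0x01 fits
   the signed 10-bit orbi immediate. */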
2696
2697 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
2698 CONST_DOUBLEs. */
2699 static int
2700 const_vector_immediate_p (rtx x)
2701 {
2702 int i;
2703 gcc_assert (GET_CODE (x) == CONST_VECTOR);
2704 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
2705 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
2706 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
2707 return 0;
2708 return 1;
2709 }
2710
2711 int
2712 logical_immediate_p (rtx op, enum machine_mode mode)
2713 {
2714 HOST_WIDE_INT val;
2715 unsigned char arr[16];
2716 int i, j;
2717
2718 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2719 || GET_CODE (op) == CONST_VECTOR);
2720
2721 if (GET_CODE (op) == CONST_VECTOR
2722 && !const_vector_immediate_p (op))
2723 return 0;
2724
2725 if (GET_MODE (op) != VOIDmode)
2726 mode = GET_MODE (op);
2727
2728 constant_to_array (mode, op, arr);
2729
2730 /* Check that bytes are repeated. */
2731 for (i = 4; i < 16; i += 4)
2732 for (j = 0; j < 4; j++)
2733 if (arr[j] != arr[i + j])
2734 return 0;
2735
2736 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2737 val = trunc_int_for_mode (val, SImode);
2738
2739 i = which_logical_immediate (val);
2740 return i != SPU_NONE && i != SPU_IOHL;
2741 }
2742
2743 int
2744 iohl_immediate_p (rtx op, enum machine_mode mode)
2745 {
2746 HOST_WIDE_INT val;
2747 unsigned char arr[16];
2748 int i, j;
2749
2750 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2751 || GET_CODE (op) == CONST_VECTOR);
2752
2753 if (GET_CODE (op) == CONST_VECTOR
2754 && !const_vector_immediate_p (op))
2755 return 0;
2756
2757 if (GET_MODE (op) != VOIDmode)
2758 mode = GET_MODE (op);
2759
2760 constant_to_array (mode, op, arr);
2761
2762 /* Check that bytes are repeated. */
2763 for (i = 4; i < 16; i += 4)
2764 for (j = 0; j < 4; j++)
2765 if (arr[j] != arr[i + j])
2766 return 0;
2767
2768 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
2769 val = trunc_int_for_mode (val, SImode);
2770
2771 return val >= 0 && val <= 0xffff;
2772 }
2773
2774 int
2775 arith_immediate_p (rtx op, enum machine_mode mode,
2776 HOST_WIDE_INT low, HOST_WIDE_INT high)
2777 {
2778 HOST_WIDE_INT val;
2779 unsigned char arr[16];
2780 int bytes, i, j;
2781
2782 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
2783 || GET_CODE (op) == CONST_VECTOR);
2784
2785 if (GET_CODE (op) == CONST_VECTOR
2786 && !const_vector_immediate_p (op))
2787 return 0;
2788
2789 if (GET_MODE (op) != VOIDmode)
2790 mode = GET_MODE (op);
2791
2792 constant_to_array (mode, op, arr);
2793
2794 if (VECTOR_MODE_P (mode))
2795 mode = GET_MODE_INNER (mode);
2796
2797 bytes = GET_MODE_SIZE (mode);
2798 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
2799
2800 /* Check that bytes are repeated. */
2801 for (i = bytes; i < 16; i += bytes)
2802 for (j = 0; j < bytes; j++)
2803 if (arr[j] != arr[i + j])
2804 return 0;
2805
2806 val = arr[0];
2807 for (j = 1; j < bytes; j++)
2808 val = (val << 8) | arr[j];
2809
2810 val = trunc_int_for_mode (val, mode);
2811
2812 return val >= low && val <= high;
2813 }
2814
2815 /* We accept:
2816 - any 32-bit constant (SImode, SFmode)
2817 - any constant that can be generated with fsmbi (any mode)
2818 - a 64-bit constant where the high and low bits are identical
2819 (DImode, DFmode)
2820 - a 128-bit constant where the four 32-bit words match. */
2821 int
2822 spu_legitimate_constant_p (rtx x)
2823 {
2824 if (GET_CODE (x) == HIGH)
2825 x = XEXP (x, 0);
2826 /* V4SI with all identical symbols is valid. */
2827 if (!flag_pic
2828 && GET_MODE (x) == V4SImode
2829 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
2830 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
2831 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
2832 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
2833 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
2834 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
2835
2836 if (GET_CODE (x) == CONST_VECTOR
2837 && !const_vector_immediate_p (x))
2838 return 0;
2839 return 1;
2840 }
2841
2842 /* Valid addresses are:
2843 - symbol_ref, label_ref, const
2844 - reg
2845 - reg + const, where either reg or const is 16 byte aligned
2846 - reg + reg, alignment doesn't matter
2847 The alignment matters in the reg+const case because lqd and stqd
2848 ignore the 4 least significant bits of the const. (TODO: It might be
2849 preferable to allow any alignment and fix it up when splitting.) */
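/* Illustrative examples (not exhaustive; Rn and Rm are placeholders):
   (reg sp)                        valid
   (plus (reg sp) (const_int 32))  valid, aligned base and the offset is
                                   a multiple of 16
   (plus (reg Rn) (const_int 4))   rejected unless Rn is a frame, stack or
                                   virtual register, since only those are
                                   known to stay aligned
   (plus (reg Rn) (reg Rm))        valid regardless of alignment */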
2850 int
2851 spu_legitimate_address (enum machine_mode mode ATTRIBUTE_UNUSED,
2852 rtx x, int reg_ok_strict)
2853 {
2854 if (mode == TImode && GET_CODE (x) == AND
2855 && GET_CODE (XEXP (x, 1)) == CONST_INT
2856 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) -16)
2857 x = XEXP (x, 0);
2858 switch (GET_CODE (x))
2859 {
2860 case SYMBOL_REF:
2861 case LABEL_REF:
2862 return !TARGET_LARGE_MEM;
2863
2864 case CONST:
2865 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (x, 0)) == PLUS)
2866 {
2867 rtx sym = XEXP (XEXP (x, 0), 0);
2868 rtx cst = XEXP (XEXP (x, 0), 1);
2869
2870 /* Accept any symbol_ref + constant, assuming it does not
2871 wrap around the local store addressability limit. */
2872 if (GET_CODE (sym) == SYMBOL_REF && GET_CODE (cst) == CONST_INT)
2873 return 1;
2874 }
2875 return 0;
2876
2877 case CONST_INT:
2878 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
2879
2880 case SUBREG:
2881 x = XEXP (x, 0);
2882 gcc_assert (GET_CODE (x) == REG);
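      /* Fall through.  */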
2883
2884 case REG:
2885 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
2886
2887 case PLUS:
2888 case LO_SUM:
2889 {
2890 rtx op0 = XEXP (x, 0);
2891 rtx op1 = XEXP (x, 1);
2892 if (GET_CODE (op0) == SUBREG)
2893 op0 = XEXP (op0, 0);
2894 if (GET_CODE (op1) == SUBREG)
2895 op1 = XEXP (op1, 0);
2896 /* We can't just accept any aligned register because CSE can
2897 change it to a register that is not marked aligned and then
2898 recog will fail. So we only accept frame registers because
2899 they will only be changed to other frame registers. */
2900 if (GET_CODE (op0) == REG
2901 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2902 && GET_CODE (op1) == CONST_INT
2903 && INTVAL (op1) >= -0x2000
2904 && INTVAL (op1) <= 0x1fff
2905 && (regno_aligned_for_load (REGNO (op0)) || (INTVAL (op1) & 15) == 0))
2906 return 1;
2907 if (GET_CODE (op0) == REG
2908 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
2909 && GET_CODE (op1) == REG
2910 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
2911 return 1;
2912 }
2913 break;
2914
2915 default:
2916 break;
2917 }
2918 return 0;
2919 }
2920
2921 /* When the address is reg + const_int, force the const_int into a
2922 register. */
2923 rtx
2924 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
2925 enum machine_mode mode)
2926 {
2927 rtx op0, op1;
2928 /* Make sure both operands are registers. */
2929 if (GET_CODE (x) == PLUS)
2930 {
2931 op0 = XEXP (x, 0);
2932 op1 = XEXP (x, 1);
2933 if (ALIGNED_SYMBOL_REF_P (op0))
2934 {
2935 op0 = force_reg (Pmode, op0);
2936 mark_reg_pointer (op0, 128);
2937 }
2938 else if (GET_CODE (op0) != REG)
2939 op0 = force_reg (Pmode, op0);
2940 if (ALIGNED_SYMBOL_REF_P (op1))
2941 {
2942 op1 = force_reg (Pmode, op1);
2943 mark_reg_pointer (op1, 128);
2944 }
2945 else if (GET_CODE (op1) != REG)
2946 op1 = force_reg (Pmode, op1);
2947 x = gen_rtx_PLUS (Pmode, op0, op1);
2948 if (spu_legitimate_address (mode, x, 0))
2949 return x;
2950 }
2951 return NULL_RTX;
2952 }
2953
2954 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
2955 struct attribute_spec.handler. */
2956 static tree
2957 spu_handle_fndecl_attribute (tree * node,
2958 tree name,
2959 tree args ATTRIBUTE_UNUSED,
2960 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2961 {
2962 if (TREE_CODE (*node) != FUNCTION_DECL)
2963 {
2964 warning (0, "`%s' attribute only applies to functions",
2965 IDENTIFIER_POINTER (name));
2966 *no_add_attrs = true;
2967 }
2968
2969 return NULL_TREE;
2970 }
2971
2972 /* Handle the "vector" attribute. */
2973 static tree
2974 spu_handle_vector_attribute (tree * node, tree name,
2975 tree args ATTRIBUTE_UNUSED,
2976 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
2977 {
2978 tree type = *node, result = NULL_TREE;
2979 enum machine_mode mode;
2980 int unsigned_p;
2981
2982 while (POINTER_TYPE_P (type)
2983 || TREE_CODE (type) == FUNCTION_TYPE
2984 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
2985 type = TREE_TYPE (type);
2986
2987 mode = TYPE_MODE (type);
2988
2989 unsigned_p = TYPE_UNSIGNED (type);
2990 switch (mode)
2991 {
2992 case DImode:
2993 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
2994 break;
2995 case SImode:
2996 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
2997 break;
2998 case HImode:
2999 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3000 break;
3001 case QImode:
3002 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3003 break;
3004 case SFmode:
3005 result = V4SF_type_node;
3006 break;
3007 case DFmode:
3008 result = V2DF_type_node;
3009 break;
3010 default:
3011 break;
3012 }
3013
3014 /* Propagate qualifiers attached to the element type
3015 onto the vector type. */
3016 if (result && result != type && TYPE_QUALS (type))
3017 result = build_qualified_type (result, TYPE_QUALS (type));
3018
3019 *no_add_attrs = true; /* No need to hang on to the attribute. */
3020
3021 if (!result)
3022 warning (0, "`%s' attribute ignored", IDENTIFIER_POINTER (name));
3023 else
3024 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3025
3026 return NULL_TREE;
3027 }
3028
3029 /* Return nonzero if FUNC is a naked function. */
3030 static int
3031 spu_naked_function_p (tree func)
3032 {
3033 tree a;
3034
3035 if (TREE_CODE (func) != FUNCTION_DECL)
3036 abort ();
3037
3038 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3039 return a != NULL_TREE;
3040 }
3041
3042 int
3043 spu_initial_elimination_offset (int from, int to)
3044 {
3045 int saved_regs_size = spu_saved_regs_size ();
3046 int sp_offset = 0;
3047 if (!current_function_is_leaf || crtl->outgoing_args_size
3048 || get_frame_size () || saved_regs_size)
3049 sp_offset = STACK_POINTER_OFFSET;
3050 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3051 return (sp_offset + crtl->outgoing_args_size);
3052 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3053 return 0;
3054 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3055 return sp_offset + crtl->outgoing_args_size
3056 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3057 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3058 return get_frame_size () + saved_regs_size + sp_offset;
3059 return 0;
3060 }
3061
3062 rtx
3063 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3064 {
3065 enum machine_mode mode = TYPE_MODE (type);
3066 int byte_size = ((mode == BLKmode)
3067 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3068
3069 /* Make sure small structs are left justified in a register. */
3070 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3071 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3072 {
3073 enum machine_mode smode;
3074 rtvec v;
3075 int i;
3076 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3077 int n = byte_size / UNITS_PER_WORD;
3078 v = rtvec_alloc (nregs);
3079 for (i = 0; i < n; i++)
3080 {
3081 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3082 gen_rtx_REG (TImode,
3083 FIRST_RETURN_REGNUM
3084 + i),
3085 GEN_INT (UNITS_PER_WORD * i));
3086 byte_size -= UNITS_PER_WORD;
3087 }
3088
3089 if (n < nregs)
3090 {
3091 if (byte_size < 4)
3092 byte_size = 4;
3093 smode =
3094 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3095 RTVEC_ELT (v, n) =
3096 gen_rtx_EXPR_LIST (VOIDmode,
3097 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
3098 GEN_INT (UNITS_PER_WORD * n));
3099 }
3100 return gen_rtx_PARALLEL (mode, v);
3101 }
3102 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
3103 }
3104
3105 rtx
3106 spu_function_arg (CUMULATIVE_ARGS cum,
3107 enum machine_mode mode,
3108 tree type, int named ATTRIBUTE_UNUSED)
3109 {
3110 int byte_size;
3111
3112 if (cum >= MAX_REGISTER_ARGS)
3113 return 0;
3114
3115 byte_size = ((mode == BLKmode)
3116 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3117
3118 /* The ABI does not allow parameters to be passed partially in
3119 reg and partially in stack. */
3120 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
3121 return 0;
3122
3123 /* Make sure small structs are left justified in a register. */
3124 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3125 && byte_size < UNITS_PER_WORD && byte_size > 0)
3126 {
3127 enum machine_mode smode;
3128 rtx gr_reg;
3129 if (byte_size < 4)
3130 byte_size = 4;
3131 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
3132 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
3133 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
3134 const0_rtx);
3135 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
3136 }
3137 else
3138 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
3139 }
3140
3141 /* Variable sized types are passed by reference. */
3142 static bool
3143 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
3144 enum machine_mode mode ATTRIBUTE_UNUSED,
3145 const_tree type, bool named ATTRIBUTE_UNUSED)
3146 {
3147 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
3148 }
3149 \f
3150
3151 /* Var args. */
3152
3153 /* Create and return the va_list datatype.
3154
3155 On SPU, va_list is an array type equivalent to
3156
3157 typedef struct __va_list_tag
3158 {
3159 void *__args __attribute__((__aligned(16)));
3160 void *__skip __attribute__((__aligned(16)));
3161
3162 } va_list[1];
3163
3164 where __args points to the arg that will be returned by the next
3165 va_arg(), and __skip points to the previous stack frame such that
3166 when __args == __skip we should advance __args by 32 bytes. */
3167 static tree
3168 spu_build_builtin_va_list (void)
3169 {
3170 tree f_args, f_skip, record, type_decl;
3171 bool owp;
3172
3173 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
3174
3175 type_decl =
3176 build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
3177
3178 f_args = build_decl (FIELD_DECL, get_identifier ("__args"), ptr_type_node);
3179 f_skip = build_decl (FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
3180
3181 DECL_FIELD_CONTEXT (f_args) = record;
3182 DECL_ALIGN (f_args) = 128;
3183 DECL_USER_ALIGN (f_args) = 1;
3184
3185 DECL_FIELD_CONTEXT (f_skip) = record;
3186 DECL_ALIGN (f_skip) = 128;
3187 DECL_USER_ALIGN (f_skip) = 1;
3188
3189 TREE_CHAIN (record) = type_decl;
3190 TYPE_NAME (record) = type_decl;
3191 TYPE_FIELDS (record) = f_args;
3192 TREE_CHAIN (f_args) = f_skip;
3193
3194 /* We know this is being padded and we want it that way. It is an
3195 internal type, so hide the warnings from the user. */
3196 owp = warn_padded;
3197 warn_padded = false;
3198
3199 layout_type (record);
3200
3201 warn_padded = owp;
3202
3203 /* The correct type is an array type of one element. */
3204 return build_array_type (record, build_index_type (size_zero_node));
3205 }
3206
3207 /* Implement va_start by filling the va_list structure VALIST.
3208 NEXTARG points to the first anonymous stack argument.
3209
3210 The following global variables are used to initialize
3211 the va_list structure:
3212
3213 crtl->args.info;
3214 the CUMULATIVE_ARGS for this function
3215
3216 crtl->args.arg_offset_rtx:
3217 holds the offset of the first anonymous stack argument
3218 (relative to the virtual arg pointer). */
3219
3220 static void
3221 spu_va_start (tree valist, rtx nextarg)
3222 {
3223 tree f_args, f_skip;
3224 tree args, skip, t;
3225
3226 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3227 f_skip = TREE_CHAIN (f_args);
3228
3229 valist = build_va_arg_indirect_ref (valist);
3230 args =
3231 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3232 skip =
3233 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3234
3235 /* Find the __args area. */
3236 t = make_tree (TREE_TYPE (args), nextarg);
3237 if (crtl->args.pretend_args_size > 0)
3238 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
3239 size_int (-STACK_POINTER_OFFSET));
3240 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, t);
3241 TREE_SIDE_EFFECTS (t) = 1;
3242 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3243
3244 /* Find the __skip area. */
3245 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
3246 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
3247 size_int (crtl->args.pretend_args_size
3248 - STACK_POINTER_OFFSET));
3249 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (skip), skip, t);
3250 TREE_SIDE_EFFECTS (t) = 1;
3251 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
3252 }
3253
3254 /* Gimplify va_arg by updating the va_list structure
3255 VALIST as required to retrieve an argument of type
3256 TYPE, and returning that argument.
3257
3258 ret = va_arg(VALIST, TYPE);
3259
3260 generates code equivalent to:
3261
3262 paddedsize = (sizeof(TYPE) + 15) & -16;
3263 if (VALIST.__args + paddedsize > VALIST.__skip
3264 && VALIST.__args <= VALIST.__skip)
3265 addr = VALIST.__skip + 32;
3266 else
3267 addr = VALIST.__args;
3268 VALIST.__args = addr + paddedsize;
3269 ret = *(TYPE *)addr;
3270 */
3271 static tree
3272 spu_gimplify_va_arg_expr (tree valist, tree type, tree * pre_p,
3273 tree * post_p ATTRIBUTE_UNUSED)
3274 {
3275 tree f_args, f_skip;
3276 tree args, skip;
3277 HOST_WIDE_INT size, rsize;
3278 tree paddedsize, addr, tmp;
3279 bool pass_by_reference_p;
3280
3281 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
3282 f_skip = TREE_CHAIN (f_args);
3283
3284 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
3285 args =
3286 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
3287 skip =
3288 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
3289
3290 addr = create_tmp_var (ptr_type_node, "va_arg");
3291 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
3292
3293 /* If an object is dynamically sized, a pointer to it is passed
3294 instead of the object itself. */
3295 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
3296 false);
3297 if (pass_by_reference_p)
3298 type = build_pointer_type (type);
3299 size = int_size_in_bytes (type);
3300 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
3301
3302 /* build conditional expression to calculate addr. The expression
3303 will be gimplified later. */
3304 paddedsize = size_int (rsize);
3305 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, args, paddedsize);
3306 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
3307 build2 (GT_EXPR, boolean_type_node, tmp, skip),
3308 build2 (LE_EXPR, boolean_type_node, args, skip));
3309
3310 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
3311 build2 (POINTER_PLUS_EXPR, ptr_type_node, skip,
3312 size_int (32)), args);
3313
3314 tmp = build2 (GIMPLE_MODIFY_STMT, ptr_type_node, addr, tmp);
3315 gimplify_and_add (tmp, pre_p);
3316
3317 /* update VALIST.__args */
3318 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
3319 tmp = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (args), args, tmp);
3320 gimplify_and_add (tmp, pre_p);
3321
3322 addr = fold_convert (build_pointer_type (type), addr);
3323
3324 if (pass_by_reference_p)
3325 addr = build_va_arg_indirect_ref (addr);
3326
3327 return build_va_arg_indirect_ref (addr);
3328 }
3329
3330 /* Save parameter registers starting with the register that corresponds
3331 to the first unnamed parameters. If the first unnamed parameter is
3332 in the stack then save no registers. Set pretend_args_size to the
3333 amount of space needed to save the registers. */
3334 void
3335 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
3336 tree type, int *pretend_size, int no_rtl)
3337 {
3338 if (!no_rtl)
3339 {
3340 rtx tmp;
3341 int regno;
3342 int offset;
3343 int ncum = *cum;
3344
3345 /* cum currently points to the last named argument; we want to
3346 start at the next argument. */
3347 FUNCTION_ARG_ADVANCE (ncum, mode, type, 1);
3348
3349 offset = -STACK_POINTER_OFFSET;
3350 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
3351 {
3352 tmp = gen_frame_mem (V4SImode,
3353 plus_constant (virtual_incoming_args_rtx,
3354 offset));
3355 emit_move_insn (tmp,
3356 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
3357 offset += 16;
3358 }
3359 *pretend_size = offset + STACK_POINTER_OFFSET;
3360 }
3361 }
3362 \f
3363 void
3364 spu_conditional_register_usage (void)
3365 {
3366 if (flag_pic)
3367 {
3368 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3369 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
3370 }
3371 }
3372
3373 /* This is called to decide when we can simplify a load instruction. We
3374 must only return true for registers which we know will always be
3375 aligned, taking into account that CSE might replace this reg with
3376 another one that has not been marked aligned.
3377 So this is really only true for frame, stack and virtual registers,
3378 which we know are always aligned and should not be adversely affected
3379 by CSE. */
3380 static int
3381 regno_aligned_for_load (int regno)
3382 {
3383 return regno == FRAME_POINTER_REGNUM
3384 || (frame_pointer_needed && regno == HARD_FRAME_POINTER_REGNUM)
3385 || regno == STACK_POINTER_REGNUM
3386 || (regno >= FIRST_VIRTUAL_REGISTER
3387 && regno <= LAST_VIRTUAL_REGISTER);
3388 }
3389
3390 /* Return TRUE when mem is known to be 16-byte aligned. */
3391 int
3392 aligned_mem_p (rtx mem)
3393 {
3394 if (MEM_ALIGN (mem) >= 128)
3395 return 1;
3396 if (GET_MODE_SIZE (GET_MODE (mem)) >= 16)
3397 return 1;
3398 if (GET_CODE (XEXP (mem, 0)) == PLUS)
3399 {
3400 rtx p0 = XEXP (XEXP (mem, 0), 0);
3401 rtx p1 = XEXP (XEXP (mem, 0), 1);
3402 if (regno_aligned_for_load (REGNO (p0)))
3403 {
3404 if (GET_CODE (p1) == REG && regno_aligned_for_load (REGNO (p1)))
3405 return 1;
3406 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3407 return 1;
3408 }
3409 }
3410 else if (GET_CODE (XEXP (mem, 0)) == REG)
3411 {
3412 if (regno_aligned_for_load (REGNO (XEXP (mem, 0))))
3413 return 1;
3414 }
3415 else if (ALIGNED_SYMBOL_REF_P (XEXP (mem, 0)))
3416 return 1;
3417 else if (GET_CODE (XEXP (mem, 0)) == CONST)
3418 {
3419 rtx p0 = XEXP (XEXP (XEXP (mem, 0), 0), 0);
3420 rtx p1 = XEXP (XEXP (XEXP (mem, 0), 0), 1);
3421 if (GET_CODE (p0) == SYMBOL_REF
3422 && GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15) == 0)
3423 return 1;
3424 }
3425 return 0;
3426 }
3427
3428 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
3429 into its SYMBOL_REF_FLAGS. */
3430 static void
3431 spu_encode_section_info (tree decl, rtx rtl, int first)
3432 {
3433 default_encode_section_info (decl, rtl, first);
3434
3435 /* If a variable has a forced alignment to < 16 bytes, mark it with
3436 SYMBOL_FLAG_ALIGN1. */
3437 if (TREE_CODE (decl) == VAR_DECL
3438 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
3439 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
3440 }
3441
3442 /* Return TRUE if we are certain the mem refers to a complete object
3443 which is both 16-byte aligned and padded to a 16-byte boundary. This
3444 would make it safe to store with a single instruction.
3445 We guarantee the alignment and padding for static objects by aligning
3446 all of them to 16 bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
3447 FIXME: We currently cannot guarantee this for objects on the stack
3448 because assign_parm_setup_stack calls assign_stack_local with the
3449 alignment of the parameter mode and in that case the alignment never
3450 gets adjusted by LOCAL_ALIGNMENT. */
3451 static int
3452 store_with_one_insn_p (rtx mem)
3453 {
3454 rtx addr = XEXP (mem, 0);
3455 if (GET_MODE (mem) == BLKmode)
3456 return 0;
3457 /* Only static objects. */
3458 if (GET_CODE (addr) == SYMBOL_REF)
3459 {
3460 /* We use the associated declaration to make sure the access is
3461 referring to the whole object.
3462 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
3463 if it is necessary. Will there be cases where one exists, and
3464 the other does not? Will there be cases where both exist, but
3465 have different types? */
3466 tree decl = MEM_EXPR (mem);
3467 if (decl
3468 && TREE_CODE (decl) == VAR_DECL
3469 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3470 return 1;
3471 decl = SYMBOL_REF_DECL (addr);
3472 if (decl
3473 && TREE_CODE (decl) == VAR_DECL
3474 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
3475 return 1;
3476 }
3477 return 0;
3478 }
3479
3480 int
3481 spu_expand_mov (rtx * ops, enum machine_mode mode)
3482 {
3483 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
3484 abort ();
3485
3486 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
3487 {
3488 rtx from = SUBREG_REG (ops[1]);
3489 enum machine_mode imode = GET_MODE (from);
3490
3491 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
3492 && GET_MODE_CLASS (imode) == MODE_INT
3493 && subreg_lowpart_p (ops[1]));
3494
3495 if (GET_MODE_SIZE (imode) < 4)
3496 {
3497 from = gen_rtx_SUBREG (SImode, from, 0);
3498 imode = SImode;
3499 }
3500
3501 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
3502 {
3503 enum insn_code icode = convert_optab_handler (trunc_optab, mode, imode)->insn_code;
3504 emit_insn (GEN_FCN (icode) (ops[0], from));
3505 }
3506 else
3507 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
3508 return 1;
3509 }
3510
3511 /* At least one of the operands needs to be a register. */
3512 if ((reload_in_progress | reload_completed) == 0
3513 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3514 {
3515 rtx temp = force_reg (mode, ops[1]);
3516 emit_move_insn (ops[0], temp);
3517 return 1;
3518 }
3519 if (reload_in_progress || reload_completed)
3520 {
3521 if (CONSTANT_P (ops[1]))
3522 return spu_split_immediate (ops);
3523 return 0;
3524 }
3525 else
3526 {
3527 if (GET_CODE (ops[0]) == MEM)
3528 {
3529 if (!spu_valid_move (ops))
3530 {
3531 emit_insn (gen_store (ops[0], ops[1], gen_reg_rtx (TImode),
3532 gen_reg_rtx (TImode)));
3533 return 1;
3534 }
3535 }
3536 else if (GET_CODE (ops[1]) == MEM)
3537 {
3538 if (!spu_valid_move (ops))
3539 {
3540 emit_insn (gen_load
3541 (ops[0], ops[1], gen_reg_rtx (TImode),
3542 gen_reg_rtx (SImode)));
3543 return 1;
3544 }
3545 }
3546 /* Catch the SImode immediates greater than 0x7fffffff, and sign
3547 extend them. */
3548 if (GET_CODE (ops[1]) == CONST_INT)
3549 {
3550 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
3551 if (val != INTVAL (ops[1]))
3552 {
3553 emit_move_insn (ops[0], GEN_INT (val));
3554 return 1;
3555 }
3556 }
3557 }
3558 return 0;
3559 }
3560
3561 static int
3562 reg_align (rtx reg)
3563 {
3564 /* For now, only frame registers are known to be aligned at all times.
3565 We can't trust REGNO_POINTER_ALIGN because optimization will move
3566 registers around, potentially changing an "aligned" register in an
3567 address to an unaligned register, which would result in an invalid
3568 address. */
3569 int regno = REGNO (reg);
3570 return REGNO_PTR_FRAME_P (regno) ? REGNO_POINTER_ALIGN (regno) : 1;
3571 }
3572
3573 void
3574 spu_split_load (rtx * ops)
3575 {
3576 enum machine_mode mode = GET_MODE (ops[0]);
3577 rtx addr, load, rot, mem, p0, p1;
3578 int rot_amt;
3579
3580 addr = XEXP (ops[1], 0);
3581
3582 rot = 0;
3583 rot_amt = 0;
3584 if (GET_CODE (addr) == PLUS)
3585 {
3586 /* 8 cases:
3587 aligned reg + aligned reg => lqx
3588 aligned reg + unaligned reg => lqx, rotqby
3589 aligned reg + aligned const => lqd
3590 aligned reg + unaligned const => lqd, rotqbyi
3591 unaligned reg + aligned reg => lqx, rotqby
3592 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
3593 unaligned reg + aligned const => lqd, rotqby
3594 unaligned reg + unaligned const -> not allowed by legitimate address
3595 */
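      /* For instance (illustrative only), a load through an unaligned
	 register falls into the "unaligned reg" rows: the lqx/lqd fetches
	 the enclosing 16-byte quadword and the rotqby rotates the wanted
	 data into the preferred slot. */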
3596 p0 = XEXP (addr, 0);
3597 p1 = XEXP (addr, 1);
3598 if (reg_align (p0) < 128)
3599 {
3600 if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3601 {
3602 emit_insn (gen_addsi3 (ops[3], p0, p1));
3603 rot = ops[3];
3604 }
3605 else
3606 rot = p0;
3607 }
3608 else
3609 {
3610 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
3611 {
3612 rot_amt = INTVAL (p1) & 15;
3613 p1 = GEN_INT (INTVAL (p1) & -16);
3614 addr = gen_rtx_PLUS (SImode, p0, p1);
3615 }
3616 else if (GET_CODE (p1) == REG && reg_align (p1) < 128)
3617 rot = p1;
3618 }
3619 }
3620 else if (GET_CODE (addr) == REG)
3621 {
3622 if (reg_align (addr) < 128)
3623 rot = addr;
3624 }
3625 else if (GET_CODE (addr) == CONST)
3626 {
3627 if (GET_CODE (XEXP (addr, 0)) == PLUS
3628 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3629 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3630 {
3631 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
3632 if (rot_amt & -16)
3633 addr = gen_rtx_CONST (Pmode,
3634 gen_rtx_PLUS (Pmode,
3635 XEXP (XEXP (addr, 0), 0),
3636 GEN_INT (rot_amt & -16)));
3637 else
3638 addr = XEXP (XEXP (addr, 0), 0);
3639 }
3640 else
3641 rot = addr;
3642 }
3643 else if (GET_CODE (addr) == CONST_INT)
3644 {
3645 rot_amt = INTVAL (addr);
3646 addr = GEN_INT (rot_amt & -16);
3647 }
3648 else if (!ALIGNED_SYMBOL_REF_P (addr))
3649 rot = addr;
3650
3651 if (GET_MODE_SIZE (mode) < 4)
3652 rot_amt += GET_MODE_SIZE (mode) - 4;
3653
3654 rot_amt &= 15;
3655
3656 if (rot && rot_amt)
3657 {
3658 emit_insn (gen_addsi3 (ops[3], rot, GEN_INT (rot_amt)));
3659 rot = ops[3];
3660 rot_amt = 0;
3661 }
3662
3663 load = ops[2];
3664
3665 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3666 mem = change_address (ops[1], TImode, addr);
3667
3668 emit_insn (gen_movti (load, mem));
3669
3670 if (rot)
3671 emit_insn (gen_rotqby_ti (load, load, rot));
3672 else if (rot_amt)
3673 emit_insn (gen_rotlti3 (load, load, GEN_INT (rot_amt * 8)));
3674
3675 if (reload_completed)
3676 emit_move_insn (ops[0], gen_rtx_REG (GET_MODE (ops[0]), REGNO (load)));
3677 else
3678 emit_insn (gen_spu_convert (ops[0], load));
3679 }
3680
3681 void
3682 spu_split_store (rtx * ops)
3683 {
3684 enum machine_mode mode = GET_MODE (ops[0]);
3685 rtx pat = ops[2];
3686 rtx reg = ops[3];
3687 rtx addr, p0, p1, p1_lo, smem;
3688 int aform;
3689 int scalar;
3690
3691 addr = XEXP (ops[0], 0);
3692
3693 if (GET_CODE (addr) == PLUS)
3694 {
3695 /* 8 cases:
3696 aligned reg + aligned reg => lqx, c?x, shuf, stqx
3697 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
3698 aligned reg + aligned const => lqd, c?d, shuf, stqx
3699 aligned reg + unaligned const => lqd, c?d, shuf, stqx
3700 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
3701 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
3702 unaligned reg + aligned const => lqd, c?d, shuf, stqx
3703 unaligned reg + unaligned const -> not allowed by legitimate address
3704 */
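      /* For instance (illustrative only), storing one SImode word when a
	 single stqd is not safe: load the enclosing quadword, build an
	 insertion mask with cwd, shufb the new word into place, and store
	 the whole quadword back. */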
3705 aform = 0;
3706 p0 = XEXP (addr, 0);
3707 p1 = p1_lo = XEXP (addr, 1);
3708 if (GET_CODE (p0) == REG && GET_CODE (p1) == CONST_INT)
3709 {
3710 p1_lo = GEN_INT (INTVAL (p1) & 15);
3711 p1 = GEN_INT (INTVAL (p1) & -16);
3712 addr = gen_rtx_PLUS (SImode, p0, p1);
3713 }
3714 }
3715 else if (GET_CODE (addr) == REG)
3716 {
3717 aform = 0;
3718 p0 = addr;
3719 p1 = p1_lo = const0_rtx;
3720 }
3721 else
3722 {
3723 aform = 1;
3724 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
3725 p1 = 0; /* aform doesn't use p1 */
3726 p1_lo = addr;
3727 if (ALIGNED_SYMBOL_REF_P (addr))
3728 p1_lo = const0_rtx;
3729 else if (GET_CODE (addr) == CONST)
3730 {
3731 if (GET_CODE (XEXP (addr, 0)) == PLUS
3732 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
3733 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
3734 {
3735 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
3736 if ((v & -16) != 0)
3737 addr = gen_rtx_CONST (Pmode,
3738 gen_rtx_PLUS (Pmode,
3739 XEXP (XEXP (addr, 0), 0),
3740 GEN_INT (v & -16)));
3741 else
3742 addr = XEXP (XEXP (addr, 0), 0);
3743 p1_lo = GEN_INT (v & 15);
3744 }
3745 }
3746 else if (GET_CODE (addr) == CONST_INT)
3747 {
3748 p1_lo = GEN_INT (INTVAL (addr) & 15);
3749 addr = GEN_INT (INTVAL (addr) & -16);
3750 }
3751 }
3752
3753 addr = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
3754
3755 scalar = store_with_one_insn_p (ops[0]);
3756 if (!scalar)
3757 {
3758 /* We could copy the flags from the ops[0] MEM to lmem here.
3759 We don't, because we want this load to be optimized away if
3760 possible, and copying the flags will prevent that in certain
3761 cases, e.g. consider the volatile flag. */
3762
3763 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
3764 set_mem_alias_set (lmem, 0);
3765 emit_insn (gen_movti (reg, lmem));
3766
3767 if (!p0 || reg_align (p0) >= 128)
3768 p0 = stack_pointer_rtx;
3769 if (!p1_lo)
3770 p1_lo = const0_rtx;
3771
3772 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
3773 emit_insn (gen_shufb (reg, ops[1], reg, pat));
3774 }
3775 else if (reload_completed)
3776 {
3777 if (GET_CODE (ops[1]) == REG)
3778 emit_move_insn (reg, gen_rtx_REG (GET_MODE (reg), REGNO (ops[1])));
3779 else if (GET_CODE (ops[1]) == SUBREG)
3780 emit_move_insn (reg,
3781 gen_rtx_REG (GET_MODE (reg),
3782 REGNO (SUBREG_REG (ops[1]))));
3783 else
3784 abort ();
3785 }
3786 else
3787 {
3788 if (GET_CODE (ops[1]) == REG)
3789 emit_insn (gen_spu_convert (reg, ops[1]));
3790 else if (GET_CODE (ops[1]) == SUBREG)
3791 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
3792 else
3793 abort ();
3794 }
3795
3796 if (GET_MODE_SIZE (mode) < 4 && scalar)
3797 emit_insn (gen_shlqby_ti
3798 (reg, reg, GEN_INT (4 - GET_MODE_SIZE (mode))));
3799
3800 smem = change_address (ops[0], TImode, addr);
3801 /* We can't use the previous alias set because the memory has changed
3802 size and can potentially overlap objects of other types. */
3803 set_mem_alias_set (smem, 0);
3804
3805 emit_insn (gen_movti (smem, reg));
3806 }
3807
3808 /* Return TRUE if X is MEM which is a struct member reference
3809 and the member can safely be loaded and stored with a single
3810 instruction because it is padded. */
3811 static int
3812 mem_is_padded_component_ref (rtx x)
3813 {
3814 tree t = MEM_EXPR (x);
3815 tree r;
3816 if (!t || TREE_CODE (t) != COMPONENT_REF)
3817 return 0;
3818 t = TREE_OPERAND (t, 1);
3819 if (!t || TREE_CODE (t) != FIELD_DECL
3820 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
3821 return 0;
3822 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
3823 r = DECL_FIELD_CONTEXT (t);
3824 if (!r || TREE_CODE (r) != RECORD_TYPE)
3825 return 0;
3826 /* Make sure they are the same mode */
3827 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
3828 return 0;
3829 /* If there are no following fields, then the field alignment assures
3830 the structure is padded to that alignment, which means this field is
3831 padded too. */
3832 if (TREE_CHAIN (t) == 0)
3833 return 1;
3834 /* If the following field is also aligned then this field will be
3835 padded. */
3836 t = TREE_CHAIN (t);
3837 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
3838 return 1;
3839 return 0;
3840 }
3841
3842 /* Parse the -mfixed-range= option string. */
3843 static void
3844 fix_range (const char *const_str)
3845 {
3846 int i, first, last;
3847 char *str, *dash, *comma;
3848
3849 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
3850 REG2 are either register names or register numbers. The effect
3851 of this option is to mark the registers in the range from REG1 to
3852 REG2 as ``fixed'' so they won't be used by the compiler. */
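   /* For example (hypothetical command line), -mfixed-range=80-127
      marks registers 80 through 127 as fixed and call-used so the
      register allocator will not touch them. */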
3853
3854 i = strlen (const_str);
3855 str = (char *) alloca (i + 1);
3856 memcpy (str, const_str, i + 1);
3857
3858 while (1)
3859 {
3860 dash = strchr (str, '-');
3861 if (!dash)
3862 {
3863 warning (0, "value of -mfixed-range must have form REG1-REG2");
3864 return;
3865 }
3866 *dash = '\0';
3867 comma = strchr (dash + 1, ',');
3868 if (comma)
3869 *comma = '\0';
3870
3871 first = decode_reg_name (str);
3872 if (first < 0)
3873 {
3874 warning (0, "unknown register name: %s", str);
3875 return;
3876 }
3877
3878 last = decode_reg_name (dash + 1);
3879 if (last < 0)
3880 {
3881 warning (0, "unknown register name: %s", dash + 1);
3882 return;
3883 }
3884
3885 *dash = '-';
3886
3887 if (first > last)
3888 {
3889 warning (0, "%s-%s is an empty range", str, dash + 1);
3890 return;
3891 }
3892
3893 for (i = first; i <= last; ++i)
3894 fixed_regs[i] = call_used_regs[i] = 1;
3895
3896 if (!comma)
3897 break;
3898
3899 *comma = ',';
3900 str = comma + 1;
3901 }
3902 }
3903
3904 int
3905 spu_valid_move (rtx * ops)
3906 {
3907 enum machine_mode mode = GET_MODE (ops[0]);
3908 if (!register_operand (ops[0], mode) && !register_operand (ops[1], mode))
3909 return 0;
3910
3911 /* init_expr_once tries to recog against load and store insns to set
3912 the direct_load[] and direct_store[] arrays. We always want to
3913 consider those loads and stores valid. init_expr_once is called in
3914 the context of a dummy function which does not have a decl. */
3915 if (cfun->decl == 0)
3916 return 1;
3917
3918 /* Don't allow loads/stores which would require more than 1 insn.
3919 During and after reload we assume loads and stores only take 1
3920 insn. */
3921 if (GET_MODE_SIZE (mode) < 16 && !reload_in_progress && !reload_completed)
3922 {
3923 if (GET_CODE (ops[0]) == MEM
3924 && (GET_MODE_SIZE (mode) < 4
3925 || !(store_with_one_insn_p (ops[0])
3926 || mem_is_padded_component_ref (ops[0]))))
3927 return 0;
3928 if (GET_CODE (ops[1]) == MEM
3929 && (GET_MODE_SIZE (mode) < 4 || !aligned_mem_p (ops[1])))
3930 return 0;
3931 }
3932 return 1;
3933 }
3934
3935 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3936 can be generated using the fsmbi instruction. */
3937 int
3938 fsmbi_const_p (rtx x)
3939 {
3940 if (CONSTANT_P (x))
3941 {
3942 /* We can always choose TImode for CONST_INT because the high bits
3943 of an SImode will always be all 1s, i.e., valid for fsmbi. */
3944 enum immediate_class c = classify_immediate (x, TImode);
3945 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
3946 }
3947 return 0;
3948 }
3949
3950 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
3951 can be generated using the cbd, chd, cwd or cdd instruction. */
3952 int
3953 cpat_const_p (rtx x, enum machine_mode mode)
3954 {
3955 if (CONSTANT_P (x))
3956 {
3957 enum immediate_class c = classify_immediate (x, mode);
3958 return c == IC_CPAT;
3959 }
3960 return 0;
3961 }
3962
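/* Build the TImode shuffle-control constant that a cbd/chd/cwd/cdd
instruction would produce for these operands: selector bytes 0x10..0x1f
(picking the second shufb source) everywhere, except that the
element-sized field at the computed offset selects the scalar from the
first source.  For example, a 4-byte pattern at offset 0 is
{ 0x00, 0x01, 0x02, 0x03, 0x14, 0x15, ..., 0x1f }. */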
3963 rtx
3964 gen_cpat_const (rtx * ops)
3965 {
3966 unsigned char dst[16];
3967 int i, offset, shift, isize;
3968 if (GET_CODE (ops[3]) != CONST_INT
3969 || GET_CODE (ops[2]) != CONST_INT
3970 || (GET_CODE (ops[1]) != CONST_INT
3971 && GET_CODE (ops[1]) != REG))
3972 return 0;
3973 if (GET_CODE (ops[1]) == REG
3974 && (!REG_POINTER (ops[1])
3975 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
3976 return 0;
3977
3978 for (i = 0; i < 16; i++)
3979 dst[i] = i + 16;
3980 isize = INTVAL (ops[3]);
3981 if (isize == 1)
3982 shift = 3;
3983 else if (isize == 2)
3984 shift = 2;
3985 else
3986 shift = 0;
3987 offset = (INTVAL (ops[2]) +
3988 (GET_CODE (ops[1]) ==
3989 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
3990 for (i = 0; i < isize; i++)
3991 dst[offset + i] = i + shift;
3992 return array_to_constant (TImode, dst);
3993 }
3994
3995 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
3996 array. Use MODE for CONST_INT's. When the constant's mode is smaller
3997 than 16 bytes, the value is repeated across the rest of the array. */
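/* For example (illustrative), a CONST_INT of 0x1234 taken in HImode
yields arr = { 0x12, 0x34, 0x12, 0x34, ... }: the bytes are stored
most-significant first and then splatted across all 16 bytes. */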
3998 void
3999 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
4000 {
4001 HOST_WIDE_INT val;
4002 int i, j, first;
4003
4004 memset (arr, 0, 16);
4005 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
4006 if (GET_CODE (x) == CONST_INT
4007 || (GET_CODE (x) == CONST_DOUBLE
4008 && (mode == SFmode || mode == DFmode)))
4009 {
4010 gcc_assert (mode != VOIDmode && mode != BLKmode);
4011
4012 if (GET_CODE (x) == CONST_DOUBLE)
4013 val = const_double_to_hwint (x);
4014 else
4015 val = INTVAL (x);
4016 first = GET_MODE_SIZE (mode) - 1;
4017 for (i = first; i >= 0; i--)
4018 {
4019 arr[i] = val & 0xff;
4020 val >>= 8;
4021 }
4022 /* Splat the constant across the whole array. */
4023 for (j = 0, i = first + 1; i < 16; i++)
4024 {
4025 arr[i] = arr[j];
4026 j = (j == first) ? 0 : j + 1;
4027 }
4028 }
4029 else if (GET_CODE (x) == CONST_DOUBLE)
4030 {
4031 val = CONST_DOUBLE_LOW (x);
4032 for (i = 15; i >= 8; i--)
4033 {
4034 arr[i] = val & 0xff;
4035 val >>= 8;
4036 }
4037 val = CONST_DOUBLE_HIGH (x);
4038 for (i = 7; i >= 0; i--)
4039 {
4040 arr[i] = val & 0xff;
4041 val >>= 8;
4042 }
4043 }
4044 else if (GET_CODE (x) == CONST_VECTOR)
4045 {
4046 int units;
4047 rtx elt;
4048 mode = GET_MODE_INNER (mode);
4049 units = CONST_VECTOR_NUNITS (x);
4050 for (i = 0; i < units; i++)
4051 {
4052 elt = CONST_VECTOR_ELT (x, i);
4053 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
4054 {
4055 if (GET_CODE (elt) == CONST_DOUBLE)
4056 val = const_double_to_hwint (elt);
4057 else
4058 val = INTVAL (elt);
4059 first = GET_MODE_SIZE (mode) - 1;
4060 if (first + i * GET_MODE_SIZE (mode) > 16)
4061 abort ();
4062 for (j = first; j >= 0; j--)
4063 {
4064 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
4065 val >>= 8;
4066 }
4067 }
4068 }
4069 }
4070 else
4071 gcc_unreachable();
4072 }
4073
4074 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
4075 smaller than 16 bytes, use the bytes that would represent that value
4076 in a register, e.g., for QImode return the value of arr[3]. */
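/* For example, with MODE == SImode the MODE_INT path below builds a
CONST_INT from arr[0..3], treating arr[0] as the most significant byte. */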
4077 rtx
4078 array_to_constant (enum machine_mode mode, unsigned char arr[16])
4079 {
4080 enum machine_mode inner_mode;
4081 rtvec v;
4082 int units, size, i, j, k;
4083 HOST_WIDE_INT val;
4084
4085 if (GET_MODE_CLASS (mode) == MODE_INT
4086 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
4087 {
4088 j = GET_MODE_SIZE (mode);
4089 i = j < 4 ? 4 - j : 0;
4090 for (val = 0; i < j; i++)
4091 val = (val << 8) | arr[i];
4092 val = trunc_int_for_mode (val, mode);
4093 return GEN_INT (val);
4094 }
4095
4096 if (mode == TImode)
4097 {
4098 HOST_WIDE_INT high;
4099 for (i = high = 0; i < 8; i++)
4100 high = (high << 8) | arr[i];
4101 for (i = 8, val = 0; i < 16; i++)
4102 val = (val << 8) | arr[i];
4103 return immed_double_const (val, high, TImode);
4104 }
4105 if (mode == SFmode)
4106 {
4107 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4108 val = trunc_int_for_mode (val, SImode);
4109 return hwint_to_const_double (SFmode, val);
4110 }
4111 if (mode == DFmode)
4112 {
4113 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
4114 val <<= 32;
4115 val |= (arr[4] << 24) | (arr[5] << 16) | (arr[6] << 8) | arr[7];
4116 return hwint_to_const_double (DFmode, val);
4117 }
4118
4119 if (!VECTOR_MODE_P (mode))
4120 abort ();
4121
4122 units = GET_MODE_NUNITS (mode);
4123 size = GET_MODE_UNIT_SIZE (mode);
4124 inner_mode = GET_MODE_INNER (mode);
4125 v = rtvec_alloc (units);
4126
4127 for (k = i = 0; i < units; ++i)
4128 {
4129 val = 0;
4130 for (j = 0; j < size; j++, k++)
4131 val = (val << 8) | arr[k];
4132
4133 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
4134 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
4135 else
4136 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
4137 }
4138 if (k > 16)
4139 abort ();
4140
4141 return gen_rtx_CONST_VECTOR (mode, v);
4142 }
4143
4144 static void
4145 reloc_diagnostic (rtx x)
4146 {
4147 tree loc_decl, decl = 0;
4148 const char *msg;
4149 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
4150 return;
4151
4152 if (GET_CODE (x) == SYMBOL_REF)
4153 decl = SYMBOL_REF_DECL (x);
4154 else if (GET_CODE (x) == CONST
4155 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
4156 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
4157
4158 /* SYMBOL_REF_DECL is not necessarily a DECL. */
4159 if (decl && !DECL_P (decl))
4160 decl = 0;
4161
4162 /* We use last_assemble_variable_decl to get line information. It's
4163 not always going to be right and might not even be close, but will
4164 be right for the more common cases. */
4165 if (!last_assemble_variable_decl || in_section == ctors_section)
4166 loc_decl = decl;
4167 else
4168 loc_decl = last_assemble_variable_decl;
4169
4170 /* The decl could be a string constant. */
4171 if (decl && DECL_P (decl))
4172 msg = "%Jcreating run-time relocation for %qD";
4173 else
4174 msg = "creating run-time relocation";
4175
4176 if (TARGET_WARN_RELOC)
4177 warning (0, msg, loc_decl, decl);
4178 else
4179 error (msg, loc_decl, decl);
4180 }
4181
4182 /* Hook into assemble_integer so we can generate an error for run-time
4183 relocations. The SPU ABI disallows them. */
4184 static bool
4185 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
4186 {
4187 /* By default run-time relocations aren't supported, but we allow them
4188 in case users support them in their own run-time loader, and we provide
4189 a warning for those users that don't. */
4190 if ((GET_CODE (x) == SYMBOL_REF)
4191 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
4192 reloc_diagnostic (x);
4193
4194 return default_assemble_integer (x, size, aligned_p);
4195 }
4196
4197 static void
4198 spu_asm_globalize_label (FILE * file, const char *name)
4199 {
4200 fputs ("\t.global\t", file);
4201 assemble_name (file, name);
4202 fputs ("\n", file);
4203 }
4204
4205 static bool
4206 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total)
4207 {
4208 enum machine_mode mode = GET_MODE (x);
4209 int cost = COSTS_N_INSNS (2);
4210
4211 /* Folding to a CONST_VECTOR will use extra space but there might
4212 be only a small savings in cycles. We'd like to use a CONST_VECTOR
4213 only if it allows us to fold away multiple insns. Changing the cost
4214 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
4215 because this cost will only be compared against a single insn.
4216 if (code == CONST_VECTOR)
4217 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
4218 */
4219
4220 /* Use defaults for float operations. Not accurate but good enough. */
4221 if (mode == DFmode)
4222 {
4223 *total = COSTS_N_INSNS (13);
4224 return true;
4225 }
4226 if (mode == SFmode)
4227 {
4228 *total = COSTS_N_INSNS (6);
4229 return true;
4230 }
4231 switch (code)
4232 {
4233 case CONST_INT:
4234 if (satisfies_constraint_K (x))
4235 *total = 0;
4236 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
4237 *total = COSTS_N_INSNS (1);
4238 else
4239 *total = COSTS_N_INSNS (3);
4240 return true;
4241
4242 case CONST:
4243 *total = COSTS_N_INSNS (3);
4244 return true;
4245
4246 case LABEL_REF:
4247 case SYMBOL_REF:
4248 *total = COSTS_N_INSNS (0);
4249 return true;
4250
4251 case CONST_DOUBLE:
4252 *total = COSTS_N_INSNS (5);
4253 return true;
4254
4255 case FLOAT_EXTEND:
4256 case FLOAT_TRUNCATE:
4257 case FLOAT:
4258 case UNSIGNED_FLOAT:
4259 case FIX:
4260 case UNSIGNED_FIX:
4261 *total = COSTS_N_INSNS (7);
4262 return true;
4263
4264 case PLUS:
4265 if (mode == TImode)
4266 {
4267 *total = COSTS_N_INSNS (9);
4268 return true;
4269 }
4270 break;
4271
4272 case MULT:
4273 cost =
4274 GET_CODE (XEXP (x, 0)) ==
4275 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
4276 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
4277 {
4278 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
4279 {
4280 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
4281 cost = COSTS_N_INSNS (14);
4282 if ((val & 0xffff) == 0)
4283 cost = COSTS_N_INSNS (9);
4284 else if (val > 0 && val < 0x10000)
4285 cost = COSTS_N_INSNS (11);
4286 }
4287 }
4288 *total = cost;
4289 return true;
4290 case DIV:
4291 case UDIV:
4292 case MOD:
4293 case UMOD:
4294 *total = COSTS_N_INSNS (20);
4295 return true;
4296 case ROTATE:
4297 case ROTATERT:
4298 case ASHIFT:
4299 case ASHIFTRT:
4300 case LSHIFTRT:
4301 *total = COSTS_N_INSNS (4);
4302 return true;
4303 case UNSPEC:
4304 if (XINT (x, 1) == UNSPEC_CONVERT)
4305 *total = COSTS_N_INSNS (0);
4306 else
4307 *total = COSTS_N_INSNS (4);
4308 return true;
4309 }
4310 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
4311 if (GET_MODE_CLASS (mode) == MODE_INT
4312 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
4313 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
4314 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
4315 *total = cost;
4316 return true;
4317 }
4318
4319 enum machine_mode
4320 spu_eh_return_filter_mode (void)
4321 {
4322 /* We would like this to be SImode, but sjlj exceptions seem to work
4323 only with word_mode. */
4324 return TImode;
4325 }
4326
4327 /* Decide whether we can make a sibling call to a function. DECL is the
4328 declaration of the function being targeted by the call and EXP is the
4329 CALL_EXPR representing the call. */
4330 static bool
4331 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
4332 {
4333 return decl && !TARGET_LARGE_MEM;
4334 }
4335
4336 /* We need to correctly update the back chain pointer and the Available
4337 Stack Size (which is in the second slot of the sp register). */
4338 void
4339 spu_allocate_stack (rtx op0, rtx op1)
4340 {
4341 HOST_WIDE_INT v;
4342 rtx chain = gen_reg_rtx (V4SImode);
4343 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
4344 rtx sp = gen_reg_rtx (V4SImode);
4345 rtx splatted = gen_reg_rtx (V4SImode);
4346 rtx pat = gen_reg_rtx (TImode);
4347
4348 /* copy the back chain so we can save it back again. */
4349 emit_move_insn (chain, stack_bot);
4350
4351 op1 = force_reg (SImode, op1);
4352
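/* Splat op1 (the allocation size, in the preferred SImode slot) across
all four words with a 00 01 02 03 shufb pattern so the subtraction
below adjusts the stack pointer and the Available Stack Size at once. */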
4353 v = 0x1020300010203ll;
4354 emit_move_insn (pat, immed_double_const (v, v, TImode));
4355 emit_insn (gen_shufb (splatted, op1, op1, pat));
4356
4357 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
4358 emit_insn (gen_subv4si3 (sp, sp, splatted));
4359
4360 if (flag_stack_check)
4361 {
4362 rtx avail = gen_reg_rtx(SImode);
4363 rtx result = gen_reg_rtx(SImode);
4364 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
4365 emit_insn (gen_cgt_si(result, avail, GEN_INT (-1)));
4366 emit_insn (gen_spu_heq (result, GEN_INT(0) ));
4367 }
4368
4369 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
4370
4371 emit_move_insn (stack_bot, chain);
4372
4373 emit_move_insn (op0, virtual_stack_dynamic_rtx);
4374 }
4375
4376 void
4377 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4378 {
4379 static unsigned char arr[16] =
4380 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
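/* As a shufb pattern, arr splats word 0 of its first source across all
four words of the result. */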
4381 rtx temp = gen_reg_rtx (SImode);
4382 rtx temp2 = gen_reg_rtx (SImode);
4383 rtx temp3 = gen_reg_rtx (V4SImode);
4384 rtx temp4 = gen_reg_rtx (V4SImode);
4385 rtx pat = gen_reg_rtx (TImode);
4386 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4387
4388 /* Restore the backchain from the first word, sp from the second. */
4389 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
4390 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
4391
4392 emit_move_insn (pat, array_to_constant (TImode, arr));
4393
4394 /* Compute Available Stack Size for sp */
4395 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4396 emit_insn (gen_shufb (temp3, temp, temp, pat));
4397
4398 /* Compute Available Stack Size for back chain */
4399 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
4400 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
4401 emit_insn (gen_addv4si3 (temp4, sp, temp4));
4402
4403 emit_insn (gen_addv4si3 (sp, sp, temp3));
4404 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
4405 }
4406
4407 static void
4408 spu_init_libfuncs (void)
4409 {
4410 set_optab_libfunc (smul_optab, DImode, "__muldi3");
4411 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
4412 set_optab_libfunc (smod_optab, DImode, "__moddi3");
4413 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
4414 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
4415 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
4416 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
4417 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
4418 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
4419 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
4420 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
4421
4422 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
4423 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
4424
4425 set_optab_libfunc (smul_optab, TImode, "__multi3");
4426 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
4427 set_optab_libfunc (smod_optab, TImode, "__modti3");
4428 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
4429 set_optab_libfunc (umod_optab, TImode, "__umodti3");
4430 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
4431 }
4432
4433 /* Make a subreg, stripping any existing subreg. We could possibly just
4434 call simplify_subreg, but in this case we know what we want. */
4435 rtx
4436 spu_gen_subreg (enum machine_mode mode, rtx x)
4437 {
4438 if (GET_CODE (x) == SUBREG)
4439 x = SUBREG_REG (x);
4440 if (GET_MODE (x) == mode)
4441 return x;
4442 return gen_rtx_SUBREG (mode, x, 0);
4443 }
4444
4445 static bool
4446 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
4447 {
4448 return (TYPE_MODE (type) == BLKmode
4449 && ((type) == 0
4450 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4451 || int_size_in_bytes (type) >
4452 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
4453 }
4454 \f
4455 /* Create the built-in types and functions */
4456
4457 struct spu_builtin_description spu_builtins[] = {
4458 #define DEF_BUILTIN(fcode, icode, name, type, params) \
4459 {fcode, icode, name, type, params, NULL_TREE},
4460 #include "spu-builtins.def"
4461 #undef DEF_BUILTIN
4462 };
4463
4464 static void
4465 spu_init_builtins (void)
4466 {
4467 struct spu_builtin_description *d;
4468 unsigned int i;
4469
4470 V16QI_type_node = build_vector_type (intQI_type_node, 16);
4471 V8HI_type_node = build_vector_type (intHI_type_node, 8);
4472 V4SI_type_node = build_vector_type (intSI_type_node, 4);
4473 V2DI_type_node = build_vector_type (intDI_type_node, 2);
4474 V4SF_type_node = build_vector_type (float_type_node, 4);
4475 V2DF_type_node = build_vector_type (double_type_node, 2);
4476
4477 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
4478 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
4479 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
4480 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
4481
4482 spu_builtin_types[SPU_BTI_QUADWORD] = intTI_type_node;
4483
4484 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
4485 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
4486 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
4487 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
4488 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
4489 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
4490 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
4491 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
4492 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
4493 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
4494 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
4495 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
4496
4497 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
4498 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
4499 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
4500 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
4501 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
4502 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
4503 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
4504 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
4505
4506 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
4507 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
4508
4509 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
4510
4511 spu_builtin_types[SPU_BTI_PTR] =
4512 build_pointer_type (build_qualified_type
4513 (void_type_node,
4514 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
4515
4516 /* For each builtin we build a new prototype. The tree code will make
4517 sure nodes are shared. */
4518 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
4519 {
4520 tree p;
4521 char name[64]; /* build_function will make a copy. */
4522 int parm;
4523
4524 if (d->name == 0)
4525 continue;
4526
4527 /* Find last parm. */
4528 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
4529 ;
4530
4531 p = void_list_node;
4532 while (parm > 1)
4533 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
4534
4535 p = build_function_type (spu_builtin_types[d->parm[0]], p);
4536
4537 sprintf (name, "__builtin_%s", d->name);
4538 d->fndecl =
4539 add_builtin_function (name, p, END_BUILTINS + i, BUILT_IN_MD,
4540 NULL, NULL_TREE);
4541 if (d->fcode == SPU_MASK_FOR_LOAD)
4542 TREE_READONLY (d->fndecl) = 1;
4543
4544 /* These builtins don't throw. */
4545 TREE_NOTHROW (d->fndecl) = 1;
4546 }
4547 }
4548
4549 void
4550 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
4551 {
4552 static unsigned char arr[16] =
4553 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
4554
4555 rtx temp = gen_reg_rtx (Pmode);
4556 rtx temp2 = gen_reg_rtx (V4SImode);
4557 rtx temp3 = gen_reg_rtx (V4SImode);
4558 rtx pat = gen_reg_rtx (TImode);
4559 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
4560
4561 emit_move_insn (pat, array_to_constant (TImode, arr));
4562
4563 /* Restore the sp. */
4564 emit_move_insn (temp, op1);
4565 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
4566
4567 /* Compute available stack size for sp. */
4568 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
4569 emit_insn (gen_shufb (temp3, temp, temp, pat));
4570
4571 emit_insn (gen_addv4si3 (sp, sp, temp3));
4572 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
4573 }
4574
4575 int
4576 spu_safe_dma (HOST_WIDE_INT channel)
4577 {
4578 return (channel >= 21 && channel <= 27);
4579 }
4580
4581 void
4582 spu_builtin_splats (rtx ops[])
4583 {
4584 enum machine_mode mode = GET_MODE (ops[0]);
4585 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
4586 {
4587 unsigned char arr[16];
4588 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
4589 emit_move_insn (ops[0], array_to_constant (mode, arr));
4590 }
4591 else if (!flag_pic && GET_MODE (ops[0]) == V4SImode && CONSTANT_P (ops[1]))
4592 {
4593 rtvec v = rtvec_alloc (4);
4594 RTVEC_ELT (v, 0) = ops[1];
4595 RTVEC_ELT (v, 1) = ops[1];
4596 RTVEC_ELT (v, 2) = ops[1];
4597 RTVEC_ELT (v, 3) = ops[1];
4598 emit_move_insn (ops[0], gen_rtx_CONST_VECTOR (mode, v));
4599 }
4600 else
4601 {
4602 rtx reg = gen_reg_rtx (TImode);
4603 rtx shuf;
4604 if (GET_CODE (ops[1]) != REG
4605 && GET_CODE (ops[1]) != SUBREG)
4606 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
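/* The shuffle patterns below broadcast the scalar from its preferred
slot in ops[1] to every element of the result; e.g. the V4SI pattern
repeats selector bytes 00 01 02 03, splatting word 0. */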
4607 switch (mode)
4608 {
4609 case V2DImode:
4610 case V2DFmode:
4611 shuf =
4612 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
4613 TImode);
4614 break;
4615 case V4SImode:
4616 case V4SFmode:
4617 shuf =
4618 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
4619 TImode);
4620 break;
4621 case V8HImode:
4622 shuf =
4623 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
4624 TImode);
4625 break;
4626 case V16QImode:
4627 shuf =
4628 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
4629 TImode);
4630 break;
4631 default:
4632 abort ();
4633 }
4634 emit_move_insn (reg, shuf);
4635 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
4636 }
4637 }
4638
4639 void
4640 spu_builtin_extract (rtx ops[])
4641 {
4642 enum machine_mode mode;
4643 rtx rot, from, tmp;
4644
4645 mode = GET_MODE (ops[1]);
4646
4647 if (GET_CODE (ops[2]) == CONST_INT)
4648 {
4649 switch (mode)
4650 {
4651 case V16QImode:
4652 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
4653 break;
4654 case V8HImode:
4655 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
4656 break;
4657 case V4SFmode:
4658 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
4659 break;
4660 case V4SImode:
4661 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
4662 break;
4663 case V2DImode:
4664 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
4665 break;
4666 case V2DFmode:
4667 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
4668 break;
4669 default:
4670 abort ();
4671 }
4672 return;
4673 }
4674
4675 from = spu_gen_subreg (TImode, ops[1]);
4676 rot = gen_reg_rtx (TImode);
4677 tmp = gen_reg_rtx (SImode);
4678
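/* Compute the byte rotation that brings element ops[2] into the scalar
preferred slot: the element's byte offset minus the slot offset (byte 3
for V16QI, bytes 2-3 for V8HI, 0 for word and doubleword elements). */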
4679 switch (mode)
4680 {
4681 case V16QImode:
4682 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
4683 break;
4684 case V8HImode:
4685 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
4686 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
4687 break;
4688 case V4SFmode:
4689 case V4SImode:
4690 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
4691 break;
4692 case V2DImode:
4693 case V2DFmode:
4694 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
4695 break;
4696 default:
4697 abort ();
4698 }
4699 emit_insn (gen_rotqby_ti (rot, from, tmp));
4700
4701 emit_insn (gen_spu_convert (ops[0], rot));
4702 }
4703
4704 void
4705 spu_builtin_insert (rtx ops[])
4706 {
4707 enum machine_mode mode = GET_MODE (ops[0]);
4708 enum machine_mode imode = GET_MODE_INNER (mode);
4709 rtx mask = gen_reg_rtx (TImode);
4710 rtx offset;
4711
4712 if (GET_CODE (ops[3]) == CONST_INT)
4713 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
4714 else
4715 {
4716 offset = gen_reg_rtx (SImode);
4717 emit_insn (gen_mulsi3
4718 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
4719 }
4720 emit_insn (gen_cpat
4721 (mask, stack_pointer_rtx, offset,
4722 GEN_INT (GET_MODE_SIZE (imode))));
4723 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
4724 }
4725
4726 void
4727 spu_builtin_promote (rtx ops[])
4728 {
4729 enum machine_mode mode, imode;
4730 rtx rot, from, offset;
4731 HOST_WIDE_INT pos;
4732
4733 mode = GET_MODE (ops[0]);
4734 imode = GET_MODE_INNER (mode);
4735
4736 from = gen_reg_rtx (TImode);
4737 rot = spu_gen_subreg (TImode, ops[0]);
4738
4739 emit_insn (gen_spu_convert (from, ops[1]));
4740
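/* The rotation below is the inverse of the extract case: it moves the
scalar from its preferred slot (byte offset 4 - size for sub-word types,
0 otherwise) to the byte offset of element ops[2], modulo 16. */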
4741 if (GET_CODE (ops[2]) == CONST_INT)
4742 {
4743 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
4744 if (GET_MODE_SIZE (imode) < 4)
4745 pos += 4 - GET_MODE_SIZE (imode);
4746 offset = GEN_INT (pos & 15);
4747 }
4748 else
4749 {
4750 offset = gen_reg_rtx (SImode);
4751 switch (mode)
4752 {
4753 case V16QImode:
4754 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
4755 break;
4756 case V8HImode:
4757 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
4758 emit_insn (gen_addsi3 (offset, offset, offset));
4759 break;
4760 case V4SFmode:
4761 case V4SImode:
4762 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
4763 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
4764 break;
4765 case V2DImode:
4766 case V2DFmode:
4767 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
4768 break;
4769 default:
4770 abort ();
4771 }
4772 }
4773 emit_insn (gen_rotqby_ti (rot, from, offset));
4774 }
4775
4776 void
4777 spu_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
4778 {
4779 rtx shuf = gen_reg_rtx (V4SImode);
4780 rtx insn = gen_reg_rtx (V4SImode);
4781 rtx shufc;
4782 rtx insnc;
4783 rtx mem;
4784
4785 fnaddr = force_reg (SImode, fnaddr);
4786 cxt = force_reg (SImode, cxt);
4787
4788 if (TARGET_LARGE_MEM)
4789 {
4790 rtx rotl = gen_reg_rtx (V4SImode);
4791 rtx mask = gen_reg_rtx (V4SImode);
4792 rtx bi = gen_reg_rtx (SImode);
4793 unsigned char shufa[16] = {
4794 2, 3, 0, 1, 18, 19, 16, 17,
4795 0, 1, 2, 3, 16, 17, 18, 19
4796 };
4797 unsigned char insna[16] = {
4798 0x41, 0, 0, 79,
4799 0x41, 0, 0, STATIC_CHAIN_REGNUM,
4800 0x60, 0x80, 0, 79,
4801 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
4802 };
4803
4804 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
4805 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4806
4807 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
4808 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
4809 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
4810 emit_insn (gen_selb (insn, insnc, rotl, mask));
4811
4812 mem = memory_address (Pmode, tramp);
4813 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4814
4815 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
4816 mem = memory_address (Pmode, plus_constant (tramp, 16));
4817 emit_move_insn (gen_rtx_MEM (Pmode, mem), bi);
4818 }
4819 else
4820 {
4821 rtx scxt = gen_reg_rtx (SImode);
4822 rtx sfnaddr = gen_reg_rtx (SImode);
4823 unsigned char insna[16] = {
4824 0x42, 0, 0, STATIC_CHAIN_REGNUM,
4825 0x30, 0, 0, 0,
4826 0, 0, 0, 0,
4827 0, 0, 0, 0
4828 };
4829
4830 shufc = gen_reg_rtx (TImode);
4831 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
4832
4833 /* By or'ing all of cxt with the ila opcode we are assuming cxt
4834 fits in 18 bits and the last 4 are zeros. This will be true if
4835 the stack pointer is initialized to 0x3fff0 at program start,
4836 otherwise the ila instruction will be garbage. */
4837
4838 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
4839 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
4840 emit_insn (gen_cpat
4841 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
4842 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
4843 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
4844
4845 mem = memory_address (Pmode, tramp);
4846 emit_move_insn (gen_rtx_MEM (V4SImode, mem), insn);
4847
4848 }
4849 emit_insn (gen_sync ());
4850 }
4851
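/* Expand a sign extension to a wider integer mode (DImode or a full
16-byte mode).  The shufb pattern built below places the original value
in the low-order bytes of the result and replicates the sign through all
remaining bytes, taking the sign from the helper register computed for
each source mode. */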
4852 void
4853 spu_expand_sign_extend (rtx ops[])
4854 {
4855 unsigned char arr[16];
4856 rtx pat = gen_reg_rtx (TImode);
4857 rtx sign, c;
4858 int i, last;
4859 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
4860 if (GET_MODE (ops[1]) == QImode)
4861 {
4862 sign = gen_reg_rtx (HImode);
4863 emit_insn (gen_extendqihi2 (sign, ops[1]));
4864 for (i = 0; i < 16; i++)
4865 arr[i] = 0x12;
4866 arr[last] = 0x13;
4867 }
4868 else
4869 {
4870 for (i = 0; i < 16; i++)
4871 arr[i] = 0x10;
4872 switch (GET_MODE (ops[1]))
4873 {
4874 case HImode:
4875 sign = gen_reg_rtx (SImode);
4876 emit_insn (gen_extendhisi2 (sign, ops[1]));
4877 arr[last] = 0x03;
4878 arr[last - 1] = 0x02;
4879 break;
4880 case SImode:
4881 sign = gen_reg_rtx (SImode);
4882 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
4883 for (i = 0; i < 4; i++)
4884 arr[last - i] = 3 - i;
4885 break;
4886 case DImode:
4887 sign = gen_reg_rtx (SImode);
4888 c = gen_reg_rtx (SImode);
4889 emit_insn (gen_spu_convert (c, ops[1]));
4890 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
4891 for (i = 0; i < 8; i++)
4892 arr[last - i] = 7 - i;
4893 break;
4894 default:
4895 abort ();
4896 }
4897 }
4898 emit_move_insn (pat, array_to_constant (TImode, arr));
4899 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
4900 }
4901
4902 /* Expand vector initialization. If there are any constant parts,
4903 load constant parts first. Then load any non-constant parts. */
4904 void
4905 spu_expand_vector_init (rtx target, rtx vals)
4906 {
4907 enum machine_mode mode = GET_MODE (target);
4908 int n_elts = GET_MODE_NUNITS (mode);
4909 int n_var = 0;
4910 bool all_same = true;
4911 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
4912 int i;
4913
4914 first = XVECEXP (vals, 0, 0);
4915 for (i = 0; i < n_elts; ++i)
4916 {
4917 x = XVECEXP (vals, 0, i);
4918 if (!CONSTANT_P (x))
4919 ++n_var;
4920 else
4921 {
4922 if (first_constant == NULL_RTX)
4923 first_constant = x;
4924 }
4925 if (i > 0 && !rtx_equal_p (x, first))
4926 all_same = false;
4927 }
4928
4929 /* If all elements are the same, use splats to repeat elements. */
4930 if (all_same)
4931 {
4932 if (!CONSTANT_P (first)
4933 && !register_operand (first, GET_MODE (x)))
4934 first = force_reg (GET_MODE (first), first);
4935 emit_insn (gen_spu_splats (target, first));
4936 return;
4937 }
4938
4939 /* Load constant parts. */
4940 if (n_var != n_elts)
4941 {
4942 if (n_var == 0)
4943 {
4944 emit_move_insn (target,
4945 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
4946 }
4947 else
4948 {
4949 rtx constant_parts_rtx = copy_rtx (vals);
4950
4951 gcc_assert (first_constant != NULL_RTX);
4952 /* Fill empty slots with the first constant; this increases
4953 our chance of using splats in the recursive call below. */
4954 for (i = 0; i < n_elts; ++i)
4955 if (!CONSTANT_P (XVECEXP (constant_parts_rtx, 0, i)))
4956 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
4957
4958 spu_expand_vector_init (target, constant_parts_rtx);
4959 }
4960 }
4961
4962 /* Load variable parts. */
4963 if (n_var != 0)
4964 {
4965 rtx insert_operands[4];
4966
4967 insert_operands[0] = target;
4968 insert_operands[2] = target;
4969 for (i = 0; i < n_elts; ++i)
4970 {
4971 x = XVECEXP (vals, 0, i);
4972 if (!CONSTANT_P (x))
4973 {
4974 if (!register_operand (x, GET_MODE (x)))
4975 x = force_reg (GET_MODE (x), x);
4976 insert_operands[1] = x;
4977 insert_operands[3] = GEN_INT (i);
4978 spu_builtin_insert (insert_operands);
4979 }
4980 }
4981 }
4982 }
4983
4984 /* Return insn index for the vector compare instruction for given CODE,
4985 and DEST_MODE, OP_MODE. Return -1 if valid insn is not available. */
4986
4987 static int
4988 get_vec_cmp_insn (enum rtx_code code,
4989 enum machine_mode dest_mode,
4990 enum machine_mode op_mode)
4991
4992 {
4993 switch (code)
4994 {
4995 case EQ:
4996 if (dest_mode == V16QImode && op_mode == V16QImode)
4997 return CODE_FOR_ceq_v16qi;
4998 if (dest_mode == V8HImode && op_mode == V8HImode)
4999 return CODE_FOR_ceq_v8hi;
5000 if (dest_mode == V4SImode && op_mode == V4SImode)
5001 return CODE_FOR_ceq_v4si;
5002 if (dest_mode == V4SImode && op_mode == V4SFmode)
5003 return CODE_FOR_ceq_v4sf;
5004 if (dest_mode == V2DImode && op_mode == V2DFmode)
5005 return CODE_FOR_ceq_v2df;
5006 break;
5007 case GT:
5008 if (dest_mode == V16QImode && op_mode == V16QImode)
5009 return CODE_FOR_cgt_v16qi;
5010 if (dest_mode == V8HImode && op_mode == V8HImode)
5011 return CODE_FOR_cgt_v8hi;
5012 if (dest_mode == V4SImode && op_mode == V4SImode)
5013 return CODE_FOR_cgt_v4si;
5014 if (dest_mode == V4SImode && op_mode == V4SFmode)
5015 return CODE_FOR_cgt_v4sf;
5016 if (dest_mode == V2DImode && op_mode == V2DFmode)
5017 return CODE_FOR_cgt_v2df;
5018 break;
5019 case GTU:
5020 if (dest_mode == V16QImode && op_mode == V16QImode)
5021 return CODE_FOR_clgt_v16qi;
5022 if (dest_mode == V8HImode && op_mode == V8HImode)
5023 return CODE_FOR_clgt_v8hi;
5024 if (dest_mode == V4SImode && op_mode == V4SImode)
5025 return CODE_FOR_clgt_v4si;
5026 break;
5027 default:
5028 break;
5029 }
5030 return -1;
5031 }
5032
5033 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
5034 DMODE is expected destination mode. This is a recursive function. */
5035
5036 static rtx
5037 spu_emit_vector_compare (enum rtx_code rcode,
5038 rtx op0, rtx op1,
5039 enum machine_mode dmode)
5040 {
5041 int vec_cmp_insn;
5042 rtx mask;
5043 enum machine_mode dest_mode;
5044 enum machine_mode op_mode = GET_MODE (op1);
5045
5046 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
5047
5048 /* Floating point vector compare instructions use destination V4SImode.
5049 Double floating point vector compare instructions use destination V2DImode.
5050 Move destination to appropriate mode later. */
5051 if (dmode == V4SFmode)
5052 dest_mode = V4SImode;
5053 else if (dmode == V2DFmode)
5054 dest_mode = V2DImode;
5055 else
5056 dest_mode = dmode;
5057
5058 mask = gen_reg_rtx (dest_mode);
5059 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5060
5061 if (vec_cmp_insn == -1)
5062 {
5063 bool swap_operands = false;
5064 bool try_again = false;
5065 switch (rcode)
5066 {
5067 case LT:
5068 rcode = GT;
5069 swap_operands = true;
5070 try_again = true;
5071 break;
5072 case LTU:
5073 rcode = GTU;
5074 swap_operands = true;
5075 try_again = true;
5076 break;
5077 case NE:
5078 /* Treat A != B as ~(A==B). */
5079 {
5080 enum insn_code nor_code;
5081 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5082 nor_code = optab_handler (one_cmpl_optab, (int)dest_mode)->insn_code;
5083 gcc_assert (nor_code != CODE_FOR_nothing);
5084 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
5085 if (dmode != dest_mode)
5086 {
5087 rtx temp = gen_reg_rtx (dest_mode);
5088 convert_move (temp, mask, 0);
5089 return temp;
5090 }
5091 return mask;
5092 }
5093 break;
5094 case GE:
5095 case GEU:
5096 case LE:
5097 case LEU:
5098 /* Try GT/GTU/LT/LTU OR EQ */
5099 {
5100 rtx c_rtx, eq_rtx;
5101 enum insn_code ior_code;
5102 enum rtx_code new_code;
5103
5104 switch (rcode)
5105 {
5106 case GE: new_code = GT; break;
5107 case GEU: new_code = GTU; break;
5108 case LE: new_code = LT; break;
5109 case LEU: new_code = LTU; break;
5110 default:
5111 gcc_unreachable ();
5112 }
5113
5114 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
5115 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
5116
5117 ior_code = optab_handler (ior_optab, (int)dest_mode)->insn_code;
5118 gcc_assert (ior_code != CODE_FOR_nothing);
5119 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
5120 if (dmode != dest_mode)
5121 {
5122 rtx temp = gen_reg_rtx (dest_mode);
5123 convert_move (temp, mask, 0);
5124 return temp;
5125 }
5126 return mask;
5127 }
5128 break;
5129 default:
5130 gcc_unreachable ();
5131 }
5132
5133 /* You only get two chances. */
5134 if (try_again)
5135 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
5136
5137 gcc_assert (vec_cmp_insn != -1);
5138
5139 if (swap_operands)
5140 {
5141 rtx tmp;
5142 tmp = op0;
5143 op0 = op1;
5144 op1 = tmp;
5145 }
5146 }
5147
5148 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
5149 if (dmode != dest_mode)
5150 {
5151 rtx temp = gen_reg_rtx (dest_mode);
5152 convert_move (temp, mask, 0);
5153 return temp;
5154 }
5155 return mask;
5156 }
5157
5158
5159 /* Emit vector conditional expression.
5160 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
5161 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
5162
5163 int
5164 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
5165 rtx cond, rtx cc_op0, rtx cc_op1)
5166 {
5167 enum machine_mode dest_mode = GET_MODE (dest);
5168 enum rtx_code rcode = GET_CODE (cond);
5169 rtx mask;
5170
5171 /* Get the vector mask for the given relational operations. */
5172 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
5173
5174 emit_insn(gen_selb (dest, op2, op1, mask));
5175
5176 return 1;
5177 }
5178
5179 static rtx
5180 spu_force_reg (enum machine_mode mode, rtx op)
5181 {
5182 rtx x, r;
5183 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
5184 {
5185 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
5186 || GET_MODE (op) == BLKmode)
5187 return force_reg (mode, convert_to_mode (mode, op, 0));
5188 abort ();
5189 }
5190
5191 r = force_reg (GET_MODE (op), op);
5192 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
5193 {
5194 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
5195 if (x)
5196 return x;
5197 }
5198
5199 x = gen_reg_rtx (mode);
5200 emit_insn (gen_spu_convert (x, r));
5201 return x;
5202 }
5203
5204 static void
5205 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
5206 {
5207 HOST_WIDE_INT v = 0;
5208 int lsbits;
5209 /* Check the range of immediate operands. */
5210 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
5211 {
5212 int range = p - SPU_BTI_7;
5213
5214 if (!CONSTANT_P (op))
5215 error ("%s expects an integer literal in the range [%d, %d].",
5216 d->name,
5217 spu_builtin_range[range].low, spu_builtin_range[range].high);
5218
5219 if (GET_CODE (op) == CONST
5220 && (GET_CODE (XEXP (op, 0)) == PLUS
5221 || GET_CODE (XEXP (op, 0)) == MINUS))
5222 {
5223 v = INTVAL (XEXP (XEXP (op, 0), 1));
5224 op = XEXP (XEXP (op, 0), 0);
5225 }
5226 else if (GET_CODE (op) == CONST_INT)
5227 v = INTVAL (op);
5228 else if (GET_CODE (op) == CONST_VECTOR
5229 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
5230 v = INTVAL (CONST_VECTOR_ELT (op, 0));
5231
5232 /* The default for v is 0 which is valid in every range. */
5233 if (v < spu_builtin_range[range].low
5234 || v > spu_builtin_range[range].high)
5235 error ("%s expects an integer literal in the range [%d, %d]. ("
5236 HOST_WIDE_INT_PRINT_DEC ")",
5237 d->name,
5238 spu_builtin_range[range].low, spu_builtin_range[range].high,
5239 v);
5240
5241 switch (p)
5242 {
5243 case SPU_BTI_S10_4:
5244 lsbits = 4;
5245 break;
5246 case SPU_BTI_U16_2:
5247 /* This is only used in lqa and stqa. Even though the insns
5248 encode 16 bits of the address (all but the 2 least
5249 significant), only 14 bits are used because it is masked to
5250 be 16 byte aligned. */
5251 lsbits = 4;
5252 break;
5253 case SPU_BTI_S16_2:
5254 /* This is used for lqr and stqr. */
5255 lsbits = 2;
5256 break;
5257 default:
5258 lsbits = 0;
5259 }
5260
5261 if (GET_CODE (op) == LABEL_REF
5262 || (GET_CODE (op) == SYMBOL_REF
5263 && SYMBOL_REF_FUNCTION_P (op))
5264 || (v & ((1 << lsbits) - 1)) != 0)
5265 warning (0, "%d least significant bits of %s are ignored.", lsbits,
5266 d->name);
5267 }
5268 }
5269
5270
5271 static void
5272 expand_builtin_args (struct spu_builtin_description *d, tree exp,
5273 rtx target, rtx ops[])
5274 {
5275 enum insn_code icode = d->icode;
5276 int i = 0, a;
5277
5278 /* Expand the arguments into rtl. */
5279
5280 if (d->parm[0] != SPU_BTI_VOID)
5281 ops[i++] = target;
5282
5283 for (a = 0; i < insn_data[icode].n_operands; i++, a++)
5284 {
5285 tree arg = CALL_EXPR_ARG (exp, a);
5286 if (arg == 0)
5287 abort ();
5288 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, 0);
5289 }
5290 }
5291
5292 static rtx
5293 spu_expand_builtin_1 (struct spu_builtin_description *d,
5294 tree exp, rtx target)
5295 {
5296 rtx pat;
5297 rtx ops[8];
5298 enum insn_code icode = d->icode;
5299 enum machine_mode mode, tmode;
5300 int i, p;
5301 tree return_type;
5302
5303 /* Set up ops[] with values from arglist. */
5304 expand_builtin_args (d, exp, target, ops);
5305
5306 /* Handle the target operand which must be operand 0. */
5307 i = 0;
5308 if (d->parm[0] != SPU_BTI_VOID)
5309 {
5310
5311 /* We prefer the mode specified for the match_operand, otherwise
5312 use the mode from the builtin function prototype. */
5313 tmode = insn_data[d->icode].operand[0].mode;
5314 if (tmode == VOIDmode)
5315 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
5316
5317 /* Try to use target because not using it can lead to extra copies
5318 and when we are using all of the registers, extra copies lead
5319 to extra spills. */
5320 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
5321 ops[0] = target;
5322 else
5323 target = ops[0] = gen_reg_rtx (tmode);
5324
5325 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
5326 abort ();
5327
5328 i++;
5329 }
5330
5331 if (d->fcode == SPU_MASK_FOR_LOAD)
5332 {
5333 enum machine_mode mode = insn_data[icode].operand[1].mode;
5334 tree arg;
5335 rtx addr, op, pat;
5336
5337 /* get addr */
5338 arg = CALL_EXPR_ARG (exp, 0);
5339 gcc_assert (TREE_CODE (TREE_TYPE (arg)) == POINTER_TYPE);
5340 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
5341 addr = memory_address (mode, op);
5342
5343 /* negate addr */
5344 op = gen_reg_rtx (GET_MODE (addr));
5345 emit_insn (gen_rtx_SET (VOIDmode, op,
5346 gen_rtx_NEG (GET_MODE (addr), addr)));
5347 op = gen_rtx_MEM (mode, op);
5348
5349 pat = GEN_FCN (icode) (target, op);
5350 if (!pat)
5351 return 0;
5352 emit_insn (pat);
5353 return target;
5354 }
5355
5356 /* Ignore align_hint, but still expand its args in case they have
5357 side effects. */
5358 if (icode == CODE_FOR_spu_align_hint)
5359 return 0;
5360
5361 /* Handle the rest of the operands. */
5362 for (p = 1; i < insn_data[icode].n_operands; i++, p++)
5363 {
5364 if (insn_data[d->icode].operand[i].mode != VOIDmode)
5365 mode = insn_data[d->icode].operand[i].mode;
5366 else
5367 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
5368
5369 /* mode can be VOIDmode here for labels */
5370
5371 /* For specific intrinsics with an immediate operand, e.g.,
5372 si_ai(), we sometimes need to convert the scalar argument to a
5373 vector argument by splatting the scalar. */
5374 if (VECTOR_MODE_P (mode)
5375 && (GET_CODE (ops[i]) == CONST_INT
5376 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
5377 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT)
5378 && d->parm[i] != SPU_BTI_QUADWORD)
5379 {
5380 if (GET_CODE (ops[i]) == CONST_INT)
5381 ops[i] = spu_const (mode, INTVAL (ops[i]));
5382 else
5383 {
5384 rtx reg = gen_reg_rtx (mode);
5385 enum machine_mode imode = GET_MODE_INNER (mode);
5386 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
5387 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
5388 if (imode != GET_MODE (ops[i]))
5389 ops[i] = convert_to_mode (imode, ops[i],
5390 TYPE_UNSIGNED (spu_builtin_types
5391 [d->parm[i]]));
5392 emit_insn (gen_spu_splats (reg, ops[i]));
5393 ops[i] = reg;
5394 }
5395 }
5396
5397 spu_check_builtin_parm (d, ops[i], d->parm[p]);
5398
5399 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
5400 ops[i] = spu_force_reg (mode, ops[i]);
5401 }
5402
5403 switch (insn_data[icode].n_operands)
5404 {
5405 case 0:
5406 pat = GEN_FCN (icode) (0);
5407 break;
5408 case 1:
5409 pat = GEN_FCN (icode) (ops[0]);
5410 break;
5411 case 2:
5412 pat = GEN_FCN (icode) (ops[0], ops[1]);
5413 break;
5414 case 3:
5415 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
5416 break;
5417 case 4:
5418 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
5419 break;
5420 case 5:
5421 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
5422 break;
5423 case 6:
5424 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
5425 break;
5426 default:
5427 abort ();
5428 }
5429
5430 if (!pat)
5431 abort ();
5432
5433 if (d->type == B_CALL || d->type == B_BISLED)
5434 emit_call_insn (pat);
5435 else if (d->type == B_JUMP)
5436 {
5437 emit_jump_insn (pat);
5438 emit_barrier ();
5439 }
5440 else
5441 emit_insn (pat);
5442
5443 return_type = spu_builtin_types[d->parm[0]];
5444 if (d->parm[0] != SPU_BTI_VOID
5445 && GET_MODE (target) != TYPE_MODE (return_type))
5446 {
5447 /* target is the return value. It should always be the mode of
5448 the builtin function prototype. */
5449 target = spu_force_reg (TYPE_MODE (return_type), target);
5450 }
5451
5452 return target;
5453 }
5454
5455 rtx
5456 spu_expand_builtin (tree exp,
5457 rtx target,
5458 rtx subtarget ATTRIBUTE_UNUSED,
5459 enum machine_mode mode ATTRIBUTE_UNUSED,
5460 int ignore ATTRIBUTE_UNUSED)
5461 {
5462 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
5463 unsigned int fcode = DECL_FUNCTION_CODE (fndecl) - END_BUILTINS;
5464 struct spu_builtin_description *d;
5465
5466 if (fcode < NUM_SPU_BUILTINS)
5467 {
5468 d = &spu_builtins[fcode];
5469
5470 return spu_expand_builtin_1 (d, exp, target);
5471 }
5472 abort ();
5473 }
5474
5475 /* Implement targetm.vectorize.builtin_mul_widen_even. */
5476 static tree
5477 spu_builtin_mul_widen_even (tree type)
5478 {
5479 switch (TYPE_MODE (type))
5480 {
5481 case V8HImode:
5482 if (TYPE_UNSIGNED (type))
5483 return spu_builtins[SPU_MULE_0].fndecl;
5484 else
5485 return spu_builtins[SPU_MULE_1].fndecl;
5486 break;
5487 default:
5488 return NULL_TREE;
5489 }
5490 }
5491
5492 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
5493 static tree
5494 spu_builtin_mul_widen_odd (tree type)
5495 {
5496 switch (TYPE_MODE (type))
5497 {
5498 case V8HImode:
5499 if (TYPE_UNSIGNED (type))
5500 return spu_builtins[SPU_MULO_1].fndecl;
5501 else
5502 return spu_builtins[SPU_MULO_0].fndecl;
5503 break;
5504 default:
5505 return NULL_TREE;
5506 }
5507 }
5508
5509 /* Implement targetm.vectorize.builtin_mask_for_load. */
5510 static tree
5511 spu_builtin_mask_for_load (void)
5512 {
5513 struct spu_builtin_description *d = &spu_builtins[SPU_MASK_FOR_LOAD];
5514 gcc_assert (d);
5515 return d->fndecl;
5516 }
5517
5518 /* Implement targetm.vectorize.builtin_vectorization_cost. */
5519 static int
5520 spu_builtin_vectorization_cost (bool runtime_test)
5521 {
5522 /* If the branch of the runtime test is taken - i.e. - the vectorized
5523 version is skipped - this incurs a misprediction cost (because the
5524 vectorized version is expected to be the fall-through). So we subtract
5525 the latency of a mispredicted branch from the costs that are incurred
5526 when the vectorized version is executed. */
5527 if (runtime_test)
5528 return -19;
5529 else
5530 return 0;
5531 }
5532
5533 /* Return true iff a data reference of TYPE can reach vector alignment (16)
5534 after applying N iterations. This routine does not determine
5535 how many iterations are required to reach the desired alignment. */
5536
5537 static bool
5538 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
5539 {
5540 if (is_packed)
5541 return false;
5542
5543 /* All other types are naturally aligned. */
5544 return true;
5545 }
5546
5547 /* Count the total number of instructions in each pipe and return the
5548 maximum, which is used as the Minimum Iteration Interval (MII)
5549 in the modulo scheduler. get_pipe() will return -2, -1, 0, or 1.
5550 Instructions that return -2 can go in either pipe0 or pipe1. */
5551 static int
5552 spu_sms_res_mii (struct ddg *g)
5553 {
5554 int i;
5555 unsigned t[4] = {0, 0, 0, 0};
5556
5557 for (i = 0; i < g->num_nodes; i++)
5558 {
5559 rtx insn = g->nodes[i].insn;
5560 int p = get_pipe (insn) + 2;
5561
5562 assert (p >= 0);
5563 assert (p < 4);
5564
5565 t[p]++;
5566 if (dump_file && INSN_P (insn))
5567 fprintf (dump_file, "i%d %s %d %d\n",
5568 INSN_UID (insn),
5569 insn_data[INSN_CODE(insn)].name,
5570 p, t[p]);
5571 }
5572 if (dump_file)
5573 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
5574
5575 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
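/* t[0] counts insns that can issue in either pipe; t[2] and t[3] count
the pipe0-only and pipe1-only insns (t[1] is not used).  The MII must be
at least the larger dedicated-pipe count, and at least half, rounded up,
of the total count t[0] + t[2] + t[3], since at most two insns issue per
cycle. */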
5576 }
5577
5578
5579 void
5580 spu_init_expanders (void)
5581 {
5582 /* HARD_FRAME_REGISTER is only 128 bit aligned when
5583 * frame_pointer_needed is true. We don't know that until we're
5584 * expanding the prologue. */
5585 if (cfun)
5586 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
5587 }
5588
5589 static enum machine_mode
5590 spu_libgcc_cmp_return_mode (void)
5591 {
5592
5593 /* For the SPU, word_mode is TImode, so it is better to use SImode
5594 for compare returns. */
5595 return SImode;
5596 }
5597
5598 static enum machine_mode
5599 spu_libgcc_shift_count_mode (void)
5600 {
5601 /* For the SPU, word_mode is TImode, so it is better to use SImode
5602 for shift counts. */
5603 return SImode;
5604 }