1 /* Copyright (C) 2006, 2007, 2008, 2009, 2010 Free Software Foundation, Inc.
2
3 This file is free software; you can redistribute it and/or modify it under
4 the terms of the GNU General Public License as published by the Free
5 Software Foundation; either version 3 of the License, or (at your option)
6 any later version.
7
8 This file is distributed in the hope that it will be useful, but WITHOUT
9 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
10 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
11 for more details.
12
13 You should have received a copy of the GNU General Public License
14 along with GCC; see the file COPYING3. If not see
15 <http://www.gnu.org/licenses/>. */
16
17 #include "config.h"
18 #include "system.h"
19 #include "coretypes.h"
20 #include "tm.h"
21 #include "rtl.h"
22 #include "regs.h"
23 #include "hard-reg-set.h"
24 #include "insn-config.h"
25 #include "conditions.h"
26 #include "insn-attr.h"
27 #include "flags.h"
28 #include "recog.h"
29 #include "obstack.h"
30 #include "tree.h"
31 #include "expr.h"
32 #include "optabs.h"
33 #include "except.h"
34 #include "function.h"
35 #include "output.h"
36 #include "basic-block.h"
37 #include "integrate.h"
38 #include "diagnostic-core.h"
39 #include "toplev.h"
40 #include "ggc.h"
41 #include "hashtab.h"
42 #include "tm_p.h"
43 #include "target.h"
44 #include "target-def.h"
45 #include "langhooks.h"
46 #include "reload.h"
47 #include "cfglayout.h"
48 #include "sched-int.h"
49 #include "params.h"
50 #include "assert.h"
51 #include "machmode.h"
52 #include "gimple.h"
53 #include "tm-constrs.h"
54 #include "ddg.h"
55 #include "sbitmap.h"
56 #include "timevar.h"
57 #include "df.h"
58
59 /* Builtin types, data and prototypes. */
60
61 enum spu_builtin_type_index
62 {
63 SPU_BTI_END_OF_PARAMS,
64
65 /* We create new type nodes for these. */
66 SPU_BTI_V16QI,
67 SPU_BTI_V8HI,
68 SPU_BTI_V4SI,
69 SPU_BTI_V2DI,
70 SPU_BTI_V4SF,
71 SPU_BTI_V2DF,
72 SPU_BTI_UV16QI,
73 SPU_BTI_UV8HI,
74 SPU_BTI_UV4SI,
75 SPU_BTI_UV2DI,
76
77 /* A 16-byte type. (Implemented with V16QI_type_node) */
78 SPU_BTI_QUADWORD,
79
80 /* These all correspond to intSI_type_node */
81 SPU_BTI_7,
82 SPU_BTI_S7,
83 SPU_BTI_U7,
84 SPU_BTI_S10,
85 SPU_BTI_S10_4,
86 SPU_BTI_U14,
87 SPU_BTI_16,
88 SPU_BTI_S16,
89 SPU_BTI_S16_2,
90 SPU_BTI_U16,
91 SPU_BTI_U16_2,
92 SPU_BTI_U18,
93
94 /* These correspond to the standard types */
95 SPU_BTI_INTQI,
96 SPU_BTI_INTHI,
97 SPU_BTI_INTSI,
98 SPU_BTI_INTDI,
99
100 SPU_BTI_UINTQI,
101 SPU_BTI_UINTHI,
102 SPU_BTI_UINTSI,
103 SPU_BTI_UINTDI,
104
105 SPU_BTI_FLOAT,
106 SPU_BTI_DOUBLE,
107
108 SPU_BTI_VOID,
109 SPU_BTI_PTR,
110
111 SPU_BTI_MAX
112 };
113
114 #define V16QI_type_node (spu_builtin_types[SPU_BTI_V16QI])
115 #define V8HI_type_node (spu_builtin_types[SPU_BTI_V8HI])
116 #define V4SI_type_node (spu_builtin_types[SPU_BTI_V4SI])
117 #define V2DI_type_node (spu_builtin_types[SPU_BTI_V2DI])
118 #define V4SF_type_node (spu_builtin_types[SPU_BTI_V4SF])
119 #define V2DF_type_node (spu_builtin_types[SPU_BTI_V2DF])
120 #define unsigned_V16QI_type_node (spu_builtin_types[SPU_BTI_UV16QI])
121 #define unsigned_V8HI_type_node (spu_builtin_types[SPU_BTI_UV8HI])
122 #define unsigned_V4SI_type_node (spu_builtin_types[SPU_BTI_UV4SI])
123 #define unsigned_V2DI_type_node (spu_builtin_types[SPU_BTI_UV2DI])
124
125 static GTY(()) tree spu_builtin_types[SPU_BTI_MAX];
126
127 struct spu_builtin_range
128 {
129 int low, high;
130 };
131
132 static struct spu_builtin_range spu_builtin_range[] = {
133 {-0x40ll, 0x7fll}, /* SPU_BTI_7 */
134 {-0x40ll, 0x3fll}, /* SPU_BTI_S7 */
135 {0ll, 0x7fll}, /* SPU_BTI_U7 */
136 {-0x200ll, 0x1ffll}, /* SPU_BTI_S10 */
137 {-0x2000ll, 0x1fffll}, /* SPU_BTI_S10_4 */
138 {0ll, 0x3fffll}, /* SPU_BTI_U14 */
139 {-0x8000ll, 0xffffll}, /* SPU_BTI_16 */
140 {-0x8000ll, 0x7fffll}, /* SPU_BTI_S16 */
141 {-0x20000ll, 0x1ffffll}, /* SPU_BTI_S16_2 */
142 {0ll, 0xffffll}, /* SPU_BTI_U16 */
143 {0ll, 0x3ffffll}, /* SPU_BTI_U16_2 */
144 {0ll, 0x3ffffll}, /* SPU_BTI_U18 */
145 };
146
147 \f
148 /* Target specific attribute specifications. */
149 char regs_ever_allocated[FIRST_PSEUDO_REGISTER];
150
151 /* Prototypes and external defs. */
152 static void spu_option_override (void);
153 static void spu_option_optimization (int, int);
154 static void spu_option_default_params (void);
155 static void spu_init_builtins (void);
156 static tree spu_builtin_decl (unsigned, bool);
157 static bool spu_scalar_mode_supported_p (enum machine_mode mode);
158 static bool spu_vector_mode_supported_p (enum machine_mode mode);
159 static bool spu_legitimate_address_p (enum machine_mode, rtx, bool);
160 static bool spu_addr_space_legitimate_address_p (enum machine_mode, rtx,
161 bool, addr_space_t);
162 static rtx adjust_operand (rtx op, HOST_WIDE_INT * start);
163 static rtx get_pic_reg (void);
164 static int need_to_save_reg (int regno, int saving);
165 static rtx frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset);
166 static rtx frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset);
167 static rtx frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm,
168 rtx scratch);
169 static void emit_nop_for_insn (rtx insn);
170 static bool insn_clobbers_hbr (rtx insn);
171 static void spu_emit_branch_hint (rtx before, rtx branch, rtx target,
172 int distance, sbitmap blocks);
173 static rtx spu_emit_vector_compare (enum rtx_code rcode, rtx op0, rtx op1,
174 enum machine_mode dmode);
175 static rtx get_branch_target (rtx branch);
176 static void spu_machine_dependent_reorg (void);
177 static int spu_sched_issue_rate (void);
178 static int spu_sched_variable_issue (FILE * dump, int verbose, rtx insn,
179 int can_issue_more);
180 static int get_pipe (rtx insn);
181 static int spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost);
182 static void spu_sched_init_global (FILE *, int, int);
183 static void spu_sched_init (FILE *, int, int);
184 static int spu_sched_reorder (FILE *, int, rtx *, int *, int);
185 static tree spu_handle_fndecl_attribute (tree * node, tree name, tree args,
186 int flags,
187 bool *no_add_attrs);
188 static tree spu_handle_vector_attribute (tree * node, tree name, tree args,
189 int flags,
190 bool *no_add_attrs);
191 static int spu_naked_function_p (tree func);
192 static bool spu_pass_by_reference (CUMULATIVE_ARGS *cum, enum machine_mode mode,
193 const_tree type, bool named);
194 static rtx spu_function_arg (CUMULATIVE_ARGS *cum, enum machine_mode mode,
195 const_tree type, bool named);
196 static void spu_function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
197 const_tree type, bool named);
198 static tree spu_build_builtin_va_list (void);
199 static void spu_va_start (tree, rtx);
200 static tree spu_gimplify_va_arg_expr (tree valist, tree type,
201 gimple_seq * pre_p, gimple_seq * post_p);
202 static int store_with_one_insn_p (rtx mem);
203 static int mem_is_padded_component_ref (rtx x);
204 static int reg_aligned_for_addr (rtx x);
205 static bool spu_assemble_integer (rtx x, unsigned int size, int aligned_p);
206 static void spu_asm_globalize_label (FILE * file, const char *name);
207 static bool spu_rtx_costs (rtx x, int code, int outer_code,
208 int *total, bool speed);
209 static bool spu_function_ok_for_sibcall (tree decl, tree exp);
210 static void spu_init_libfuncs (void);
211 static bool spu_return_in_memory (const_tree type, const_tree fntype);
212 static void fix_range (const char *);
213 static void spu_encode_section_info (tree, rtx, int);
214 static rtx spu_legitimize_address (rtx, rtx, enum machine_mode);
215 static rtx spu_addr_space_legitimize_address (rtx, rtx, enum machine_mode,
216 addr_space_t);
217 static tree spu_builtin_mul_widen_even (tree);
218 static tree spu_builtin_mul_widen_odd (tree);
219 static tree spu_builtin_mask_for_load (void);
220 static int spu_builtin_vectorization_cost (enum vect_cost_for_stmt, tree, int);
221 static bool spu_vector_alignment_reachable (const_tree, bool);
222 static tree spu_builtin_vec_perm (tree, tree *);
223 static enum machine_mode spu_addr_space_pointer_mode (addr_space_t);
224 static enum machine_mode spu_addr_space_address_mode (addr_space_t);
225 static bool spu_addr_space_subset_p (addr_space_t, addr_space_t);
226 static rtx spu_addr_space_convert (rtx, tree, tree);
227 static int spu_sms_res_mii (struct ddg *g);
228 static void asm_file_start (void);
229 static unsigned int spu_section_type_flags (tree, const char *, int);
230 static section *spu_select_section (tree, int, unsigned HOST_WIDE_INT);
231 static void spu_unique_section (tree, int);
232 static rtx spu_expand_load (rtx, rtx, rtx, int);
233 static void spu_trampoline_init (rtx, tree, rtx);
234
235 /* Which instruction set architecture to use. */
236 int spu_arch;
237 /* Which cpu are we tuning for. */
238 int spu_tune;
239
240 /* The hardware requires 8 insns between a hint and the branch it
241 affects. This variable describes how many rtl instructions the
242 compiler needs to see before inserting a hint, and then the compiler
243 will insert enough nops to make it at least 8 insns. The default is
244 for the compiler to allow up to 2 nops to be emitted. The nops are
245 inserted in pairs, so we round down. */
246 int spu_hint_dist = (8*4) - (2*4);
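/* A quick check of the arithmetic above (an illustrative note): SPU
   insns are 4 bytes, so 8 insns is 32 bytes of separation.  Allowing
   the default 2 nops of padding (8 bytes) means the compiler only has
   to see 32 - 8 = 24 bytes, i.e. 6 real insns, before a hint; the
   nops make up the difference.  */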
247
248 /* Determines whether we run variable tracking in machine dependent
249 reorganization. */
250 static int spu_flag_var_tracking;
251
252 enum spu_immediate {
253 SPU_NONE,
254 SPU_IL,
255 SPU_ILA,
256 SPU_ILH,
257 SPU_ILHU,
258 SPU_ORI,
259 SPU_ORHI,
260 SPU_ORBI,
261 SPU_IOHL
262 };
263 enum immediate_class
264 {
265 IC_POOL, /* constant pool */
266 IC_IL1, /* one il* instruction */
267 IC_IL2, /* both ilhu and iohl instructions */
268 IC_IL1s, /* one il* instruction */
269 IC_IL2s, /* both ilhu and iohl instructions */
270 IC_FSMBI, /* the fsmbi instruction */
271 IC_CPAT, /* one of the c*d instructions */
272 IC_FSMBI2 /* fsmbi plus 1 other instruction */
273 };
274
275 static enum spu_immediate which_immediate_load (HOST_WIDE_INT val);
276 static enum spu_immediate which_logical_immediate (HOST_WIDE_INT val);
277 static int cpat_info(unsigned char *arr, int size, int *prun, int *pstart);
278 static enum immediate_class classify_immediate (rtx op,
279 enum machine_mode mode);
280
281 static enum machine_mode spu_unwind_word_mode (void);
282
283 static enum machine_mode
284 spu_libgcc_cmp_return_mode (void);
285
286 static enum machine_mode
287 spu_libgcc_shift_count_mode (void);
288
289 /* Pointer mode for __ea references. */
290 #define EAmode (spu_ea_model != 32 ? DImode : SImode)
291
292 \f
293 /* Table of machine attributes. */
294 static const struct attribute_spec spu_attribute_table[] =
295 {
296 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
297 { "naked", 0, 0, true, false, false, spu_handle_fndecl_attribute },
298 { "spu_vector", 0, 0, false, true, false, spu_handle_vector_attribute },
299 { NULL, 0, 0, false, false, false, NULL }
300 };
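/* Roughly how these are used: "naked" attaches to function
   declarations (decl_req is true) and is tested via
   spu_naked_function_p in spu_expand_prologue below, which then emits
   no prologue at all; "spu_vector" attaches to types (type_req is
   true) and is handled by spu_handle_vector_attribute.  */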
301 \f
302 /* TARGET overrides. */
303
304 #undef TARGET_ADDR_SPACE_POINTER_MODE
305 #define TARGET_ADDR_SPACE_POINTER_MODE spu_addr_space_pointer_mode
306
307 #undef TARGET_ADDR_SPACE_ADDRESS_MODE
308 #define TARGET_ADDR_SPACE_ADDRESS_MODE spu_addr_space_address_mode
309
310 #undef TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P
311 #define TARGET_ADDR_SPACE_LEGITIMATE_ADDRESS_P \
312 spu_addr_space_legitimate_address_p
313
314 #undef TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS
315 #define TARGET_ADDR_SPACE_LEGITIMIZE_ADDRESS spu_addr_space_legitimize_address
316
317 #undef TARGET_ADDR_SPACE_SUBSET_P
318 #define TARGET_ADDR_SPACE_SUBSET_P spu_addr_space_subset_p
319
320 #undef TARGET_ADDR_SPACE_CONVERT
321 #define TARGET_ADDR_SPACE_CONVERT spu_addr_space_convert
322
323 #undef TARGET_INIT_BUILTINS
324 #define TARGET_INIT_BUILTINS spu_init_builtins
325 #undef TARGET_BUILTIN_DECL
326 #define TARGET_BUILTIN_DECL spu_builtin_decl
327
328 #undef TARGET_EXPAND_BUILTIN
329 #define TARGET_EXPAND_BUILTIN spu_expand_builtin
330
331 #undef TARGET_UNWIND_WORD_MODE
332 #define TARGET_UNWIND_WORD_MODE spu_unwind_word_mode
333
334 #undef TARGET_LEGITIMIZE_ADDRESS
335 #define TARGET_LEGITIMIZE_ADDRESS spu_legitimize_address
336
337 /* The current assembler doesn't like .4byte foo@ppu, so use the normal .long
338 and .quad for the debugger. When it is known that the assembler is fixed,
339 these can be removed. */
340 #undef TARGET_ASM_UNALIGNED_SI_OP
341 #define TARGET_ASM_UNALIGNED_SI_OP "\t.long\t"
342
343 #undef TARGET_ASM_ALIGNED_DI_OP
344 #define TARGET_ASM_ALIGNED_DI_OP "\t.quad\t"
345
346 /* The .8byte directive doesn't seem to work well for a 32 bit
347 architecture. */
348 #undef TARGET_ASM_UNALIGNED_DI_OP
349 #define TARGET_ASM_UNALIGNED_DI_OP NULL
350
351 #undef TARGET_RTX_COSTS
352 #define TARGET_RTX_COSTS spu_rtx_costs
353
354 #undef TARGET_ADDRESS_COST
355 #define TARGET_ADDRESS_COST hook_int_rtx_bool_0
356
357 #undef TARGET_SCHED_ISSUE_RATE
358 #define TARGET_SCHED_ISSUE_RATE spu_sched_issue_rate
359
360 #undef TARGET_SCHED_INIT_GLOBAL
361 #define TARGET_SCHED_INIT_GLOBAL spu_sched_init_global
362
363 #undef TARGET_SCHED_INIT
364 #define TARGET_SCHED_INIT spu_sched_init
365
366 #undef TARGET_SCHED_VARIABLE_ISSUE
367 #define TARGET_SCHED_VARIABLE_ISSUE spu_sched_variable_issue
368
369 #undef TARGET_SCHED_REORDER
370 #define TARGET_SCHED_REORDER spu_sched_reorder
371
372 #undef TARGET_SCHED_REORDER2
373 #define TARGET_SCHED_REORDER2 spu_sched_reorder
374
375 #undef TARGET_SCHED_ADJUST_COST
376 #define TARGET_SCHED_ADJUST_COST spu_sched_adjust_cost
377
378 #undef TARGET_ATTRIBUTE_TABLE
379 #define TARGET_ATTRIBUTE_TABLE spu_attribute_table
380
381 #undef TARGET_ASM_INTEGER
382 #define TARGET_ASM_INTEGER spu_assemble_integer
383
384 #undef TARGET_SCALAR_MODE_SUPPORTED_P
385 #define TARGET_SCALAR_MODE_SUPPORTED_P spu_scalar_mode_supported_p
386
387 #undef TARGET_VECTOR_MODE_SUPPORTED_P
388 #define TARGET_VECTOR_MODE_SUPPORTED_P spu_vector_mode_supported_p
389
390 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
391 #define TARGET_FUNCTION_OK_FOR_SIBCALL spu_function_ok_for_sibcall
392
393 #undef TARGET_ASM_GLOBALIZE_LABEL
394 #define TARGET_ASM_GLOBALIZE_LABEL spu_asm_globalize_label
395
396 #undef TARGET_PASS_BY_REFERENCE
397 #define TARGET_PASS_BY_REFERENCE spu_pass_by_reference
398
399 #undef TARGET_FUNCTION_ARG
400 #define TARGET_FUNCTION_ARG spu_function_arg
401
402 #undef TARGET_FUNCTION_ARG_ADVANCE
403 #define TARGET_FUNCTION_ARG_ADVANCE spu_function_arg_advance
404
405 #undef TARGET_MUST_PASS_IN_STACK
406 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
407
408 #undef TARGET_BUILD_BUILTIN_VA_LIST
409 #define TARGET_BUILD_BUILTIN_VA_LIST spu_build_builtin_va_list
410
411 #undef TARGET_EXPAND_BUILTIN_VA_START
412 #define TARGET_EXPAND_BUILTIN_VA_START spu_va_start
413
414 #undef TARGET_SETUP_INCOMING_VARARGS
415 #define TARGET_SETUP_INCOMING_VARARGS spu_setup_incoming_varargs
416
417 #undef TARGET_MACHINE_DEPENDENT_REORG
418 #define TARGET_MACHINE_DEPENDENT_REORG spu_machine_dependent_reorg
419
420 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
421 #define TARGET_GIMPLIFY_VA_ARG_EXPR spu_gimplify_va_arg_expr
422
423 #undef TARGET_DEFAULT_TARGET_FLAGS
424 #define TARGET_DEFAULT_TARGET_FLAGS (TARGET_DEFAULT)
425
426 #undef TARGET_INIT_LIBFUNCS
427 #define TARGET_INIT_LIBFUNCS spu_init_libfuncs
428
429 #undef TARGET_RETURN_IN_MEMORY
430 #define TARGET_RETURN_IN_MEMORY spu_return_in_memory
431
432 #undef TARGET_ENCODE_SECTION_INFO
433 #define TARGET_ENCODE_SECTION_INFO spu_encode_section_info
434
435 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN
436 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_EVEN spu_builtin_mul_widen_even
437
438 #undef TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD
439 #define TARGET_VECTORIZE_BUILTIN_MUL_WIDEN_ODD spu_builtin_mul_widen_odd
440
441 #undef TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD
442 #define TARGET_VECTORIZE_BUILTIN_MASK_FOR_LOAD spu_builtin_mask_for_load
443
444 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
445 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST spu_builtin_vectorization_cost
446
447 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
448 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE spu_vector_alignment_reachable
449
450 #undef TARGET_VECTORIZE_BUILTIN_VEC_PERM
451 #define TARGET_VECTORIZE_BUILTIN_VEC_PERM spu_builtin_vec_perm
452
453 #undef TARGET_LIBGCC_CMP_RETURN_MODE
454 #define TARGET_LIBGCC_CMP_RETURN_MODE spu_libgcc_cmp_return_mode
455
456 #undef TARGET_LIBGCC_SHIFT_COUNT_MODE
457 #define TARGET_LIBGCC_SHIFT_COUNT_MODE spu_libgcc_shift_count_mode
458
459 #undef TARGET_SCHED_SMS_RES_MII
460 #define TARGET_SCHED_SMS_RES_MII spu_sms_res_mii
461
462 #undef TARGET_ASM_FILE_START
463 #define TARGET_ASM_FILE_START asm_file_start
464
465 #undef TARGET_SECTION_TYPE_FLAGS
466 #define TARGET_SECTION_TYPE_FLAGS spu_section_type_flags
467
468 #undef TARGET_ASM_SELECT_SECTION
469 #define TARGET_ASM_SELECT_SECTION spu_select_section
470
471 #undef TARGET_ASM_UNIQUE_SECTION
472 #define TARGET_ASM_UNIQUE_SECTION spu_unique_section
473
474 #undef TARGET_LEGITIMATE_ADDRESS_P
475 #define TARGET_LEGITIMATE_ADDRESS_P spu_legitimate_address_p
476
477 #undef TARGET_TRAMPOLINE_INIT
478 #define TARGET_TRAMPOLINE_INIT spu_trampoline_init
479
480 #undef TARGET_OPTION_OVERRIDE
481 #define TARGET_OPTION_OVERRIDE spu_option_override
482
483 #undef TARGET_OPTION_OPTIMIZATION
484 #define TARGET_OPTION_OPTIMIZATION spu_option_optimization
485
486 #undef TARGET_OPTION_DEFAULT_PARAMS
487 #define TARGET_OPTION_DEFAULT_PARAMS spu_option_default_params
488
489 #undef TARGET_EXCEPT_UNWIND_INFO
490 #define TARGET_EXCEPT_UNWIND_INFO sjlj_except_unwind_info
491
492 struct gcc_target targetm = TARGET_INITIALIZER;
493
494 static void
495 spu_option_optimization (int level ATTRIBUTE_UNUSED, int size ATTRIBUTE_UNUSED)
496 {
497 /* With so many registers this is better on by default. */
498 flag_rename_registers = 1;
499 }
500
501 /* Implement TARGET_OPTION_DEFAULT_PARAMS. */
502 static void
503 spu_option_default_params (void)
504 {
505 /* Override some of the default param values. With so many registers
506 larger values are better for these params. */
507 set_default_param_value (PARAM_MAX_PENDING_LIST_LENGTH, 128);
508 }
509
510 /* Implement TARGET_OPTION_OVERRIDE. */
511 static void
512 spu_option_override (void)
513 {
514 /* Small loops will be unpeeled at -O3. For SPU it is more important
515 to keep code small by default. */
516 if (!flag_unroll_loops && !flag_peel_loops)
517 maybe_set_param_value (PARAM_MAX_COMPLETELY_PEEL_TIMES, 1,
518 global_options.x_param_values,
519 global_options_set.x_param_values);
520
521 flag_omit_frame_pointer = 1;
522
523 /* Functions must be 8-byte aligned so we correctly handle dual issue. */
524 if (align_functions < 8)
525 align_functions = 8;
526
527 spu_hint_dist = 8*4 - spu_max_nops*4;
528 if (spu_hint_dist < 0)
529 spu_hint_dist = 0;
530
531 if (spu_fixed_range_string)
532 fix_range (spu_fixed_range_string);
533
534 /* Determine processor architectural level. */
535 if (spu_arch_string)
536 {
537 if (strcmp (&spu_arch_string[0], "cell") == 0)
538 spu_arch = PROCESSOR_CELL;
539 else if (strcmp (&spu_arch_string[0], "celledp") == 0)
540 spu_arch = PROCESSOR_CELLEDP;
541 else
542 error ("Unknown architecture '%s'", &spu_arch_string[0]);
543 }
544
545 /* Determine processor to tune for. */
546 if (spu_tune_string)
547 {
548 if (strcmp (&spu_tune_string[0], "cell") == 0)
549 spu_tune = PROCESSOR_CELL;
550 else if (strcmp (&spu_tune_string[0], "celledp") == 0)
551 spu_tune = PROCESSOR_CELLEDP;
552 else
553 error ("Unknown architecture '%s'", &spu_tune_string[0]);
554 }
555
556 /* Change defaults according to the processor architecture. */
557 if (spu_arch == PROCESSOR_CELLEDP)
558 {
559 /* If no command line option has been otherwise specified, change
560 the default to -mno-safe-hints on celledp -- only the original
561 Cell/B.E. processors require this workaround. */
562 if (!(target_flags_explicit & MASK_SAFE_HINTS))
563 target_flags &= ~MASK_SAFE_HINTS;
564 }
565
566 REAL_MODE_FORMAT (SFmode) = &spu_single_format;
567 }
568 \f
569 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
570 struct attribute_spec.handler. */
571
572 /* True if MODE is valid for the target. By "valid", we mean able to
573 be manipulated in non-trivial ways. In particular, this means all
574 the arithmetic is supported. */
575 static bool
576 spu_scalar_mode_supported_p (enum machine_mode mode)
577 {
578 switch (mode)
579 {
580 case QImode:
581 case HImode:
582 case SImode:
583 case SFmode:
584 case DImode:
585 case TImode:
586 case DFmode:
587 return true;
588
589 default:
590 return false;
591 }
592 }
593
594 /* Similarly for vector modes. "Supported" here is less strict. At
595 least some operations are supported; need to check optabs or builtins
596 for further details. */
597 static bool
598 spu_vector_mode_supported_p (enum machine_mode mode)
599 {
600 switch (mode)
601 {
602 case V16QImode:
603 case V8HImode:
604 case V4SImode:
605 case V2DImode:
606 case V4SFmode:
607 case V2DFmode:
608 return true;
609
610 default:
611 return false;
612 }
613 }
614
615 /* GCC assumes that in a paradoxical SUBREG the inner mode occupies the
616 least significant bytes of the outer mode. This function returns
617 TRUE for the SUBREGs where this is correct. */
618 int
619 valid_subreg (rtx op)
620 {
621 enum machine_mode om = GET_MODE (op);
622 enum machine_mode im = GET_MODE (SUBREG_REG (op));
623 return om != VOIDmode && im != VOIDmode
624 && (GET_MODE_SIZE (im) == GET_MODE_SIZE (om)
625 || (GET_MODE_SIZE (im) <= 4 && GET_MODE_SIZE (om) <= 4)
626 || (GET_MODE_SIZE (im) >= 16 && GET_MODE_SIZE (om) >= 16));
627 }
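/* A few examples of the test above: (subreg:SI (reg:HI)) is valid
   because both modes are at most 4 bytes, (subreg:V4SI (reg:TI)) is
   valid because both are 16 bytes, but (subreg:DI (reg:SI)) is not,
   since the sizes differ and fall on opposite sides of the 4- and
   16-byte limits.  */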
628
629 /* When insv and ext[sz]v are passed a TI SUBREG, we want to strip it off
630 and adjust the start offset. */
631 static rtx
632 adjust_operand (rtx op, HOST_WIDE_INT * start)
633 {
634 enum machine_mode mode;
635 int op_size;
636 /* Strip any paradoxical SUBREG. */
637 if (GET_CODE (op) == SUBREG
638 && (GET_MODE_BITSIZE (GET_MODE (op))
639 > GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)))))
640 {
641 if (start)
642 *start -=
643 GET_MODE_BITSIZE (GET_MODE (op)) -
644 GET_MODE_BITSIZE (GET_MODE (SUBREG_REG (op)));
645 op = SUBREG_REG (op);
646 }
647 /* If it is smaller than SI, ensure a SUBREG. */
648 op_size = GET_MODE_BITSIZE (GET_MODE (op));
649 if (op_size < 32)
650 {
651 if (start)
652 *start += 32 - op_size;
653 op_size = 32;
654 }
655 /* If it is not a MODE_INT (and/or it is smaller than SI) add a SUBREG. */
656 mode = mode_for_size (op_size, MODE_INT, 0);
657 if (mode != GET_MODE (op))
658 op = gen_rtx_SUBREG (mode, op, 0);
659 return op;
660 }
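/* For example (illustrative only): an HImode register with *start of 5
   comes back as (subreg:SI (reg:HI) 0) with *start bumped to 21, so
   the bit offset is now measured within the containing 32-bit word.  */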
661
662 void
663 spu_expand_extv (rtx ops[], int unsignedp)
664 {
665 rtx dst = ops[0], src = ops[1];
666 HOST_WIDE_INT width = INTVAL (ops[2]);
667 HOST_WIDE_INT start = INTVAL (ops[3]);
668 HOST_WIDE_INT align_mask;
669 rtx s0, s1, mask, r0;
670
671 gcc_assert (REG_P (dst) && GET_MODE (dst) == TImode);
672
673 if (MEM_P (src))
674 {
675 /* First, determine if we need 1 TImode load or 2. We need only 1
676 if the bits being extracted do not cross the alignment boundary
677 as determined by the MEM and its address. */
678
679 align_mask = -MEM_ALIGN (src);
680 if ((start & align_mask) == ((start + width - 1) & align_mask))
681 {
682 /* Alignment is sufficient for 1 load. */
683 s0 = gen_reg_rtx (TImode);
684 r0 = spu_expand_load (s0, 0, src, start / 8);
685 start &= 7;
686 if (r0)
687 emit_insn (gen_rotqby_ti (s0, s0, r0));
688 }
689 else
690 {
691 /* Need 2 loads. */
692 s0 = gen_reg_rtx (TImode);
693 s1 = gen_reg_rtx (TImode);
694 r0 = spu_expand_load (s0, s1, src, start / 8);
695 start &= 7;
696
697 gcc_assert (start + width <= 128);
698 if (r0)
699 {
700 rtx r1 = gen_reg_rtx (SImode);
701 mask = gen_reg_rtx (TImode);
702 emit_move_insn (mask, GEN_INT (-1));
703 emit_insn (gen_rotqby_ti (s0, s0, r0));
704 emit_insn (gen_rotqby_ti (s1, s1, r0));
705 if (GET_CODE (r0) == CONST_INT)
706 r1 = GEN_INT (INTVAL (r0) & 15);
707 else
708 emit_insn (gen_andsi3 (r1, r0, GEN_INT (15)));
709 emit_insn (gen_shlqby_ti (mask, mask, r1));
710 emit_insn (gen_selb (s0, s1, s0, mask));
711 }
712 }
713
714 }
715 else if (GET_CODE (src) == SUBREG)
716 {
717 rtx r = SUBREG_REG (src);
718 gcc_assert (REG_P (r) && SCALAR_INT_MODE_P (GET_MODE (r)));
719 s0 = gen_reg_rtx (TImode);
720 if (GET_MODE_SIZE (GET_MODE (r)) < GET_MODE_SIZE (TImode))
721 emit_insn (gen_rtx_SET (VOIDmode, s0, gen_rtx_ZERO_EXTEND (TImode, r)));
722 else
723 emit_move_insn (s0, src);
724 }
725 else
726 {
727 gcc_assert (REG_P (src) && GET_MODE (src) == TImode);
728 s0 = gen_reg_rtx (TImode);
729 emit_move_insn (s0, src);
730 }
731
732 /* Now s0 is TImode and contains the bits to extract at start. */
733
734 if (start)
735 emit_insn (gen_rotlti3 (s0, s0, GEN_INT (start)));
736
737 if (128 - width)
738 {
739 tree c = build_int_cst (NULL_TREE, 128 - width);
740 s0 = expand_shift (RSHIFT_EXPR, TImode, s0, c, s0, unsignedp);
741 }
742
743 emit_move_insn (dst, s0);
744 }
745
746 void
747 spu_expand_insv (rtx ops[])
748 {
749 HOST_WIDE_INT width = INTVAL (ops[1]);
750 HOST_WIDE_INT start = INTVAL (ops[2]);
751 HOST_WIDE_INT maskbits;
752 enum machine_mode dst_mode, src_mode;
753 rtx dst = ops[0], src = ops[3];
754 int dst_size, src_size;
755 rtx mask;
756 rtx shift_reg;
757 int shift;
758
759
760 if (GET_CODE (ops[0]) == MEM)
761 dst = gen_reg_rtx (TImode);
762 else
763 dst = adjust_operand (dst, &start);
764 dst_mode = GET_MODE (dst);
765 dst_size = GET_MODE_BITSIZE (GET_MODE (dst));
766
767 if (CONSTANT_P (src))
768 {
769 enum machine_mode m =
770 (width <= 32 ? SImode : width <= 64 ? DImode : TImode);
771 src = force_reg (m, convert_to_mode (m, src, 0));
772 }
773 src = adjust_operand (src, 0);
774 src_mode = GET_MODE (src);
775 src_size = GET_MODE_BITSIZE (GET_MODE (src));
776
777 mask = gen_reg_rtx (dst_mode);
778 shift_reg = gen_reg_rtx (dst_mode);
779 shift = dst_size - start - width;
780
781 /* It's not safe to use subreg here because the compiler assumes
782 that the SUBREG_REG is right justified in the SUBREG. */
783 convert_move (shift_reg, src, 1);
784
785 if (shift > 0)
786 {
787 switch (dst_mode)
788 {
789 case SImode:
790 emit_insn (gen_ashlsi3 (shift_reg, shift_reg, GEN_INT (shift)));
791 break;
792 case DImode:
793 emit_insn (gen_ashldi3 (shift_reg, shift_reg, GEN_INT (shift)));
794 break;
795 case TImode:
796 emit_insn (gen_ashlti3 (shift_reg, shift_reg, GEN_INT (shift)));
797 break;
798 default:
799 abort ();
800 }
801 }
802 else if (shift < 0)
803 abort ();
804
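/* A worked example for the 32-bit case below: with start == 8 and
   width == 8, -1 << (32 - 8 - 8) gives 0xffff0000 sign-extended, and
   adding 1 << (32 - 8) cancels everything above the field, leaving
   maskbits == 0x00ff0000, i.e. ones exactly over the 8-bit field that
   begins 8 bits down from the most significant bit.  */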
805 switch (dst_size)
806 {
807 case 32:
808 maskbits = (-1ll << (32 - width - start));
809 if (start)
810 maskbits += (1ll << (32 - start));
811 emit_move_insn (mask, GEN_INT (maskbits));
812 break;
813 case 64:
814 maskbits = (-1ll << (64 - width - start));
815 if (start)
816 maskbits += (1ll << (64 - start));
817 emit_move_insn (mask, GEN_INT (maskbits));
818 break;
819 case 128:
820 {
821 unsigned char arr[16];
822 int i = start / 8;
823 memset (arr, 0, sizeof (arr));
824 arr[i] = 0xff >> (start & 7);
825 for (i++; i <= (start + width - 1) / 8; i++)
826 arr[i] = 0xff;
827 arr[i - 1] &= 0xff << (7 - ((start + width - 1) & 7));
828 emit_move_insn (mask, array_to_constant (TImode, arr));
829 }
830 break;
831 default:
832 abort ();
833 }
834 if (GET_CODE (ops[0]) == MEM)
835 {
836 rtx low = gen_reg_rtx (SImode);
837 rtx rotl = gen_reg_rtx (SImode);
838 rtx mask0 = gen_reg_rtx (TImode);
839 rtx addr;
840 rtx addr0;
841 rtx addr1;
842 rtx mem;
843
844 addr = force_reg (Pmode, XEXP (ops[0], 0));
845 addr0 = gen_rtx_AND (Pmode, addr, GEN_INT (-16));
846 emit_insn (gen_andsi3 (low, addr, GEN_INT (15)));
847 emit_insn (gen_negsi2 (rotl, low));
848 emit_insn (gen_rotqby_ti (shift_reg, shift_reg, rotl));
849 emit_insn (gen_rotqmby_ti (mask0, mask, rotl));
850 mem = change_address (ops[0], TImode, addr0);
851 set_mem_alias_set (mem, 0);
852 emit_move_insn (dst, mem);
853 emit_insn (gen_selb (dst, dst, shift_reg, mask0));
854 if (start + width > MEM_ALIGN (ops[0]))
855 {
856 rtx shl = gen_reg_rtx (SImode);
857 rtx mask1 = gen_reg_rtx (TImode);
858 rtx dst1 = gen_reg_rtx (TImode);
859 rtx mem1;
860 addr1 = plus_constant (addr, 16);
861 addr1 = gen_rtx_AND (Pmode, addr1, GEN_INT (-16));
862 emit_insn (gen_subsi3 (shl, GEN_INT (16), low));
863 emit_insn (gen_shlqby_ti (mask1, mask, shl));
864 mem1 = change_address (ops[0], TImode, addr1);
865 set_mem_alias_set (mem1, 0);
866 emit_move_insn (dst1, mem1);
867 emit_insn (gen_selb (dst1, dst1, shift_reg, mask1));
868 emit_move_insn (mem1, dst1);
869 }
870 emit_move_insn (mem, dst);
871 }
872 else
873 emit_insn (gen_selb (dst, copy_rtx (dst), shift_reg, mask));
874 }
875
876
877 int
878 spu_expand_block_move (rtx ops[])
879 {
880 HOST_WIDE_INT bytes, align, offset;
881 rtx src, dst, sreg, dreg, target;
882 int i;
883 if (GET_CODE (ops[2]) != CONST_INT
884 || GET_CODE (ops[3]) != CONST_INT
885 || INTVAL (ops[2]) > (HOST_WIDE_INT) (MOVE_RATIO (optimize_insn_for_speed_p ()) * 8))
886 return 0;
887
888 bytes = INTVAL (ops[2]);
889 align = INTVAL (ops[3]);
890
891 if (bytes <= 0)
892 return 1;
893
894 dst = ops[0];
895 src = ops[1];
896
897 if (align == 16)
898 {
899 for (offset = 0; offset + 16 <= bytes; offset += 16)
900 {
901 dst = adjust_address (ops[0], V16QImode, offset);
902 src = adjust_address (ops[1], V16QImode, offset);
903 emit_move_insn (dst, src);
904 }
905 if (offset < bytes)
906 {
907 rtx mask;
908 unsigned char arr[16] = { 0 };
909 for (i = 0; i < bytes - offset; i++)
910 arr[i] = 0xff;
911 dst = adjust_address (ops[0], V16QImode, offset);
912 src = adjust_address (ops[1], V16QImode, offset);
913 mask = gen_reg_rtx (V16QImode);
914 sreg = gen_reg_rtx (V16QImode);
915 dreg = gen_reg_rtx (V16QImode);
916 target = gen_reg_rtx (V16QImode);
917 emit_move_insn (mask, array_to_constant (V16QImode, arr));
918 emit_move_insn (dreg, dst);
919 emit_move_insn (sreg, src);
920 emit_insn (gen_selb (target, dreg, sreg, mask));
921 emit_move_insn (dst, target);
922 }
923 return 1;
924 }
925 return 0;
926 }
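/* A sketch of what the code above emits: copying 20 bytes at 16-byte
   alignment moves the first quadword directly; for the 4 remaining
   bytes it builds a mask whose first 4 bytes are 0xff and uses selb to
   merge, so those 4 bytes come from the source while the other 12
   bytes of the last quadword keep the old destination contents.  */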
927
928 enum spu_comp_code
929 { SPU_EQ, SPU_GT, SPU_GTU };
930
931 int spu_comp_icode[12][3] = {
932 {CODE_FOR_ceq_qi, CODE_FOR_cgt_qi, CODE_FOR_clgt_qi},
933 {CODE_FOR_ceq_hi, CODE_FOR_cgt_hi, CODE_FOR_clgt_hi},
934 {CODE_FOR_ceq_si, CODE_FOR_cgt_si, CODE_FOR_clgt_si},
935 {CODE_FOR_ceq_di, CODE_FOR_cgt_di, CODE_FOR_clgt_di},
936 {CODE_FOR_ceq_ti, CODE_FOR_cgt_ti, CODE_FOR_clgt_ti},
937 {CODE_FOR_ceq_sf, CODE_FOR_cgt_sf, 0},
938 {CODE_FOR_ceq_df, CODE_FOR_cgt_df, 0},
939 {CODE_FOR_ceq_v16qi, CODE_FOR_cgt_v16qi, CODE_FOR_clgt_v16qi},
940 {CODE_FOR_ceq_v8hi, CODE_FOR_cgt_v8hi, CODE_FOR_clgt_v8hi},
941 {CODE_FOR_ceq_v4si, CODE_FOR_cgt_v4si, CODE_FOR_clgt_v4si},
942 {CODE_FOR_ceq_v4sf, CODE_FOR_cgt_v4sf, 0},
943 {CODE_FOR_ceq_v2df, CODE_FOR_cgt_v2df, 0},
944 };
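/* The rows above are indexed by the mode index computed in
   spu_emit_branch_or_set (0 for QImode through 11 for V2DFmode) and
   the columns by enum spu_comp_code; e.g. spu_comp_icode[2][SPU_GTU]
   is the unsigned SImode compare.  A zero entry (the float modes)
   means no unsigned compare exists for that mode.  */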
945
946 /* Generate a compare for CMP and use it either to branch or to set a
947 register with the result. GCC could figure this out too if we didn't
948 provide all variations of compares, but since GCC always wants to use
949 WORD_MODE, we can generate better code in most cases if we do it
950 ourselves. */
951 void
952 spu_emit_branch_or_set (int is_set, rtx cmp, rtx operands[])
953 {
954 int reverse_compare = 0;
955 int reverse_test = 0;
956 rtx compare_result, eq_result;
957 rtx comp_rtx, eq_rtx;
958 enum machine_mode comp_mode;
959 enum machine_mode op_mode;
960 enum spu_comp_code scode, eq_code;
961 enum insn_code ior_code;
962 enum rtx_code code = GET_CODE (cmp);
963 rtx op0 = XEXP (cmp, 0);
964 rtx op1 = XEXP (cmp, 1);
965 int index;
966 int eq_test = 0;
967
968 /* When op1 is a CONST_INT change (X >= C) to (X > C-1),
969 and so on, to keep the constant in operand 1. */
970 if (GET_CODE (op1) == CONST_INT)
971 {
972 HOST_WIDE_INT val = INTVAL (op1) - 1;
973 if (trunc_int_for_mode (val, GET_MODE (op0)) == val)
974 switch (code)
975 {
976 case GE:
977 op1 = GEN_INT (val);
978 code = GT;
979 break;
980 case LT:
981 op1 = GEN_INT (val);
982 code = LE;
983 break;
984 case GEU:
985 op1 = GEN_INT (val);
986 code = GTU;
987 break;
988 case LTU:
989 op1 = GEN_INT (val);
990 code = LEU;
991 break;
992 default:
993 break;
994 }
995 }
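/* For example, (x >= 5) is rewritten above as (x > 4), and the
   unsigned (x < 5) as (x <= 4).  */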
996
997 comp_mode = SImode;
998 op_mode = GET_MODE (op0);
999
1000 switch (code)
1001 {
1002 case GE:
1003 scode = SPU_GT;
1004 if (HONOR_NANS (op_mode))
1005 {
1006 reverse_compare = 0;
1007 reverse_test = 0;
1008 eq_test = 1;
1009 eq_code = SPU_EQ;
1010 }
1011 else
1012 {
1013 reverse_compare = 1;
1014 reverse_test = 1;
1015 }
1016 break;
1017 case LE:
1018 scode = SPU_GT;
1019 if (HONOR_NANS (op_mode))
1020 {
1021 reverse_compare = 1;
1022 reverse_test = 0;
1023 eq_test = 1;
1024 eq_code = SPU_EQ;
1025 }
1026 else
1027 {
1028 reverse_compare = 0;
1029 reverse_test = 1;
1030 }
1031 break;
1032 case LT:
1033 reverse_compare = 1;
1034 reverse_test = 0;
1035 scode = SPU_GT;
1036 break;
1037 case GEU:
1038 reverse_compare = 1;
1039 reverse_test = 1;
1040 scode = SPU_GTU;
1041 break;
1042 case LEU:
1043 reverse_compare = 0;
1044 reverse_test = 1;
1045 scode = SPU_GTU;
1046 break;
1047 case LTU:
1048 reverse_compare = 1;
1049 reverse_test = 0;
1050 scode = SPU_GTU;
1051 break;
1052 case NE:
1053 reverse_compare = 0;
1054 reverse_test = 1;
1055 scode = SPU_EQ;
1056 break;
1057
1058 case EQ:
1059 scode = SPU_EQ;
1060 break;
1061 case GT:
1062 scode = SPU_GT;
1063 break;
1064 case GTU:
1065 scode = SPU_GTU;
1066 break;
1067 default:
1068 scode = SPU_EQ;
1069 break;
1070 }
1071
1072 switch (op_mode)
1073 {
1074 case QImode:
1075 index = 0;
1076 comp_mode = QImode;
1077 break;
1078 case HImode:
1079 index = 1;
1080 comp_mode = HImode;
1081 break;
1082 case SImode:
1083 index = 2;
1084 break;
1085 case DImode:
1086 index = 3;
1087 break;
1088 case TImode:
1089 index = 4;
1090 break;
1091 case SFmode:
1092 index = 5;
1093 break;
1094 case DFmode:
1095 index = 6;
1096 break;
1097 case V16QImode:
1098 index = 7;
1099 comp_mode = op_mode;
1100 break;
1101 case V8HImode:
1102 index = 8;
1103 comp_mode = op_mode;
1104 break;
1105 case V4SImode:
1106 index = 9;
1107 comp_mode = op_mode;
1108 break;
1109 case V4SFmode:
1110 index = 10;
1111 comp_mode = V4SImode;
1112 break;
1113 case V2DFmode:
1114 index = 11;
1115 comp_mode = V2DImode;
1116 break;
1117 case V2DImode:
1118 default:
1119 abort ();
1120 }
1121
1122 if (GET_MODE (op1) == DFmode
1123 && (scode != SPU_GT && scode != SPU_EQ))
1124 abort ();
1125
1126 if (is_set == 0 && op1 == const0_rtx
1127 && (GET_MODE (op0) == SImode
1128 || GET_MODE (op0) == HImode) && scode == SPU_EQ)
1129 {
1130 /* Don't need to set a register with the result when we are
1131 comparing against zero and branching. */
1132 reverse_test = !reverse_test;
1133 compare_result = op0;
1134 }
1135 else
1136 {
1137 compare_result = gen_reg_rtx (comp_mode);
1138
1139 if (reverse_compare)
1140 {
1141 rtx t = op1;
1142 op1 = op0;
1143 op0 = t;
1144 }
1145
1146 if (spu_comp_icode[index][scode] == 0)
1147 abort ();
1148
1149 if (!(*insn_data[spu_comp_icode[index][scode]].operand[1].predicate)
1150 (op0, op_mode))
1151 op0 = force_reg (op_mode, op0);
1152 if (!(*insn_data[spu_comp_icode[index][scode]].operand[2].predicate)
1153 (op1, op_mode))
1154 op1 = force_reg (op_mode, op1);
1155 comp_rtx = GEN_FCN (spu_comp_icode[index][scode]) (compare_result,
1156 op0, op1);
1157 if (comp_rtx == 0)
1158 abort ();
1159 emit_insn (comp_rtx);
1160
1161 if (eq_test)
1162 {
1163 eq_result = gen_reg_rtx (comp_mode);
1164 eq_rtx = GEN_FCN (spu_comp_icode[index][eq_code]) (eq_result,
1165 op0, op1);
1166 if (eq_rtx == 0)
1167 abort ();
1168 emit_insn (eq_rtx);
1169 ior_code = optab_handler (ior_optab, comp_mode);
1170 gcc_assert (ior_code != CODE_FOR_nothing);
1171 emit_insn (GEN_FCN (ior_code)
1172 (compare_result, compare_result, eq_result));
1173 }
1174 }
1175
1176 if (is_set == 0)
1177 {
1178 rtx bcomp;
1179 rtx loc_ref;
1180
1181 /* We don't have branch on QI compare insns, so we convert the
1182 QI compare result to a HI result. */
1183 if (comp_mode == QImode)
1184 {
1185 rtx old_res = compare_result;
1186 compare_result = gen_reg_rtx (HImode);
1187 comp_mode = HImode;
1188 emit_insn (gen_extendqihi2 (compare_result, old_res));
1189 }
1190
1191 if (reverse_test)
1192 bcomp = gen_rtx_EQ (comp_mode, compare_result, const0_rtx);
1193 else
1194 bcomp = gen_rtx_NE (comp_mode, compare_result, const0_rtx);
1195
1196 loc_ref = gen_rtx_LABEL_REF (VOIDmode, operands[3]);
1197 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
1198 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
1199 loc_ref, pc_rtx)));
1200 }
1201 else if (is_set == 2)
1202 {
1203 rtx target = operands[0];
1204 int compare_size = GET_MODE_BITSIZE (comp_mode);
1205 int target_size = GET_MODE_BITSIZE (GET_MODE (target));
1206 enum machine_mode mode = mode_for_size (target_size, MODE_INT, 0);
1207 rtx select_mask;
1208 rtx op_t = operands[2];
1209 rtx op_f = operands[3];
1210
1211 /* The result of the comparison can be SI, HI or QI mode. Create a
1212 mask based on that result. */
1213 if (target_size > compare_size)
1214 {
1215 select_mask = gen_reg_rtx (mode);
1216 emit_insn (gen_extend_compare (select_mask, compare_result));
1217 }
1218 else if (target_size < compare_size)
1219 select_mask =
1220 gen_rtx_SUBREG (mode, compare_result,
1221 (compare_size - target_size) / BITS_PER_UNIT);
1222 else if (comp_mode != mode)
1223 select_mask = gen_rtx_SUBREG (mode, compare_result, 0);
1224 else
1225 select_mask = compare_result;
1226
1227 if (GET_MODE (target) != GET_MODE (op_t)
1228 || GET_MODE (target) != GET_MODE (op_f))
1229 abort ();
1230
1231 if (reverse_test)
1232 emit_insn (gen_selb (target, op_t, op_f, select_mask));
1233 else
1234 emit_insn (gen_selb (target, op_f, op_t, select_mask));
1235 }
1236 else
1237 {
1238 rtx target = operands[0];
1239 if (reverse_test)
1240 emit_insn (gen_rtx_SET (VOIDmode, compare_result,
1241 gen_rtx_NOT (comp_mode, compare_result)));
1242 if (GET_MODE (target) == SImode && GET_MODE (compare_result) == HImode)
1243 emit_insn (gen_extendhisi2 (target, compare_result));
1244 else if (GET_MODE (target) == SImode
1245 && GET_MODE (compare_result) == QImode)
1246 emit_insn (gen_extend_compare (target, compare_result));
1247 else
1248 emit_move_insn (target, compare_result);
1249 }
1250 }
1251
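/* Return the target bit pattern of the CONST_DOUBLE X as an integer.
   As an illustration, SFmode 1.0 encodes as 0x3f800000; a DFmode value
   packs its two 32-bit target words into one 64-bit result with the
   first word in the high half.  */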
1252 HOST_WIDE_INT
1253 const_double_to_hwint (rtx x)
1254 {
1255 HOST_WIDE_INT val;
1256 REAL_VALUE_TYPE rv;
1257 if (GET_MODE (x) == SFmode)
1258 {
1259 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1260 REAL_VALUE_TO_TARGET_SINGLE (rv, val);
1261 }
1262 else if (GET_MODE (x) == DFmode)
1263 {
1264 long l[2];
1265 REAL_VALUE_FROM_CONST_DOUBLE (rv, x);
1266 REAL_VALUE_TO_TARGET_DOUBLE (rv, l);
1267 val = l[0];
1268 val = (val << 32) | (l[1] & 0xffffffff);
1269 }
1270 else
1271 abort ();
1272 return val;
1273 }
1274
1275 rtx
1276 hwint_to_const_double (enum machine_mode mode, HOST_WIDE_INT v)
1277 {
1278 long tv[2];
1279 REAL_VALUE_TYPE rv;
1280 gcc_assert (mode == SFmode || mode == DFmode);
1281
1282 if (mode == SFmode)
1283 tv[0] = (v << 32) >> 32;
1284 else if (mode == DFmode)
1285 {
1286 tv[1] = (v << 32) >> 32;
1287 tv[0] = v >> 32;
1288 }
1289 real_from_target (&rv, tv, mode);
1290 return CONST_DOUBLE_FROM_REAL_VALUE (rv, mode);
1291 }
1292
1293 void
1294 print_operand_address (FILE * file, register rtx addr)
1295 {
1296 rtx reg;
1297 rtx offset;
1298
1299 if (GET_CODE (addr) == AND
1300 && GET_CODE (XEXP (addr, 1)) == CONST_INT
1301 && INTVAL (XEXP (addr, 1)) == -16)
1302 addr = XEXP (addr, 0);
1303
1304 switch (GET_CODE (addr))
1305 {
1306 case REG:
1307 fprintf (file, "0(%s)", reg_names[REGNO (addr)]);
1308 break;
1309
1310 case PLUS:
1311 reg = XEXP (addr, 0);
1312 offset = XEXP (addr, 1);
1313 if (GET_CODE (offset) == REG)
1314 {
1315 fprintf (file, "%s,%s", reg_names[REGNO (reg)],
1316 reg_names[REGNO (offset)]);
1317 }
1318 else if (GET_CODE (offset) == CONST_INT)
1319 {
1320 fprintf (file, HOST_WIDE_INT_PRINT_DEC "(%s)",
1321 INTVAL (offset), reg_names[REGNO (reg)]);
1322 }
1323 else
1324 abort ();
1325 break;
1326
1327 case CONST:
1328 case LABEL_REF:
1329 case SYMBOL_REF:
1330 case CONST_INT:
1331 output_addr_const (file, addr);
1332 break;
1333
1334 default:
1335 debug_rtx (addr);
1336 abort ();
1337 }
1338 }
1339
1340 void
1341 print_operand (FILE * file, rtx x, int code)
1342 {
1343 enum machine_mode mode = GET_MODE (x);
1344 HOST_WIDE_INT val;
1345 unsigned char arr[16];
1346 int xcode = GET_CODE (x);
1347 int i, info;
1348 if (GET_MODE (x) == VOIDmode)
1349 switch (code)
1350 {
1351 case 'L': /* 128 bits, signed */
1352 case 'm': /* 128 bits, signed */
1353 case 'T': /* 128 bits, signed */
1354 case 't': /* 128 bits, signed */
1355 mode = TImode;
1356 break;
1357 case 'K': /* 64 bits, signed */
1358 case 'k': /* 64 bits, signed */
1359 case 'D': /* 64 bits, signed */
1360 case 'd': /* 64 bits, signed */
1361 mode = DImode;
1362 break;
1363 case 'J': /* 32 bits, signed */
1364 case 'j': /* 32 bits, signed */
1365 case 's': /* 32 bits, signed */
1366 case 'S': /* 32 bits, signed */
1367 mode = SImode;
1368 break;
1369 }
1370 switch (code)
1371 {
1372
1373 case 'j': /* 32 bits, signed */
1374 case 'k': /* 64 bits, signed */
1375 case 'm': /* 128 bits, signed */
1376 if (xcode == CONST_INT
1377 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1378 {
1379 gcc_assert (logical_immediate_p (x, mode));
1380 constant_to_array (mode, x, arr);
1381 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1382 val = trunc_int_for_mode (val, SImode);
1383 switch (which_logical_immediate (val))
1384 {
1385 case SPU_ORI:
1386 break;
1387 case SPU_ORHI:
1388 fprintf (file, "h");
1389 break;
1390 case SPU_ORBI:
1391 fprintf (file, "b");
1392 break;
1393 default:
1394 gcc_unreachable();
1395 }
1396 }
1397 else
1398 gcc_unreachable();
1399 return;
1400
1401 case 'J': /* 32 bits, signed */
1402 case 'K': /* 64 bits, signed */
1403 case 'L': /* 128 bits, signed */
1404 if (xcode == CONST_INT
1405 || xcode == CONST_DOUBLE || xcode == CONST_VECTOR)
1406 {
1407 gcc_assert (logical_immediate_p (x, mode)
1408 || iohl_immediate_p (x, mode));
1409 constant_to_array (mode, x, arr);
1410 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1411 val = trunc_int_for_mode (val, SImode);
1412 switch (which_logical_immediate (val))
1413 {
1414 case SPU_ORI:
1415 case SPU_IOHL:
1416 break;
1417 case SPU_ORHI:
1418 val = trunc_int_for_mode (val, HImode);
1419 break;
1420 case SPU_ORBI:
1421 val = trunc_int_for_mode (val, QImode);
1422 break;
1423 default:
1424 gcc_unreachable();
1425 }
1426 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1427 }
1428 else
1429 gcc_unreachable();
1430 return;
1431
1432 case 't': /* 128 bits, signed */
1433 case 'd': /* 64 bits, signed */
1434 case 's': /* 32 bits, signed */
1435 if (CONSTANT_P (x))
1436 {
1437 enum immediate_class c = classify_immediate (x, mode);
1438 switch (c)
1439 {
1440 case IC_IL1:
1441 constant_to_array (mode, x, arr);
1442 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1443 val = trunc_int_for_mode (val, SImode);
1444 switch (which_immediate_load (val))
1445 {
1446 case SPU_IL:
1447 break;
1448 case SPU_ILA:
1449 fprintf (file, "a");
1450 break;
1451 case SPU_ILH:
1452 fprintf (file, "h");
1453 break;
1454 case SPU_ILHU:
1455 fprintf (file, "hu");
1456 break;
1457 default:
1458 gcc_unreachable ();
1459 }
1460 break;
1461 case IC_CPAT:
1462 constant_to_array (mode, x, arr);
1463 cpat_info (arr, GET_MODE_SIZE (mode), &info, 0);
1464 if (info == 1)
1465 fprintf (file, "b");
1466 else if (info == 2)
1467 fprintf (file, "h");
1468 else if (info == 4)
1469 fprintf (file, "w");
1470 else if (info == 8)
1471 fprintf (file, "d");
1472 break;
1473 case IC_IL1s:
1474 if (xcode == CONST_VECTOR)
1475 {
1476 x = CONST_VECTOR_ELT (x, 0);
1477 xcode = GET_CODE (x);
1478 }
1479 if (xcode == SYMBOL_REF || xcode == LABEL_REF || xcode == CONST)
1480 fprintf (file, "a");
1481 else if (xcode == HIGH)
1482 fprintf (file, "hu");
1483 break;
1484 case IC_FSMBI:
1485 case IC_FSMBI2:
1486 case IC_IL2:
1487 case IC_IL2s:
1488 case IC_POOL:
1489 abort ();
1490 }
1491 }
1492 else
1493 gcc_unreachable ();
1494 return;
1495
1496 case 'T': /* 128 bits, signed */
1497 case 'D': /* 64 bits, signed */
1498 case 'S': /* 32 bits, signed */
1499 if (CONSTANT_P (x))
1500 {
1501 enum immediate_class c = classify_immediate (x, mode);
1502 switch (c)
1503 {
1504 case IC_IL1:
1505 constant_to_array (mode, x, arr);
1506 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
1507 val = trunc_int_for_mode (val, SImode);
1508 switch (which_immediate_load (val))
1509 {
1510 case SPU_IL:
1511 case SPU_ILA:
1512 break;
1513 case SPU_ILH:
1514 case SPU_ILHU:
1515 val = trunc_int_for_mode (((arr[0] << 8) | arr[1]), HImode);
1516 break;
1517 default:
1518 gcc_unreachable ();
1519 }
1520 fprintf (file, HOST_WIDE_INT_PRINT_DEC, val);
1521 break;
1522 case IC_FSMBI:
1523 constant_to_array (mode, x, arr);
1524 val = 0;
1525 for (i = 0; i < 16; i++)
1526 {
1527 val <<= 1;
1528 val |= arr[i] & 1;
1529 }
1530 print_operand (file, GEN_INT (val), 0);
1531 break;
1532 case IC_CPAT:
1533 constant_to_array (mode, x, arr);
1534 cpat_info (arr, GET_MODE_SIZE (mode), 0, &info);
1535 fprintf (file, HOST_WIDE_INT_PRINT_DEC, (HOST_WIDE_INT)info);
1536 break;
1537 case IC_IL1s:
1538 if (xcode == HIGH)
1539 x = XEXP (x, 0);
1540 if (GET_CODE (x) == CONST_VECTOR)
1541 x = CONST_VECTOR_ELT (x, 0);
1542 output_addr_const (file, x);
1543 if (xcode == HIGH)
1544 fprintf (file, "@h");
1545 break;
1546 case IC_IL2:
1547 case IC_IL2s:
1548 case IC_FSMBI2:
1549 case IC_POOL:
1550 abort ();
1551 }
1552 }
1553 else
1554 gcc_unreachable ();
1555 return;
1556
1557 case 'C':
1558 if (xcode == CONST_INT)
1559 {
1560 /* Only the 4 least significant bits are relevant for generating
1561 control word instructions. */
1562 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 15);
1563 return;
1564 }
1565 break;
1566
1567 case 'M': /* print code for c*d */
1568 if (GET_CODE (x) == CONST_INT)
1569 switch (INTVAL (x))
1570 {
1571 case 1:
1572 fprintf (file, "b");
1573 break;
1574 case 2:
1575 fprintf (file, "h");
1576 break;
1577 case 4:
1578 fprintf (file, "w");
1579 break;
1580 case 8:
1581 fprintf (file, "d");
1582 break;
1583 default:
1584 gcc_unreachable();
1585 }
1586 else
1587 gcc_unreachable();
1588 return;
1589
1590 case 'N': /* Negate the operand */
1591 if (xcode == CONST_INT)
1592 fprintf (file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (x));
1593 else if (xcode == CONST_VECTOR)
1594 fprintf (file, HOST_WIDE_INT_PRINT_DEC,
1595 -INTVAL (CONST_VECTOR_ELT (x, 0)));
1596 return;
1597
1598 case 'I': /* enable/disable interrupts */
1599 if (xcode == CONST_INT)
1600 fprintf (file, "%s", INTVAL (x) == 0 ? "d" : "e");
1601 return;
1602
1603 case 'b': /* branch modifiers */
1604 if (xcode == REG)
1605 fprintf (file, "%s", GET_MODE (x) == HImode ? "h" : "");
1606 else if (COMPARISON_P (x))
1607 fprintf (file, "%s", xcode == NE ? "n" : "");
1608 return;
1609
1610 case 'i': /* indirect call */
1611 if (xcode == MEM)
1612 {
1613 if (GET_CODE (XEXP (x, 0)) == REG)
1614 /* Used in indirect function calls. */
1615 fprintf (file, "%s", reg_names[REGNO (XEXP (x, 0))]);
1616 else
1617 output_address (XEXP (x, 0));
1618 }
1619 return;
1620
1621 case 'p': /* load/store */
1622 if (xcode == MEM)
1623 {
1624 x = XEXP (x, 0);
1625 xcode = GET_CODE (x);
1626 }
1627 if (xcode == AND)
1628 {
1629 x = XEXP (x, 0);
1630 xcode = GET_CODE (x);
1631 }
1632 if (xcode == REG)
1633 fprintf (file, "d");
1634 else if (xcode == CONST_INT)
1635 fprintf (file, "a");
1636 else if (xcode == CONST || xcode == SYMBOL_REF || xcode == LABEL_REF)
1637 fprintf (file, "r");
1638 else if (xcode == PLUS || xcode == LO_SUM)
1639 {
1640 if (GET_CODE (XEXP (x, 1)) == REG)
1641 fprintf (file, "x");
1642 else
1643 fprintf (file, "d");
1644 }
1645 return;
1646
1647 case 'e':
1648 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1649 val &= 0x7;
1650 output_addr_const (file, GEN_INT (val));
1651 return;
1652
1653 case 'f':
1654 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1655 val &= 0x1f;
1656 output_addr_const (file, GEN_INT (val));
1657 return;
1658
1659 case 'g':
1660 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1661 val &= 0x3f;
1662 output_addr_const (file, GEN_INT (val));
1663 return;
1664
1665 case 'h':
1666 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1667 val = (val >> 3) & 0x1f;
1668 output_addr_const (file, GEN_INT (val));
1669 return;
1670
1671 case 'E':
1672 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1673 val = -val;
1674 val &= 0x7;
1675 output_addr_const (file, GEN_INT (val));
1676 return;
1677
1678 case 'F':
1679 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1680 val = -val;
1681 val &= 0x1f;
1682 output_addr_const (file, GEN_INT (val));
1683 return;
1684
1685 case 'G':
1686 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1687 val = -val;
1688 val &= 0x3f;
1689 output_addr_const (file, GEN_INT (val));
1690 return;
1691
1692 case 'H':
1693 val = xcode == CONST_INT ? INTVAL (x) : INTVAL (CONST_VECTOR_ELT (x, 0));
1694 val = -(val & -8ll);
1695 val = (val >> 3) & 0x1f;
1696 output_addr_const (file, GEN_INT (val));
1697 return;
1698
1699 case 'v':
1700 case 'w':
1701 constant_to_array (mode, x, arr);
1702 val = (((arr[0] << 1) + (arr[1] >> 7)) & 0xff) - 127;
1703 output_addr_const (file, GEN_INT (code == 'w' ? -val : val));
1704 return;
1705
1706 case 0:
1707 if (xcode == REG)
1708 fprintf (file, "%s", reg_names[REGNO (x)]);
1709 else if (xcode == MEM)
1710 output_address (XEXP (x, 0));
1711 else if (xcode == CONST_VECTOR)
1712 print_operand (file, CONST_VECTOR_ELT (x, 0), 0);
1713 else
1714 output_addr_const (file, x);
1715 return;
1716
1717 /* unused letters
1718 o qr u yz
1719 AB OPQR UVWXYZ */
1720 default:
1721 output_operand_lossage ("invalid %%xn code");
1722 }
1723 gcc_unreachable ();
1724 }
1725
1726 /* For PIC mode we've reserved PIC_OFFSET_TABLE_REGNUM, which is a
1727 caller saved register. For leaf functions it is more efficient to
1728 use a volatile register because we won't need to save and restore the
1729 pic register. This routine is only valid after register allocation
1730 is completed, so we can pick an unused register. */
1731 static rtx
1732 get_pic_reg (void)
1733 {
1734 rtx pic_reg = pic_offset_table_rtx;
1735 if (!reload_completed && !reload_in_progress)
1736 abort ();
1737 if (current_function_is_leaf && !df_regs_ever_live_p (LAST_ARG_REGNUM))
1738 pic_reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
1739 return pic_reg;
1740 }
1741
1742 /* Split constant addresses to handle cases that are too large.
1743 Add in the pic register when in PIC mode.
1744 Split immediates that require more than 1 instruction. */
1745 int
1746 spu_split_immediate (rtx * ops)
1747 {
1748 enum machine_mode mode = GET_MODE (ops[0]);
1749 enum immediate_class c = classify_immediate (ops[1], mode);
1750
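/* A rough example of the IC_IL2 case below: an SImode 0x12345678 is
   split into a high part 0x12340000, loaded with a single il-family
   insn (ilhu), and a low part 0x5678 that is IORed in afterwards (the
   iohl form), matching the ilhu/iohl description of IC_IL2.  */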
1751 switch (c)
1752 {
1753 case IC_IL2:
1754 {
1755 unsigned char arrhi[16];
1756 unsigned char arrlo[16];
1757 rtx to, temp, hi, lo;
1758 int i;
1759 enum machine_mode imode = mode;
1760 /* We need to do reals as ints because the constant used in the
1761 IOR might not be a legitimate real constant. */
1762 imode = int_mode_for_mode (mode);
1763 constant_to_array (mode, ops[1], arrhi);
1764 if (imode != mode)
1765 to = simplify_gen_subreg (imode, ops[0], mode, 0);
1766 else
1767 to = ops[0];
1768 temp = !can_create_pseudo_p () ? to : gen_reg_rtx (imode);
1769 for (i = 0; i < 16; i += 4)
1770 {
1771 arrlo[i + 2] = arrhi[i + 2];
1772 arrlo[i + 3] = arrhi[i + 3];
1773 arrlo[i + 0] = arrlo[i + 1] = 0;
1774 arrhi[i + 2] = arrhi[i + 3] = 0;
1775 }
1776 hi = array_to_constant (imode, arrhi);
1777 lo = array_to_constant (imode, arrlo);
1778 emit_move_insn (temp, hi);
1779 emit_insn (gen_rtx_SET
1780 (VOIDmode, to, gen_rtx_IOR (imode, temp, lo)));
1781 return 1;
1782 }
1783 case IC_FSMBI2:
1784 {
1785 unsigned char arr_fsmbi[16];
1786 unsigned char arr_andbi[16];
1787 rtx to, reg_fsmbi, reg_and;
1788 int i;
1789 enum machine_mode imode = mode;
1790 /* We need to do reals as ints because the constant used in the
1791 * AND might not be a legitimate real constant. */
1792 imode = int_mode_for_mode (mode);
1793 constant_to_array (mode, ops[1], arr_fsmbi);
1794 if (imode != mode)
1795 to = simplify_gen_subreg(imode, ops[0], GET_MODE (ops[0]), 0);
1796 else
1797 to = ops[0];
1798 for (i = 0; i < 16; i++)
1799 if (arr_fsmbi[i] != 0)
1800 {
1801 arr_andbi[0] = arr_fsmbi[i];
1802 arr_fsmbi[i] = 0xff;
1803 }
1804 for (i = 1; i < 16; i++)
1805 arr_andbi[i] = arr_andbi[0];
1806 reg_fsmbi = array_to_constant (imode, arr_fsmbi);
1807 reg_and = array_to_constant (imode, arr_andbi);
1808 emit_move_insn (to, reg_fsmbi);
1809 emit_insn (gen_rtx_SET
1810 (VOIDmode, to, gen_rtx_AND (imode, to, reg_and)));
1811 return 1;
1812 }
1813 case IC_POOL:
1814 if (reload_in_progress || reload_completed)
1815 {
1816 rtx mem = force_const_mem (mode, ops[1]);
1817 if (TARGET_LARGE_MEM)
1818 {
1819 rtx addr = gen_rtx_REG (Pmode, REGNO (ops[0]));
1820 emit_move_insn (addr, XEXP (mem, 0));
1821 mem = replace_equiv_address (mem, addr);
1822 }
1823 emit_move_insn (ops[0], mem);
1824 return 1;
1825 }
1826 break;
1827 case IC_IL1s:
1828 case IC_IL2s:
1829 if (reload_completed && GET_CODE (ops[1]) != HIGH)
1830 {
1831 if (c == IC_IL2s)
1832 {
1833 emit_move_insn (ops[0], gen_rtx_HIGH (mode, ops[1]));
1834 emit_move_insn (ops[0], gen_rtx_LO_SUM (mode, ops[0], ops[1]));
1835 }
1836 else if (flag_pic)
1837 emit_insn (gen_pic (ops[0], ops[1]));
1838 if (flag_pic)
1839 {
1840 rtx pic_reg = get_pic_reg ();
1841 emit_insn (gen_addsi3 (ops[0], ops[0], pic_reg));
1842 crtl->uses_pic_offset_table = 1;
1843 }
1844 return flag_pic || c == IC_IL2s;
1845 }
1846 break;
1847 case IC_IL1:
1848 case IC_FSMBI:
1849 case IC_CPAT:
1850 break;
1851 }
1852 return 0;
1853 }
1854
1855 /* SAVING is TRUE when we are generating the actual load and store
1856 instructions for REGNO. When determining the size of the stack
1857 needed for saving registers we must allocate enough space for the
1858 worst case, because we don't always have the information early enough
1859 to not allocate it. But we can at least eliminate the actual loads
1860 and stores during the prologue/epilogue. */
1861 static int
1862 need_to_save_reg (int regno, int saving)
1863 {
1864 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
1865 return 1;
1866 if (flag_pic
1867 && regno == PIC_OFFSET_TABLE_REGNUM
1868 && (!saving || crtl->uses_pic_offset_table)
1869 && (!saving
1870 || !current_function_is_leaf || df_regs_ever_live_p (LAST_ARG_REGNUM)))
1871 return 1;
1872 return 0;
1873 }
1874
1875 /* This function is only correct starting with local register
1876 allocation */
1877 int
1878 spu_saved_regs_size (void)
1879 {
1880 int reg_save_size = 0;
1881 int regno;
1882
1883 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; --regno)
1884 if (need_to_save_reg (regno, 0))
1885 reg_save_size += 0x10;
1886 return reg_save_size;
1887 }
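/* Note the 0x10 above: each saved register takes a full 16-byte slot,
   because frame_emit_store and frame_emit_load below move whole
   quadwords (V4SImode) to and from the stack frame.  */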
1888
1889 static rtx
1890 frame_emit_store (int regno, rtx addr, HOST_WIDE_INT offset)
1891 {
1892 rtx reg = gen_rtx_REG (V4SImode, regno);
1893 rtx mem =
1894 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1895 return emit_insn (gen_movv4si (mem, reg));
1896 }
1897
1898 static rtx
1899 frame_emit_load (int regno, rtx addr, HOST_WIDE_INT offset)
1900 {
1901 rtx reg = gen_rtx_REG (V4SImode, regno);
1902 rtx mem =
1903 gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, addr, GEN_INT (offset)));
1904 return emit_insn (gen_movv4si (reg, mem));
1905 }
1906
1907 /* This happens after reload, so we need to expand it. */
1908 static rtx
1909 frame_emit_add_imm (rtx dst, rtx src, HOST_WIDE_INT imm, rtx scratch)
1910 {
1911 rtx insn;
1912 if (satisfies_constraint_K (GEN_INT (imm)))
1913 {
1914 insn = emit_insn (gen_addsi3 (dst, src, GEN_INT (imm)));
1915 }
1916 else
1917 {
1918 emit_insn (gen_movsi (scratch, gen_int_mode (imm, SImode)));
1919 insn = emit_insn (gen_addsi3 (dst, src, scratch));
1920 if (REGNO (src) == REGNO (scratch))
1921 abort ();
1922 }
1923 return insn;
1924 }
1925
1926 /* Return nonzero if this function is known to have a null epilogue. */
1927
1928 int
1929 direct_return (void)
1930 {
1931 if (reload_completed)
1932 {
1933 if (cfun->static_chain_decl == 0
1934 && (spu_saved_regs_size ()
1935 + get_frame_size ()
1936 + crtl->outgoing_args_size
1937 + crtl->args.pretend_args_size == 0)
1938 && current_function_is_leaf)
1939 return 1;
1940 }
1941 return 0;
1942 }
1943
1944 /*
1945 The stack frame looks like this:
1946 +-------------+
1947 | incoming |
1948 | args |
1949 AP -> +-------------+
1950 | $lr save |
1951 +-------------+
1952 prev SP | back chain |
1953 +-------------+
1954 | var args |
1955 | reg save | crtl->args.pretend_args_size bytes
1956 +-------------+
1957 | ... |
1958 | saved regs | spu_saved_regs_size() bytes
1959 FP -> +-------------+
1960 | ... |
1961 | vars | get_frame_size() bytes
1962 HFP -> +-------------+
1963 | ... |
1964 | outgoing |
1965 | args | crtl->outgoing_args_size bytes
1966 +-------------+
1967 | $lr of next |
1968 | frame |
1969 +-------------+
1970 | back chain |
1971 SP -> +-------------+
1972
1973 */
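/* Worked example with hypothetical numbers: a non-leaf function with 32 bytes
   of locals (get_frame_size), 48 bytes of saved registers
   (spu_saved_regs_size), 16 bytes of outgoing args and no pretend args ends
   up with total_size = 32 + 48 + 16 + 0 + STACK_POINTER_OFFSET in
   spu_expand_prologue below, and the new $sp sits that many bytes below the
   incoming $sp.  */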
1974 void
1975 spu_expand_prologue (void)
1976 {
1977 HOST_WIDE_INT size = get_frame_size (), offset, regno;
1978 HOST_WIDE_INT total_size;
1979 HOST_WIDE_INT saved_regs_size;
1980 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
1981 rtx scratch_reg_0, scratch_reg_1;
1982 rtx insn, real;
1983
1984 if (flag_pic && optimize == 0)
1985 crtl->uses_pic_offset_table = 1;
1986
1987 if (spu_naked_function_p (current_function_decl))
1988 return;
1989
1990 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
1991 scratch_reg_1 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 2);
1992
1993 saved_regs_size = spu_saved_regs_size ();
1994 total_size = size + saved_regs_size
1995 + crtl->outgoing_args_size
1996 + crtl->args.pretend_args_size;
1997
1998 if (!current_function_is_leaf
1999 || cfun->calls_alloca || total_size > 0)
2000 total_size += STACK_POINTER_OFFSET;
2001
2002 /* Save this first because code after this might use the link
2003 register as a scratch register. */
2004 if (!current_function_is_leaf)
2005 {
2006 insn = frame_emit_store (LINK_REGISTER_REGNUM, sp_reg, 16);
2007 RTX_FRAME_RELATED_P (insn) = 1;
2008 }
2009
2010 if (total_size > 0)
2011 {
2012 offset = -crtl->args.pretend_args_size;
2013 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2014 if (need_to_save_reg (regno, 1))
2015 {
2016 offset -= 16;
2017 insn = frame_emit_store (regno, sp_reg, offset);
2018 RTX_FRAME_RELATED_P (insn) = 1;
2019 }
2020 }
2021
2022 if (flag_pic && crtl->uses_pic_offset_table)
2023 {
2024 rtx pic_reg = get_pic_reg ();
2025 insn = emit_insn (gen_load_pic_offset (pic_reg, scratch_reg_0));
2026 insn = emit_insn (gen_subsi3 (pic_reg, pic_reg, scratch_reg_0));
2027 }
2028
2029 if (total_size > 0)
2030 {
2031 if (flag_stack_check)
2032 {
2033 /* We compare against total_size-1 because
2034 ($sp >= total_size) <=> ($sp > total_size-1) */
2035 rtx scratch_v4si = gen_rtx_REG (V4SImode, REGNO (scratch_reg_0));
2036 rtx sp_v4si = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
2037 rtx size_v4si = spu_const (V4SImode, total_size - 1);
2038 if (!satisfies_constraint_K (GEN_INT (total_size - 1)))
2039 {
2040 emit_move_insn (scratch_v4si, size_v4si);
2041 size_v4si = scratch_v4si;
2042 }
2043 emit_insn (gen_cgt_v4si (scratch_v4si, sp_v4si, size_v4si));
2044 emit_insn (gen_vec_extractv4si
2045 (scratch_reg_0, scratch_v4si, GEN_INT (1)));
2046 emit_insn (gen_spu_heq (scratch_reg_0, GEN_INT (0)));
2047 }
2048
2049 /* Adjust the stack pointer, and make sure scratch_reg_0 contains
2050 the value of the previous $sp because we save it as the back
2051 chain. */
2052 if (total_size <= 2000)
2053 {
2054 /* In this case we save the back chain first. */
2055 insn = frame_emit_store (STACK_POINTER_REGNUM, sp_reg, -total_size);
2056 insn =
2057 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_0);
2058 }
2059 else
2060 {
2061 insn = emit_move_insn (scratch_reg_0, sp_reg);
2062 insn =
2063 frame_emit_add_imm (sp_reg, sp_reg, -total_size, scratch_reg_1);
2064 }
2065 RTX_FRAME_RELATED_P (insn) = 1;
2066 real = gen_addsi3 (sp_reg, sp_reg, GEN_INT (-total_size));
2067 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2068
2069 if (total_size > 2000)
2070 {
2071 /* Save the back chain ptr */
2072 insn = frame_emit_store (REGNO (scratch_reg_0), sp_reg, 0);
2073 }
2074
2075 if (frame_pointer_needed)
2076 {
2077 rtx fp_reg = gen_rtx_REG (Pmode, HARD_FRAME_POINTER_REGNUM);
2078 HOST_WIDE_INT fp_offset = STACK_POINTER_OFFSET
2079 + crtl->outgoing_args_size;
2080 /* Set the new frame_pointer */
2081 insn = frame_emit_add_imm (fp_reg, sp_reg, fp_offset, scratch_reg_0);
2082 RTX_FRAME_RELATED_P (insn) = 1;
2083 real = gen_addsi3 (fp_reg, sp_reg, GEN_INT (fp_offset));
2084 add_reg_note (insn, REG_FRAME_RELATED_EXPR, real);
2085 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = STACK_BOUNDARY;
2086 }
2087 }
2088
2089 }
2090
2091 void
2092 spu_expand_epilogue (bool sibcall_p)
2093 {
2094 int size = get_frame_size (), offset, regno;
2095 HOST_WIDE_INT saved_regs_size, total_size;
2096 rtx sp_reg = gen_rtx_REG (Pmode, STACK_POINTER_REGNUM);
2097 rtx jump, scratch_reg_0;
2098
2099 if (spu_naked_function_p (current_function_decl))
2100 return;
2101
2102 scratch_reg_0 = gen_rtx_REG (SImode, LAST_ARG_REGNUM + 1);
2103
2104 saved_regs_size = spu_saved_regs_size ();
2105 total_size = size + saved_regs_size
2106 + crtl->outgoing_args_size
2107 + crtl->args.pretend_args_size;
2108
2109 if (!current_function_is_leaf
2110 || cfun->calls_alloca || total_size > 0)
2111 total_size += STACK_POINTER_OFFSET;
2112
2113 if (total_size > 0)
2114 {
2115 if (cfun->calls_alloca)
2116 frame_emit_load (STACK_POINTER_REGNUM, sp_reg, 0);
2117 else
2118 frame_emit_add_imm (sp_reg, sp_reg, total_size, scratch_reg_0);
2119
2120
2121 if (saved_regs_size > 0)
2122 {
2123 offset = -crtl->args.pretend_args_size;
2124 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; ++regno)
2125 if (need_to_save_reg (regno, 1))
2126 {
2127 offset -= 0x10;
2128 frame_emit_load (regno, sp_reg, offset);
2129 }
2130 }
2131 }
2132
2133 if (!current_function_is_leaf)
2134 frame_emit_load (LINK_REGISTER_REGNUM, sp_reg, 16);
2135
2136 if (!sibcall_p)
2137 {
2138 emit_use (gen_rtx_REG (SImode, LINK_REGISTER_REGNUM));
2139 jump = emit_jump_insn (gen__return ());
2140 emit_barrier_after (jump);
2141 }
2142
2143 }
2144
2145 rtx
2146 spu_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
2147 {
2148 if (count != 0)
2149 return 0;
2150 /* This is inefficient because it ends up copying to a save-register
2151 which then gets saved even though $lr has already been saved. But
2152 it does generate better code for leaf functions and we don't need
2153 to use RETURN_ADDRESS_POINTER_REGNUM to get it working. It's only
2154 used for __builtin_return_address anyway, so maybe we don't care if
2155 it's inefficient. */
2156 return get_hard_reg_initial_val (Pmode, LINK_REGISTER_REGNUM);
2157 }
2158 \f
2159
2160 /* Given VAL, generate a constant appropriate for MODE.
2161 If MODE is a vector mode, every element will be VAL.
2162 For TImode, VAL will be zero extended to 128 bits. */
2163 rtx
2164 spu_const (enum machine_mode mode, HOST_WIDE_INT val)
2165 {
2166 rtx inner;
2167 rtvec v;
2168 int units, i;
2169
2170 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
2171 || GET_MODE_CLASS (mode) == MODE_FLOAT
2172 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
2173 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT);
2174
2175 if (GET_MODE_CLASS (mode) == MODE_INT)
2176 return immed_double_const (val, 0, mode);
2177
2178 /* val is the bit representation of the float */
2179 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2180 return hwint_to_const_double (mode, val);
2181
2182 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
2183 inner = immed_double_const (val, 0, GET_MODE_INNER (mode));
2184 else
2185 inner = hwint_to_const_double (GET_MODE_INNER (mode), val);
2186
2187 units = GET_MODE_NUNITS (mode);
2188
2189 v = rtvec_alloc (units);
2190
2191 for (i = 0; i < units; ++i)
2192 RTVEC_ELT (v, i) = inner;
2193
2194 return gen_rtx_CONST_VECTOR (mode, v);
2195 }
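/* Illustrative usage (not in the original sources):
   spu_const (V4SImode, 1) returns a CONST_VECTOR whose four SImode elements
   are all 1, while spu_const (SImode, 1) simply returns the CONST_INT 1.  */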
2196
2197 /* Create a MODE vector constant from 4 ints. */
2198 rtx
2199 spu_const_from_ints(enum machine_mode mode, int a, int b, int c, int d)
2200 {
2201 unsigned char arr[16];
2202 arr[0] = (a >> 24) & 0xff;
2203 arr[1] = (a >> 16) & 0xff;
2204 arr[2] = (a >> 8) & 0xff;
2205 arr[3] = (a >> 0) & 0xff;
2206 arr[4] = (b >> 24) & 0xff;
2207 arr[5] = (b >> 16) & 0xff;
2208 arr[6] = (b >> 8) & 0xff;
2209 arr[7] = (b >> 0) & 0xff;
2210 arr[8] = (c >> 24) & 0xff;
2211 arr[9] = (c >> 16) & 0xff;
2212 arr[10] = (c >> 8) & 0xff;
2213 arr[11] = (c >> 0) & 0xff;
2214 arr[12] = (d >> 24) & 0xff;
2215 arr[13] = (d >> 16) & 0xff;
2216 arr[14] = (d >> 8) & 0xff;
2217 arr[15] = (d >> 0) & 0xff;
2218 return array_to_constant(mode, arr);
2219 }
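/* Illustrative usage: each argument is stored big-endian into four bytes, so
   spu_const_from_ints (V4SImode, 0x00010203, 0x04050607, 0x08090a0b,
   0x0c0d0e0f) builds the byte array { 0x00, 0x01, ..., 0x0f } and converts
   it to a V4SImode constant.  */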
2220 \f
2221 /* Branch hint support.  */
2222
2223 /* An array of these is used to propagate hints to predecessor blocks. */
2224 struct spu_bb_info
2225 {
2226 rtx prop_jump; /* propagated from another block */
2227 int bb_index; /* the original block. */
2228 };
2229 static struct spu_bb_info *spu_bb_info;
2230
2231 #define STOP_HINT_P(INSN) \
2232 (GET_CODE(INSN) == CALL_INSN \
2233 || INSN_CODE(INSN) == CODE_FOR_divmodsi4 \
2234 || INSN_CODE(INSN) == CODE_FOR_udivmodsi4)
2235
2236 /* 1 when RTX is a hinted branch or its target. We keep track of
2237 what has been hinted so the safe-hint code can test it easily. */
2238 #define HINTED_P(RTX) \
2239 (RTL_FLAG_CHECK3("HINTED_P", (RTX), CODE_LABEL, JUMP_INSN, CALL_INSN)->unchanging)
2240
2241 /* 1 when RTX is an insn that must be scheduled on an even boundary. */
2242 #define SCHED_ON_EVEN_P(RTX) \
2243 (RTL_FLAG_CHECK2("SCHED_ON_EVEN_P", (RTX), JUMP_INSN, CALL_INSN)->in_struct)
2244
2245 /* Emit a nop for INSN such that the two will dual issue. This assumes
2246 INSN is 8-byte aligned. When INSN is inline asm we emit an lnop.
2247 We check for TImode to handle a MULTI1 insn which has dual issued its
2248 first instruction. get_pipe returns -1 for MULTI0, inline asm, or
2249 ADDR_VEC insns. */
2250 static void
2251 emit_nop_for_insn (rtx insn)
2252 {
2253 int p;
2254 rtx new_insn;
2255 p = get_pipe (insn);
2256 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2257 new_insn = emit_insn_after (gen_lnop (), insn);
2258 else if (p == 1 && GET_MODE (insn) == TImode)
2259 {
2260 new_insn = emit_insn_before (gen_nopn (GEN_INT (127)), insn);
2261 PUT_MODE (new_insn, TImode);
2262 PUT_MODE (insn, VOIDmode);
2263 }
2264 else
2265 new_insn = emit_insn_after (gen_lnop (), insn);
2266 recog_memoized (new_insn);
2267 }
2268
2269 /* Insert nops in basic blocks to meet dual issue alignment
2270 requirements. Also make sure hbrp and hint instructions are at least
2271 one cycle apart, possibly inserting a nop. */
2272 static void
2273 pad_bb(void)
2274 {
2275 rtx insn, next_insn, prev_insn, hbr_insn = 0;
2276 int length;
2277 int addr;
2278
2279 /* This sets up INSN_ADDRESSES. */
2280 shorten_branches (get_insns ());
2281
2282 /* Keep track of length added by nops. */
2283 length = 0;
2284
2285 prev_insn = 0;
2286 insn = get_insns ();
2287 if (!active_insn_p (insn))
2288 insn = next_active_insn (insn);
2289 for (; insn; insn = next_insn)
2290 {
2291 next_insn = next_active_insn (insn);
2292 if (INSN_CODE (insn) == CODE_FOR_iprefetch
2293 || INSN_CODE (insn) == CODE_FOR_hbr)
2294 {
2295 if (hbr_insn)
2296 {
2297 int a0 = INSN_ADDRESSES (INSN_UID (hbr_insn));
2298 int a1 = INSN_ADDRESSES (INSN_UID (insn));
2299 if ((a1 - a0 == 8 && GET_MODE (insn) != TImode)
2300 || (a1 - a0 == 4))
2301 {
2302 prev_insn = emit_insn_before (gen_lnop (), insn);
2303 PUT_MODE (prev_insn, GET_MODE (insn));
2304 PUT_MODE (insn, TImode);
2305 length += 4;
2306 }
2307 }
2308 hbr_insn = insn;
2309 }
2310 if (INSN_CODE (insn) == CODE_FOR_blockage)
2311 {
2312 if (GET_MODE (insn) == TImode)
2313 PUT_MODE (next_insn, TImode);
2314 insn = next_insn;
2315 next_insn = next_active_insn (insn);
2316 }
2317 addr = INSN_ADDRESSES (INSN_UID (insn));
2318 if ((CALL_P (insn) || JUMP_P (insn)) && SCHED_ON_EVEN_P (insn))
2319 {
2320 if (((addr + length) & 7) != 0)
2321 {
2322 emit_nop_for_insn (prev_insn);
2323 length += 4;
2324 }
2325 }
2326 else if (GET_MODE (insn) == TImode
2327 && ((next_insn && GET_MODE (next_insn) != TImode)
2328 || get_attr_type (insn) == TYPE_MULTI0)
2329 && ((addr + length) & 7) != 0)
2330 {
2331 /* prev_insn will always be set because the first insn is
2332 always 8-byte aligned. */
2333 emit_nop_for_insn (prev_insn);
2334 length += 4;
2335 }
2336 prev_insn = insn;
2337 }
2338 }
2339
2340 \f
2341 /* Routines for branch hints. */
2342
2343 static void
2344 spu_emit_branch_hint (rtx before, rtx branch, rtx target,
2345 int distance, sbitmap blocks)
2346 {
2347 rtx branch_label = 0;
2348 rtx hint;
2349 rtx insn;
2350 rtx table;
2351
2352 if (before == 0 || branch == 0 || target == 0)
2353 return;
2354
2355 /* While scheduling we require hints to be no further than 600 bytes, so
2356 we need to enforce that here too.  */
2357 if (distance > 600)
2358 return;
2359
2360 /* If BEFORE is a basic block note, emit the hint after the note.  */
2361 if (NOTE_INSN_BASIC_BLOCK_P (before))
2362 before = NEXT_INSN (before);
2363
2364 branch_label = gen_label_rtx ();
2365 LABEL_NUSES (branch_label)++;
2366 LABEL_PRESERVE_P (branch_label) = 1;
2367 insn = emit_label_before (branch_label, branch);
2368 branch_label = gen_rtx_LABEL_REF (VOIDmode, branch_label);
2369 SET_BIT (blocks, BLOCK_FOR_INSN (branch)->index);
2370
2371 hint = emit_insn_before (gen_hbr (branch_label, target), before);
2372 recog_memoized (hint);
2373 HINTED_P (branch) = 1;
2374
2375 if (GET_CODE (target) == LABEL_REF)
2376 HINTED_P (XEXP (target, 0)) = 1;
2377 else if (tablejump_p (branch, 0, &table))
2378 {
2379 rtvec vec;
2380 int j;
2381 if (GET_CODE (PATTERN (table)) == ADDR_VEC)
2382 vec = XVEC (PATTERN (table), 0);
2383 else
2384 vec = XVEC (PATTERN (table), 1);
2385 for (j = GET_NUM_ELEM (vec) - 1; j >= 0; --j)
2386 HINTED_P (XEXP (RTVEC_ELT (vec, j), 0)) = 1;
2387 }
2388
2389 if (distance >= 588)
2390 {
2391 /* Make sure the hint isn't scheduled any earlier than this point,
2392 which could make it too far for the branch offset to fit.  */
2393 recog_memoized (emit_insn_before (gen_blockage (), hint));
2394 }
2395 else if (distance <= 8 * 4)
2396 {
2397 /* To guarantee at least 8 insns between the hint and branch we
2398 insert nops. */
2399 int d;
2400 for (d = distance; d < 8 * 4; d += 4)
2401 {
2402 insn =
2403 emit_insn_after (gen_nopn_nv (gen_rtx_REG (SImode, 127)), hint);
2404 recog_memoized (insn);
2405 }
2406
2407 /* Make sure any nops inserted aren't scheduled before the hint. */
2408 recog_memoized (emit_insn_after (gen_blockage (), hint));
2409
2410 /* Make sure any nops inserted aren't scheduled after the call. */
2411 if (CALL_P (branch) && distance < 8 * 4)
2412 recog_memoized (emit_insn_before (gen_blockage (), branch));
2413 }
2414 }
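/* Illustrative note on the distances used above: DISTANCE is in bytes, so the
   600-byte cutoff matches the scheduler's limit and 8 * 4 corresponds to
   eight 4-byte insns between the hint and the branch.  For a hypothetical
   DISTANCE of 12, the nop loop runs for d = 12, 16, 20, 24, 28 and emits
   five nops to pad the gap.  */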
2415
2416 /* Returns 0 if we don't want a hint for this branch. Otherwise return
2417 the rtx for the branch target. */
2418 static rtx
2419 get_branch_target (rtx branch)
2420 {
2421 if (GET_CODE (branch) == JUMP_INSN)
2422 {
2423 rtx set, src;
2424
2425 /* Return statements */
2426 if (GET_CODE (PATTERN (branch)) == RETURN)
2427 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2428
2429 /* jump table */
2430 if (GET_CODE (PATTERN (branch)) == ADDR_VEC
2431 || GET_CODE (PATTERN (branch)) == ADDR_DIFF_VEC)
2432 return 0;
2433
2434 /* ASM GOTOs. */
2435 if (extract_asm_operands (PATTERN (branch)) != NULL)
2436 return NULL;
2437
2438 set = single_set (branch);
2439 src = SET_SRC (set);
2440 if (GET_CODE (SET_DEST (set)) != PC)
2441 abort ();
2442
2443 if (GET_CODE (src) == IF_THEN_ELSE)
2444 {
2445 rtx lab = 0;
2446 rtx note = find_reg_note (branch, REG_BR_PROB, 0);
2447 if (note)
2448 {
2449 /* If the more probable case is not a fall through, then
2450 try a branch hint. */
2451 HOST_WIDE_INT prob = INTVAL (XEXP (note, 0));
2452 if (prob > (REG_BR_PROB_BASE * 6 / 10)
2453 && GET_CODE (XEXP (src, 1)) != PC)
2454 lab = XEXP (src, 1);
2455 else if (prob < (REG_BR_PROB_BASE * 4 / 10)
2456 && GET_CODE (XEXP (src, 2)) != PC)
2457 lab = XEXP (src, 2);
2458 }
2459 if (lab)
2460 {
2461 if (GET_CODE (lab) == RETURN)
2462 return gen_rtx_REG (SImode, LINK_REGISTER_REGNUM);
2463 return lab;
2464 }
2465 return 0;
2466 }
2467
2468 return src;
2469 }
2470 else if (GET_CODE (branch) == CALL_INSN)
2471 {
2472 rtx call;
2473 /* All of our call patterns are in a PARALLEL and the CALL is
2474 the first pattern in the PARALLEL. */
2475 if (GET_CODE (PATTERN (branch)) != PARALLEL)
2476 abort ();
2477 call = XVECEXP (PATTERN (branch), 0, 0);
2478 if (GET_CODE (call) == SET)
2479 call = SET_SRC (call);
2480 if (GET_CODE (call) != CALL)
2481 abort ();
2482 return XEXP (XEXP (call, 0), 0);
2483 }
2484 return 0;
2485 }
2486
2487 /* The special $hbr register is used to prevent the insn scheduler from
2488 moving hbr insns across instructions which invalidate them. It
2489 should only be used in a clobber, and this function searches for
2490 insns which clobber it. */
2491 static bool
2492 insn_clobbers_hbr (rtx insn)
2493 {
2494 if (INSN_P (insn)
2495 && GET_CODE (PATTERN (insn)) == PARALLEL)
2496 {
2497 rtx parallel = PATTERN (insn);
2498 rtx clobber;
2499 int j;
2500 for (j = XVECLEN (parallel, 0) - 1; j >= 0; j--)
2501 {
2502 clobber = XVECEXP (parallel, 0, j);
2503 if (GET_CODE (clobber) == CLOBBER
2504 && GET_CODE (XEXP (clobber, 0)) == REG
2505 && REGNO (XEXP (clobber, 0)) == HBR_REGNUM)
2506 return 1;
2507 }
2508 }
2509 return 0;
2510 }
2511
2512 /* Search up to 32 insns starting at FIRST:
2513 - at any kind of hinted branch, just return
2514 - at any unconditional branch in the first 15 insns, just return
2515 - at a call or indirect branch, after the first 15 insns, force it to
2516 an even address and return
2517 - at any unconditional branch, after the first 15 insns, force it to
2518 an even address.
2519 At the end of the search, insert an hbrp within 4 insns of FIRST,
2520 and an hbrp within 16 instructions of FIRST.
2521 */
2522 static void
2523 insert_hbrp_for_ilb_runout (rtx first)
2524 {
2525 rtx insn, before_4 = 0, before_16 = 0;
2526 int addr = 0, length, first_addr = -1;
2527 int hbrp_addr0 = 128 * 4, hbrp_addr1 = 128 * 4;
2528 int insert_lnop_after = 0;
2529 for (insn = first; insn; insn = NEXT_INSN (insn))
2530 if (INSN_P (insn))
2531 {
2532 if (first_addr == -1)
2533 first_addr = INSN_ADDRESSES (INSN_UID (insn));
2534 addr = INSN_ADDRESSES (INSN_UID (insn)) - first_addr;
2535 length = get_attr_length (insn);
2536
2537 if (before_4 == 0 && addr + length >= 4 * 4)
2538 before_4 = insn;
2539 /* We test for 14 instructions because the first hbrp will add
2540 up to 2 instructions. */
2541 if (before_16 == 0 && addr + length >= 14 * 4)
2542 before_16 = insn;
2543
2544 if (INSN_CODE (insn) == CODE_FOR_hbr)
2545 {
2546 /* Make sure an hbrp is at least 2 cycles away from a hint.
2547 Insert an lnop after the hbrp when necessary. */
2548 if (before_4 == 0 && addr > 0)
2549 {
2550 before_4 = insn;
2551 insert_lnop_after |= 1;
2552 }
2553 else if (before_4 && addr <= 4 * 4)
2554 insert_lnop_after |= 1;
2555 if (before_16 == 0 && addr > 10 * 4)
2556 {
2557 before_16 = insn;
2558 insert_lnop_after |= 2;
2559 }
2560 else if (before_16 && addr <= 14 * 4)
2561 insert_lnop_after |= 2;
2562 }
2563
2564 if (INSN_CODE (insn) == CODE_FOR_iprefetch)
2565 {
2566 if (addr < hbrp_addr0)
2567 hbrp_addr0 = addr;
2568 else if (addr < hbrp_addr1)
2569 hbrp_addr1 = addr;
2570 }
2571
2572 if (CALL_P (insn) || JUMP_P (insn))
2573 {
2574 if (HINTED_P (insn))
2575 return;
2576
2577 /* Any branch after the first 15 insns should be on an even
2578 address to avoid a special case branch. There might be
2579 some nops and/or hbrps inserted, so we test after 10
2580 insns. */
2581 if (addr > 10 * 4)
2582 SCHED_ON_EVEN_P (insn) = 1;
2583 }
2584
2585 if (CALL_P (insn) || tablejump_p (insn, 0, 0))
2586 return;
2587
2588
2589 if (addr + length >= 32 * 4)
2590 {
2591 gcc_assert (before_4 && before_16);
2592 if (hbrp_addr0 > 4 * 4)
2593 {
2594 insn =
2595 emit_insn_before (gen_iprefetch (GEN_INT (1)), before_4);
2596 recog_memoized (insn);
2597 INSN_ADDRESSES_NEW (insn,
2598 INSN_ADDRESSES (INSN_UID (before_4)));
2599 PUT_MODE (insn, GET_MODE (before_4));
2600 PUT_MODE (before_4, TImode);
2601 if (insert_lnop_after & 1)
2602 {
2603 insn = emit_insn_before (gen_lnop (), before_4);
2604 recog_memoized (insn);
2605 INSN_ADDRESSES_NEW (insn,
2606 INSN_ADDRESSES (INSN_UID (before_4)));
2607 PUT_MODE (insn, TImode);
2608 }
2609 }
2610 if ((hbrp_addr0 <= 4 * 4 || hbrp_addr0 > 16 * 4)
2611 && hbrp_addr1 > 16 * 4)
2612 {
2613 insn =
2614 emit_insn_before (gen_iprefetch (GEN_INT (2)), before_16);
2615 recog_memoized (insn);
2616 INSN_ADDRESSES_NEW (insn,
2617 INSN_ADDRESSES (INSN_UID (before_16)));
2618 PUT_MODE (insn, GET_MODE (before_16));
2619 PUT_MODE (before_16, TImode);
2620 if (insert_lnop_after & 2)
2621 {
2622 insn = emit_insn_before (gen_lnop (), before_16);
2623 recog_memoized (insn);
2624 INSN_ADDRESSES_NEW (insn,
2625 INSN_ADDRESSES (INSN_UID
2626 (before_16)));
2627 PUT_MODE (insn, TImode);
2628 }
2629 }
2630 return;
2631 }
2632 }
2633 else if (BARRIER_P (insn))
2634 return;
2635
2636 }
2637
2638 /* The SPU might hang when it executes 48 inline instructions after a
2639 hinted branch jumps to its hinted target. The beginning of a
2640 function and the return from a call might have been hinted, and must
2641 be handled as well. To prevent a hang we insert 2 hbrps. The first
2642 should be within 6 insns of the branch target. The second should be
2643 within 22 insns of the branch target. When determining if hbrps are
2644 necessary, we look for only 32 inline instructions, because up to
2645 12 nops and 4 hbrps could be inserted.  Similarly, when inserting
2646 new hbrps, we insert them within 4 and 16 insns of the target. */
2647 static void
2648 insert_hbrp (void)
2649 {
2650 rtx insn;
2651 if (TARGET_SAFE_HINTS)
2652 {
2653 shorten_branches (get_insns ());
2654 /* Insert hbrp at beginning of function */
2655 insn = next_active_insn (get_insns ());
2656 if (insn)
2657 insert_hbrp_for_ilb_runout (insn);
2658 /* Insert hbrp after hinted targets. */
2659 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2660 if ((LABEL_P (insn) && HINTED_P (insn)) || CALL_P (insn))
2661 insert_hbrp_for_ilb_runout (next_active_insn (insn));
2662 }
2663 }
2664
2665 static int in_spu_reorg;
2666
2667 /* Insert branch hints. There are no branch optimizations after this
2668 pass, so it's safe to set our branch hints now. */
2669 static void
2670 spu_machine_dependent_reorg (void)
2671 {
2672 sbitmap blocks;
2673 basic_block bb;
2674 rtx branch, insn;
2675 rtx branch_target = 0;
2676 int branch_addr = 0, insn_addr, required_dist = 0;
2677 int i;
2678 unsigned int j;
2679
2680 if (!TARGET_BRANCH_HINTS || optimize == 0)
2681 {
2682 /* We still do it for unoptimized code because an external
2683 function might have hinted a call or return. */
2684 insert_hbrp ();
2685 pad_bb ();
2686 return;
2687 }
2688
2689 blocks = sbitmap_alloc (last_basic_block);
2690 sbitmap_zero (blocks);
2691
2692 in_spu_reorg = 1;
2693 compute_bb_for_insn ();
2694
2695 compact_blocks ();
2696
2697 spu_bb_info =
2698 (struct spu_bb_info *) xcalloc (n_basic_blocks,
2699 sizeof (struct spu_bb_info));
2700
2701 /* We need exact insn addresses and lengths. */
2702 shorten_branches (get_insns ());
2703
2704 for (i = n_basic_blocks - 1; i >= 0; i--)
2705 {
2706 bb = BASIC_BLOCK (i);
2707 branch = 0;
2708 if (spu_bb_info[i].prop_jump)
2709 {
2710 branch = spu_bb_info[i].prop_jump;
2711 branch_target = get_branch_target (branch);
2712 branch_addr = INSN_ADDRESSES (INSN_UID (branch));
2713 required_dist = spu_hint_dist;
2714 }
2715 /* Search from end of a block to beginning. In this loop, find
2716 jumps which need a branch hint and emit the hint only when:
2717 - it's an indirect branch and we're at the insn which sets
2718 the register
2719 - we're at an insn that will invalidate the hint. e.g., a
2720 call, another hint insn, inline asm that clobbers $hbr, and
2721 some inlined operations (divmodsi4). Don't consider jumps
2722 because they are only at the end of a block and are
2723 considered when we are deciding whether to propagate
2724 - we're getting too far away from the branch. The hbr insns
2725 only have a signed 10 bit offset
2726 We go back as far as possible so the branch will be considered
2727 for propagation when we get to the beginning of the block. */
2728 for (insn = BB_END (bb); insn; insn = PREV_INSN (insn))
2729 {
2730 if (INSN_P (insn))
2731 {
2732 insn_addr = INSN_ADDRESSES (INSN_UID (insn));
2733 if (branch
2734 && ((GET_CODE (branch_target) == REG
2735 && set_of (branch_target, insn) != NULL_RTX)
2736 || insn_clobbers_hbr (insn)
2737 || branch_addr - insn_addr > 600))
2738 {
2739 rtx next = NEXT_INSN (insn);
2740 int next_addr = INSN_ADDRESSES (INSN_UID (next));
2741 if (insn != BB_END (bb)
2742 && branch_addr - next_addr >= required_dist)
2743 {
2744 if (dump_file)
2745 fprintf (dump_file,
2746 "hint for %i in block %i before %i\n",
2747 INSN_UID (branch), bb->index,
2748 INSN_UID (next));
2749 spu_emit_branch_hint (next, branch, branch_target,
2750 branch_addr - next_addr, blocks);
2751 }
2752 branch = 0;
2753 }
2754
2755 /* JUMP_P will only be true at the end of a block. When
2756 branch is already set it means we've previously decided
2757 to propagate a hint for that branch into this block. */
2758 if (CALL_P (insn) || (JUMP_P (insn) && !branch))
2759 {
2760 branch = 0;
2761 if ((branch_target = get_branch_target (insn)))
2762 {
2763 branch = insn;
2764 branch_addr = insn_addr;
2765 required_dist = spu_hint_dist;
2766 }
2767 }
2768 }
2769 if (insn == BB_HEAD (bb))
2770 break;
2771 }
2772
2773 if (branch)
2774 {
2775 /* If we haven't emitted a hint for this branch yet, it might
2776 be profitable to emit it in one of the predecessor blocks,
2777 especially for loops. */
2778 rtx bbend;
2779 basic_block prev = 0, prop = 0, prev2 = 0;
2780 int loop_exit = 0, simple_loop = 0;
2781 int next_addr = INSN_ADDRESSES (INSN_UID (NEXT_INSN (insn)));
2782
2783 for (j = 0; j < EDGE_COUNT (bb->preds); j++)
2784 if (EDGE_PRED (bb, j)->flags & EDGE_FALLTHRU)
2785 prev = EDGE_PRED (bb, j)->src;
2786 else
2787 prev2 = EDGE_PRED (bb, j)->src;
2788
2789 for (j = 0; j < EDGE_COUNT (bb->succs); j++)
2790 if (EDGE_SUCC (bb, j)->flags & EDGE_LOOP_EXIT)
2791 loop_exit = 1;
2792 else if (EDGE_SUCC (bb, j)->dest == bb)
2793 simple_loop = 1;
2794
2795 /* If this branch is a loop exit then propagate to previous
2796 fallthru block. This catches the cases when it is a simple
2797 loop or when there is an initial branch into the loop. */
2798 if (prev && (loop_exit || simple_loop)
2799 && prev->loop_depth <= bb->loop_depth)
2800 prop = prev;
2801
2802 /* If there is only one adjacent predecessor, don't propagate
2803 outside this loop. This loop_depth test isn't perfect, but
2804 I'm not sure the loop_father member is valid at this point. */
2805 else if (prev && single_pred_p (bb)
2806 && prev->loop_depth == bb->loop_depth)
2807 prop = prev;
2808
2809 /* If this is the JOIN block of a simple IF-THEN then
2810 propagate the hint to the HEADER block. */
2811 else if (prev && prev2
2812 && EDGE_COUNT (bb->preds) == 2
2813 && EDGE_COUNT (prev->preds) == 1
2814 && EDGE_PRED (prev, 0)->src == prev2
2815 && prev2->loop_depth == bb->loop_depth
2816 && GET_CODE (branch_target) != REG)
2817 prop = prev;
2818
2819 /* Don't propagate when:
2820 - this is a simple loop and the hint would be too far
2821 - this is not a simple loop and there are 16 insns in
2822 this block already
2823 - the predecessor block ends in a branch that will be
2824 hinted
2825 - the predecessor block ends in an insn that invalidates
2826 the hint */
2827 if (prop
2828 && prop->index >= 0
2829 && (bbend = BB_END (prop))
2830 && branch_addr - INSN_ADDRESSES (INSN_UID (bbend)) <
2831 (simple_loop ? 600 : 16 * 4) && get_branch_target (bbend) == 0
2832 && (JUMP_P (bbend) || !insn_clobbers_hbr (bbend)))
2833 {
2834 if (dump_file)
2835 fprintf (dump_file, "propagate from %i to %i (loop depth %i) "
2836 "for %i (loop_exit %i simple_loop %i dist %i)\n",
2837 bb->index, prop->index, bb->loop_depth,
2838 INSN_UID (branch), loop_exit, simple_loop,
2839 branch_addr - INSN_ADDRESSES (INSN_UID (bbend)));
2840
2841 spu_bb_info[prop->index].prop_jump = branch;
2842 spu_bb_info[prop->index].bb_index = i;
2843 }
2844 else if (branch_addr - next_addr >= required_dist)
2845 {
2846 if (dump_file)
2847 fprintf (dump_file, "hint for %i in block %i before %i\n",
2848 INSN_UID (branch), bb->index,
2849 INSN_UID (NEXT_INSN (insn)));
2850 spu_emit_branch_hint (NEXT_INSN (insn), branch, branch_target,
2851 branch_addr - next_addr, blocks);
2852 }
2853 branch = 0;
2854 }
2855 }
2856 free (spu_bb_info);
2857
2858 if (!sbitmap_empty_p (blocks))
2859 find_many_sub_basic_blocks (blocks);
2860
2861 /* We have to schedule to make sure alignment is ok. */
2862 FOR_EACH_BB (bb) bb->flags &= ~BB_DISABLE_SCHEDULE;
2863
2864 /* The hints need to be scheduled, so call it again. */
2865 schedule_insns ();
2866
2867 insert_hbrp ();
2868
2869 pad_bb ();
2870
2871 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
2872 if (NONJUMP_INSN_P (insn) && INSN_CODE (insn) == CODE_FOR_hbr)
2873 {
2874 /* Adjust the LABEL_REF in a hint when we have inserted a nop
2875 between its branch label and the branch.  We don't move the
2876 label because GCC expects it at the beginning of the block. */
2877 rtx unspec = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
2878 rtx label_ref = XVECEXP (unspec, 0, 0);
2879 rtx label = XEXP (label_ref, 0);
2880 rtx branch;
2881 int offset = 0;
2882 for (branch = NEXT_INSN (label);
2883 !JUMP_P (branch) && !CALL_P (branch);
2884 branch = NEXT_INSN (branch))
2885 if (NONJUMP_INSN_P (branch))
2886 offset += get_attr_length (branch);
2887 if (offset > 0)
2888 XVECEXP (unspec, 0, 0) = plus_constant (label_ref, offset);
2889 }
2890
2891 if (spu_flag_var_tracking)
2892 {
2893 df_analyze ();
2894 timevar_push (TV_VAR_TRACKING);
2895 variable_tracking_main ();
2896 timevar_pop (TV_VAR_TRACKING);
2897 df_finish_pass (false);
2898 }
2899
2900 free_bb_for_insn ();
2901
2902 in_spu_reorg = 0;
2903 }
2904 \f
2905
2906 /* Insn scheduling routines, primarily for dual issue. */
2907 static int
2908 spu_sched_issue_rate (void)
2909 {
2910 return 2;
2911 }
2912
2913 static int
2914 uses_ls_unit(rtx insn)
2915 {
2916 rtx set = single_set (insn);
2917 if (set != 0
2918 && (GET_CODE (SET_DEST (set)) == MEM
2919 || GET_CODE (SET_SRC (set)) == MEM))
2920 return 1;
2921 return 0;
2922 }
2923
2924 static int
2925 get_pipe (rtx insn)
2926 {
2927 enum attr_type t;
2928 /* Handle inline asm */
2929 if (INSN_CODE (insn) == -1)
2930 return -1;
2931 t = get_attr_type (insn);
2932 switch (t)
2933 {
2934 case TYPE_CONVERT:
2935 return -2;
2936 case TYPE_MULTI0:
2937 return -1;
2938
2939 case TYPE_FX2:
2940 case TYPE_FX3:
2941 case TYPE_SPR:
2942 case TYPE_NOP:
2943 case TYPE_FXB:
2944 case TYPE_FPD:
2945 case TYPE_FP6:
2946 case TYPE_FP7:
2947 return 0;
2948
2949 case TYPE_LNOP:
2950 case TYPE_SHUF:
2951 case TYPE_LOAD:
2952 case TYPE_STORE:
2953 case TYPE_BR:
2954 case TYPE_MULTI1:
2955 case TYPE_HBR:
2956 case TYPE_IPREFETCH:
2957 return 1;
2958 default:
2959 abort ();
2960 }
2961 }
2962
2963
2964 /* haifa-sched.c has a static variable that keeps track of the current
2965 cycle. It is passed to spu_sched_reorder, and we record it here for
2966 use by spu_sched_variable_issue. It won't be accurate if the
2967 scheduler updates its clock_var between the two calls.  */
2968 static int clock_var;
2969
2970 /* This is used to keep track of insn alignment. Set to 0 at the
2971 beginning of each block and increased by the "length" attr of each
2972 insn scheduled. */
2973 static int spu_sched_length;
2974
2975 /* Record when we've issued pipe0 and pipe1 insns so we can reorder the
2976 ready list appropriately in spu_sched_reorder(). */
2977 static int pipe0_clock;
2978 static int pipe1_clock;
2979
2980 static int prev_clock_var;
2981
2982 static int prev_priority;
2983
2984 /* The SPU needs to load the next ilb sometime during the execution of
2985 the previous ilb. There is a potential conflict if every cycle has a
2986 load or store. To avoid the conflict we make sure the load/store
2987 unit is free for at least one cycle during the execution of insns in
2988 the previous ilb. */
2989 static int spu_ls_first;
2990 static int prev_ls_clock;
2991
2992 static void
2993 spu_sched_init_global (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
2994 int max_ready ATTRIBUTE_UNUSED)
2995 {
2996 spu_sched_length = 0;
2997 }
2998
2999 static void
3000 spu_sched_init (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3001 int max_ready ATTRIBUTE_UNUSED)
3002 {
3003 if (align_labels > 4 || align_loops > 4 || align_jumps > 4)
3004 {
3005 /* When any block might be at least 8-byte aligned, assume they
3006 will all be at least 8-byte aligned to make sure dual issue
3007 works out correctly. */
3008 spu_sched_length = 0;
3009 }
3010 spu_ls_first = INT_MAX;
3011 clock_var = -1;
3012 prev_ls_clock = -1;
3013 pipe0_clock = -1;
3014 pipe1_clock = -1;
3015 prev_clock_var = -1;
3016 prev_priority = -1;
3017 }
3018
3019 static int
3020 spu_sched_variable_issue (FILE *file ATTRIBUTE_UNUSED,
3021 int verbose ATTRIBUTE_UNUSED, rtx insn, int more)
3022 {
3023 int len;
3024 int p;
3025 if (GET_CODE (PATTERN (insn)) == USE
3026 || GET_CODE (PATTERN (insn)) == CLOBBER
3027 || (len = get_attr_length (insn)) == 0)
3028 return more;
3029
3030 spu_sched_length += len;
3031
3032 /* Reset on inline asm */
3033 if (INSN_CODE (insn) == -1)
3034 {
3035 spu_ls_first = INT_MAX;
3036 pipe0_clock = -1;
3037 pipe1_clock = -1;
3038 return 0;
3039 }
3040 p = get_pipe (insn);
3041 if (p == 0)
3042 pipe0_clock = clock_var;
3043 else
3044 pipe1_clock = clock_var;
3045
3046 if (in_spu_reorg)
3047 {
3048 if (clock_var - prev_ls_clock > 1
3049 || INSN_CODE (insn) == CODE_FOR_iprefetch)
3050 spu_ls_first = INT_MAX;
3051 if (uses_ls_unit (insn))
3052 {
3053 if (spu_ls_first == INT_MAX)
3054 spu_ls_first = spu_sched_length;
3055 prev_ls_clock = clock_var;
3056 }
3057
3058 /* The scheduler hasn't inserted the nop, but we will later on.
3059 Include those nops in spu_sched_length. */
3060 if (prev_clock_var == clock_var && (spu_sched_length & 7))
3061 spu_sched_length += 4;
3062 prev_clock_var = clock_var;
3063
3064 /* more is -1 when called from spu_sched_reorder for new insns
3065 that don't have INSN_PRIORITY */
3066 if (more >= 0)
3067 prev_priority = INSN_PRIORITY (insn);
3068 }
3069
3070 /* Always try issuing more insns. spu_sched_reorder will decide
3071 when the cycle should be advanced. */
3072 return 1;
3073 }
3074
3075 /* This function is called for both TARGET_SCHED_REORDER and
3076 TARGET_SCHED_REORDER2. */
3077 static int
3078 spu_sched_reorder (FILE *file ATTRIBUTE_UNUSED, int verbose ATTRIBUTE_UNUSED,
3079 rtx *ready, int *nreadyp, int clock)
3080 {
3081 int i, nready = *nreadyp;
3082 int pipe_0, pipe_1, pipe_hbrp, pipe_ls, schedule_i;
3083 rtx insn;
3084
3085 clock_var = clock;
3086
3087 if (nready <= 0 || pipe1_clock >= clock)
3088 return 0;
3089
3090 /* Find any rtl insns that don't generate assembly insns and schedule
3091 them first. */
3092 for (i = nready - 1; i >= 0; i--)
3093 {
3094 insn = ready[i];
3095 if (INSN_CODE (insn) == -1
3096 || INSN_CODE (insn) == CODE_FOR_blockage
3097 || (INSN_P (insn) && get_attr_length (insn) == 0))
3098 {
3099 ready[i] = ready[nready - 1];
3100 ready[nready - 1] = insn;
3101 return 1;
3102 }
3103 }
3104
3105 pipe_0 = pipe_1 = pipe_hbrp = pipe_ls = schedule_i = -1;
3106 for (i = 0; i < nready; i++)
3107 if (INSN_CODE (ready[i]) != -1)
3108 {
3109 insn = ready[i];
3110 switch (get_attr_type (insn))
3111 {
3112 default:
3113 case TYPE_MULTI0:
3114 case TYPE_CONVERT:
3115 case TYPE_FX2:
3116 case TYPE_FX3:
3117 case TYPE_SPR:
3118 case TYPE_NOP:
3119 case TYPE_FXB:
3120 case TYPE_FPD:
3121 case TYPE_FP6:
3122 case TYPE_FP7:
3123 pipe_0 = i;
3124 break;
3125 case TYPE_LOAD:
3126 case TYPE_STORE:
3127 pipe_ls = i;
3128 case TYPE_LNOP:
3129 case TYPE_SHUF:
3130 case TYPE_BR:
3131 case TYPE_MULTI1:
3132 case TYPE_HBR:
3133 pipe_1 = i;
3134 break;
3135 case TYPE_IPREFETCH:
3136 pipe_hbrp = i;
3137 break;
3138 }
3139 }
3140
3141 /* In the first scheduling phase, schedule loads and stores together
3142 to increase the chance they will get merged during postreload CSE. */
3143 if (!reload_completed && pipe_ls >= 0)
3144 {
3145 insn = ready[pipe_ls];
3146 ready[pipe_ls] = ready[nready - 1];
3147 ready[nready - 1] = insn;
3148 return 1;
3149 }
3150
3151 /* If there is an hbrp ready, prefer it over other pipe 1 insns. */
3152 if (pipe_hbrp >= 0)
3153 pipe_1 = pipe_hbrp;
3154
3155 /* When we have loads/stores in every cycle of the last 15 insns and
3156 we are about to schedule another load/store, emit an hbrp insn
3157 instead. */
3158 if (in_spu_reorg
3159 && spu_sched_length - spu_ls_first >= 4 * 15
3160 && !(pipe0_clock < clock && pipe_0 >= 0) && pipe_1 == pipe_ls)
3161 {
3162 insn = sched_emit_insn (gen_iprefetch (GEN_INT (3)));
3163 recog_memoized (insn);
3164 if (pipe0_clock < clock)
3165 PUT_MODE (insn, TImode);
3166 spu_sched_variable_issue (file, verbose, insn, -1);
3167 return 0;
3168 }
3169
3170 /* In general, we want to emit nops to increase dual issue, but dual
3171 issue isn't faster when one of the insns could be scheduled later
3172 without effecting the critical path. We look at INSN_PRIORITY to
3173 make a good guess, but it isn't perfect so -mdual-nops=n can be
3174 used to effect it. */
3175 if (in_spu_reorg && spu_dual_nops < 10)
3176 {
3177 /* When we are at an even address and we are not issuing nops to
3178 improve scheduling, we need to advance the cycle.  */
3179 if ((spu_sched_length & 7) == 0 && prev_clock_var == clock
3180 && (spu_dual_nops == 0
3181 || (pipe_1 != -1
3182 && prev_priority >
3183 INSN_PRIORITY (ready[pipe_1]) + spu_dual_nops)))
3184 return 0;
3185
3186 /* When at an odd address, schedule the highest priority insn
3187 without considering pipeline. */
3188 if ((spu_sched_length & 7) == 4 && prev_clock_var != clock
3189 && (spu_dual_nops == 0
3190 || (prev_priority >
3191 INSN_PRIORITY (ready[nready - 1]) + spu_dual_nops)))
3192 return 1;
3193 }
3194
3195
3196 /* We haven't issued a pipe0 insn yet this cycle; if there is a
3197 pipe0 insn in the ready list, schedule it. */
3198 if (pipe0_clock < clock && pipe_0 >= 0)
3199 schedule_i = pipe_0;
3200
3201 /* Either we've scheduled a pipe0 insn already or there is no pipe0
3202 insn to schedule. Put a pipe1 insn at the front of the ready list. */
3203 else
3204 schedule_i = pipe_1;
3205
3206 if (schedule_i > -1)
3207 {
3208 insn = ready[schedule_i];
3209 ready[schedule_i] = ready[nready - 1];
3210 ready[nready - 1] = insn;
3211 return 1;
3212 }
3213 return 0;
3214 }
3215
3216 /* INSN is dependent on DEP_INSN. */
3217 static int
3218 spu_sched_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
3219 {
3220 rtx set;
3221
3222 /* The blockage pattern is used to prevent instructions from being
3223 moved across it and has no cost. */
3224 if (INSN_CODE (insn) == CODE_FOR_blockage
3225 || INSN_CODE (dep_insn) == CODE_FOR_blockage)
3226 return 0;
3227
3228 if ((INSN_P (insn) && get_attr_length (insn) == 0)
3229 || (INSN_P (dep_insn) && get_attr_length (dep_insn) == 0))
3230 return 0;
3231
3232 /* Make sure hbrps are spread out. */
3233 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3234 && INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3235 return 8;
3236
3237 /* Make sure hints and hbrps are 2 cycles apart. */
3238 if ((INSN_CODE (insn) == CODE_FOR_iprefetch
3239 || INSN_CODE (insn) == CODE_FOR_hbr)
3240 && (INSN_CODE (dep_insn) == CODE_FOR_iprefetch
3241 || INSN_CODE (dep_insn) == CODE_FOR_hbr))
3242 return 2;
3243
3244 /* An hbrp has no real dependency on other insns. */
3245 if (INSN_CODE (insn) == CODE_FOR_iprefetch
3246 || INSN_CODE (dep_insn) == CODE_FOR_iprefetch)
3247 return 0;
3248
3249 /* Assuming that it is unlikely an argument register will be used in
3250 the first cycle of the called function, we reduce the cost for
3251 slightly better scheduling of dep_insn. When not hinted, the
3252 mispredicted branch would hide the cost as well. */
3253 if (CALL_P (insn))
3254 {
3255 rtx target = get_branch_target (insn);
3256 if (GET_CODE (target) != REG || !set_of (target, insn))
3257 return cost - 2;
3258 return cost;
3259 }
3260
3261 /* And when returning from a function, let's assume the return values
3262 are completed sooner too. */
3263 if (CALL_P (dep_insn))
3264 return cost - 2;
3265
3266 /* Make sure an instruction that loads from the back chain is scheduled
3267 away from the return instruction so a hint is more likely to get
3268 issued. */
3269 if (INSN_CODE (insn) == CODE_FOR__return
3270 && (set = single_set (dep_insn))
3271 && GET_CODE (SET_DEST (set)) == REG
3272 && REGNO (SET_DEST (set)) == LINK_REGISTER_REGNUM)
3273 return 20;
3274
3275 /* The dfa scheduler sets cost to 0 for all anti-dependencies and the
3276 scheduler makes every insn in a block anti-dependent on the final
3277 jump_insn. We adjust here so higher cost insns will get scheduled
3278 earlier. */
3279 if (JUMP_P (insn) && REG_NOTE_KIND (link) == REG_DEP_ANTI)
3280 return insn_cost (dep_insn) - 3;
3281
3282 return cost;
3283 }
3284 \f
3285 /* Create a CONST_DOUBLE from a string. */
3286 struct rtx_def *
3287 spu_float_const (const char *string, enum machine_mode mode)
3288 {
3289 REAL_VALUE_TYPE value;
3290 value = REAL_VALUE_ATOF (string, mode);
3291 return CONST_DOUBLE_FROM_REAL_VALUE (value, mode);
3292 }
3293
3294 int
3295 spu_constant_address_p (rtx x)
3296 {
3297 return (GET_CODE (x) == LABEL_REF || GET_CODE (x) == SYMBOL_REF
3298 || GET_CODE (x) == CONST_INT || GET_CODE (x) == CONST
3299 || GET_CODE (x) == HIGH);
3300 }
3301
3302 static enum spu_immediate
3303 which_immediate_load (HOST_WIDE_INT val)
3304 {
3305 gcc_assert (val == trunc_int_for_mode (val, SImode));
3306
3307 if (val >= -0x8000 && val <= 0x7fff)
3308 return SPU_IL;
3309 if (val >= 0 && val <= 0x3ffff)
3310 return SPU_ILA;
3311 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3312 return SPU_ILH;
3313 if ((val & 0xffff) == 0)
3314 return SPU_ILHU;
3315
3316 return SPU_NONE;
3317 }
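/* Illustrative classifications derived from the tests above:
     0x00001234 -> SPU_IL    (fits the signed 16-bit il field)
     0x00021234 -> SPU_ILA   (0 <= val <= 0x3ffff)
     0x12341234 -> SPU_ILH   (both halfwords identical)
     0x12340000 -> SPU_ILHU  (low halfword is zero)
     0x12345678 -> SPU_NONE  (needs an ilhu/iohl pair instead)  */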
3318
3319 /* Return true when OP can be loaded by one of the il instructions, or
3320 when flow2 is not completed and OP can be loaded using ilhu and iohl. */
3321 int
3322 immediate_load_p (rtx op, enum machine_mode mode)
3323 {
3324 if (CONSTANT_P (op))
3325 {
3326 enum immediate_class c = classify_immediate (op, mode);
3327 return c == IC_IL1 || c == IC_IL1s
3328 || (!epilogue_completed && (c == IC_IL2 || c == IC_IL2s));
3329 }
3330 return 0;
3331 }
3332
3333 /* Return true if the first SIZE bytes of ARR form a constant that can be
3334 generated with cbd, chd, cwd or cdd. When non-NULL, PRUN and PSTART
3335 represent the size and offset of the instruction to use. */
3336 static int
3337 cpat_info(unsigned char *arr, int size, int *prun, int *pstart)
3338 {
3339 int cpat, run, i, start;
3340 cpat = 1;
3341 run = 0;
3342 start = -1;
3343 for (i = 0; i < size && cpat; i++)
3344 if (arr[i] != i+16)
3345 {
3346 if (!run)
3347 {
3348 start = i;
3349 if (arr[i] == 3)
3350 run = 1;
3351 else if (arr[i] == 2 && arr[i+1] == 3)
3352 run = 2;
3353 else if (arr[i] == 0)
3354 {
3355 while (arr[i+run] == run && i+run < 16)
3356 run++;
3357 if (run != 4 && run != 8)
3358 cpat = 0;
3359 }
3360 else
3361 cpat = 0;
3362 if ((i & (run-1)) != 0)
3363 cpat = 0;
3364 i += run;
3365 }
3366 else
3367 cpat = 0;
3368 }
3369 if (cpat && (run || size < 16))
3370 {
3371 if (run == 0)
3372 run = 1;
3373 if (prun)
3374 *prun = run;
3375 if (pstart)
3376 *pstart = start == -1 ? 16-run : start;
3377 return 1;
3378 }
3379 return 0;
3380 }
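/* Illustrative example: a 16-byte array holding the cwd-style pattern
   { 16, 17, 18, 19, 0, 1, 2, 3, 24, 25, ..., 31 } is accepted with
   *prun == 4 and *pstart == 4, while an arbitrary byte permutation is
   rejected.  */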
3381
3382 /* OP is a CONSTANT_P. Determine what instructions can be used to load
3383 it into a register. MODE is only valid when OP is a CONST_INT. */
3384 static enum immediate_class
3385 classify_immediate (rtx op, enum machine_mode mode)
3386 {
3387 HOST_WIDE_INT val;
3388 unsigned char arr[16];
3389 int i, j, repeated, fsmbi, repeat;
3390
3391 gcc_assert (CONSTANT_P (op));
3392
3393 if (GET_MODE (op) != VOIDmode)
3394 mode = GET_MODE (op);
3395
3396 /* A V4SI const_vector with all identical symbols is ok. */
3397 if (!flag_pic
3398 && mode == V4SImode
3399 && GET_CODE (op) == CONST_VECTOR
3400 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_INT
3401 && GET_CODE (CONST_VECTOR_ELT (op, 0)) != CONST_DOUBLE
3402 && CONST_VECTOR_ELT (op, 0) == CONST_VECTOR_ELT (op, 1)
3403 && CONST_VECTOR_ELT (op, 1) == CONST_VECTOR_ELT (op, 2)
3404 && CONST_VECTOR_ELT (op, 2) == CONST_VECTOR_ELT (op, 3))
3405 op = CONST_VECTOR_ELT (op, 0);
3406
3407 switch (GET_CODE (op))
3408 {
3409 case SYMBOL_REF:
3410 case LABEL_REF:
3411 return TARGET_LARGE_MEM ? IC_IL2s : IC_IL1s;
3412
3413 case CONST:
3414 /* We can never know if the resulting address fits in 18 bits and can be
3415 loaded with ila. For now, assume the address will not overflow if
3416 the displacement is "small" (fits 'K' constraint). */
3417 if (!TARGET_LARGE_MEM && GET_CODE (XEXP (op, 0)) == PLUS)
3418 {
3419 rtx sym = XEXP (XEXP (op, 0), 0);
3420 rtx cst = XEXP (XEXP (op, 0), 1);
3421
3422 if (GET_CODE (sym) == SYMBOL_REF
3423 && GET_CODE (cst) == CONST_INT
3424 && satisfies_constraint_K (cst))
3425 return IC_IL1s;
3426 }
3427 return IC_IL2s;
3428
3429 case HIGH:
3430 return IC_IL1s;
3431
3432 case CONST_VECTOR:
3433 for (i = 0; i < GET_MODE_NUNITS (mode); i++)
3434 if (GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_INT
3435 && GET_CODE (CONST_VECTOR_ELT (op, i)) != CONST_DOUBLE)
3436 return IC_POOL;
3437 /* Fall through. */
3438
3439 case CONST_INT:
3440 case CONST_DOUBLE:
3441 constant_to_array (mode, op, arr);
3442
3443 /* Check that each 4-byte slot is identical. */
3444 repeated = 1;
3445 for (i = 4; i < 16; i += 4)
3446 for (j = 0; j < 4; j++)
3447 if (arr[j] != arr[i + j])
3448 repeated = 0;
3449
3450 if (repeated)
3451 {
3452 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3453 val = trunc_int_for_mode (val, SImode);
3454
3455 if (which_immediate_load (val) != SPU_NONE)
3456 return IC_IL1;
3457 }
3458
3459 /* Any mode of 2 bytes or smaller can be loaded with an il
3460 instruction. */
3461 gcc_assert (GET_MODE_SIZE (mode) > 2);
3462
3463 fsmbi = 1;
3464 repeat = 0;
3465 for (i = 0; i < 16 && fsmbi; i++)
3466 if (arr[i] != 0 && repeat == 0)
3467 repeat = arr[i];
3468 else if (arr[i] != 0 && arr[i] != repeat)
3469 fsmbi = 0;
3470 if (fsmbi)
3471 return repeat == 0xff ? IC_FSMBI : IC_FSMBI2;
3472
3473 if (cpat_info (arr, GET_MODE_SIZE (mode), 0, 0))
3474 return IC_CPAT;
3475
3476 if (repeated)
3477 return IC_IL2;
3478
3479 return IC_POOL;
3480 default:
3481 break;
3482 }
3483 gcc_unreachable ();
3484 }
3485
3486 static enum spu_immediate
3487 which_logical_immediate (HOST_WIDE_INT val)
3488 {
3489 gcc_assert (val == trunc_int_for_mode (val, SImode));
3490
3491 if (val >= -0x200 && val <= 0x1ff)
3492 return SPU_ORI;
3493 if (val >= 0 && val <= 0xffff)
3494 return SPU_IOHL;
3495 if ((val & 0xffff) == ((val >> 16) & 0xffff))
3496 {
3497 val = trunc_int_for_mode (val, HImode);
3498 if (val >= -0x200 && val <= 0x1ff)
3499 return SPU_ORHI;
3500 if ((val & 0xff) == ((val >> 8) & 0xff))
3501 {
3502 val = trunc_int_for_mode (val, QImode);
3503 if (val >= -0x200 && val <= 0x1ff)
3504 return SPU_ORBI;
3505 }
3506 }
3507 return SPU_NONE;
3508 }
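/* Illustrative classifications derived from the tests above:
     0x000001ff -> SPU_ORI, 0x0000ffff -> SPU_IOHL, 0x00030003 -> SPU_ORHI,
     0x07070707 -> SPU_ORBI, 0x12345678 -> SPU_NONE.  */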
3509
3510 /* Return TRUE when X, a CONST_VECTOR, only contains CONST_INTs or
3511 CONST_DOUBLEs. */
3512 static int
3513 const_vector_immediate_p (rtx x)
3514 {
3515 int i;
3516 gcc_assert (GET_CODE (x) == CONST_VECTOR);
3517 for (i = 0; i < GET_MODE_NUNITS (GET_MODE (x)); i++)
3518 if (GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_INT
3519 && GET_CODE (CONST_VECTOR_ELT (x, i)) != CONST_DOUBLE)
3520 return 0;
3521 return 1;
3522 }
3523
3524 int
3525 logical_immediate_p (rtx op, enum machine_mode mode)
3526 {
3527 HOST_WIDE_INT val;
3528 unsigned char arr[16];
3529 int i, j;
3530
3531 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3532 || GET_CODE (op) == CONST_VECTOR);
3533
3534 if (GET_CODE (op) == CONST_VECTOR
3535 && !const_vector_immediate_p (op))
3536 return 0;
3537
3538 if (GET_MODE (op) != VOIDmode)
3539 mode = GET_MODE (op);
3540
3541 constant_to_array (mode, op, arr);
3542
3543 /* Check that bytes are repeated. */
3544 for (i = 4; i < 16; i += 4)
3545 for (j = 0; j < 4; j++)
3546 if (arr[j] != arr[i + j])
3547 return 0;
3548
3549 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3550 val = trunc_int_for_mode (val, SImode);
3551
3552 i = which_logical_immediate (val);
3553 return i != SPU_NONE && i != SPU_IOHL;
3554 }
3555
3556 int
3557 iohl_immediate_p (rtx op, enum machine_mode mode)
3558 {
3559 HOST_WIDE_INT val;
3560 unsigned char arr[16];
3561 int i, j;
3562
3563 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3564 || GET_CODE (op) == CONST_VECTOR);
3565
3566 if (GET_CODE (op) == CONST_VECTOR
3567 && !const_vector_immediate_p (op))
3568 return 0;
3569
3570 if (GET_MODE (op) != VOIDmode)
3571 mode = GET_MODE (op);
3572
3573 constant_to_array (mode, op, arr);
3574
3575 /* Check that bytes are repeated. */
3576 for (i = 4; i < 16; i += 4)
3577 for (j = 0; j < 4; j++)
3578 if (arr[j] != arr[i + j])
3579 return 0;
3580
3581 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
3582 val = trunc_int_for_mode (val, SImode);
3583
3584 return val >= 0 && val <= 0xffff;
3585 }
3586
3587 int
3588 arith_immediate_p (rtx op, enum machine_mode mode,
3589 HOST_WIDE_INT low, HOST_WIDE_INT high)
3590 {
3591 HOST_WIDE_INT val;
3592 unsigned char arr[16];
3593 int bytes, i, j;
3594
3595 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3596 || GET_CODE (op) == CONST_VECTOR);
3597
3598 if (GET_CODE (op) == CONST_VECTOR
3599 && !const_vector_immediate_p (op))
3600 return 0;
3601
3602 if (GET_MODE (op) != VOIDmode)
3603 mode = GET_MODE (op);
3604
3605 constant_to_array (mode, op, arr);
3606
3607 if (VECTOR_MODE_P (mode))
3608 mode = GET_MODE_INNER (mode);
3609
3610 bytes = GET_MODE_SIZE (mode);
3611 mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3612
3613 /* Check that bytes are repeated. */
3614 for (i = bytes; i < 16; i += bytes)
3615 for (j = 0; j < bytes; j++)
3616 if (arr[j] != arr[i + j])
3617 return 0;
3618
3619 val = arr[0];
3620 for (j = 1; j < bytes; j++)
3621 val = (val << 8) | arr[j];
3622
3623 val = trunc_int_for_mode (val, mode);
3624
3625 return val >= low && val <= high;
3626 }
3627
3628 /* TRUE when op is an immediate and an exact power of 2, and given that
3629 OP is 2^scale, scale >= LOW && scale <= HIGH. When OP is a vector,
3630 all entries must be the same. */
3631 bool
3632 exp2_immediate_p (rtx op, enum machine_mode mode, int low, int high)
3633 {
3634 enum machine_mode int_mode;
3635 HOST_WIDE_INT val;
3636 unsigned char arr[16];
3637 int bytes, i, j;
3638
3639 gcc_assert (GET_CODE (op) == CONST_INT || GET_CODE (op) == CONST_DOUBLE
3640 || GET_CODE (op) == CONST_VECTOR);
3641
3642 if (GET_CODE (op) == CONST_VECTOR
3643 && !const_vector_immediate_p (op))
3644 return 0;
3645
3646 if (GET_MODE (op) != VOIDmode)
3647 mode = GET_MODE (op);
3648
3649 constant_to_array (mode, op, arr);
3650
3651 if (VECTOR_MODE_P (mode))
3652 mode = GET_MODE_INNER (mode);
3653
3654 bytes = GET_MODE_SIZE (mode);
3655 int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
3656
3657 /* Check that bytes are repeated. */
3658 for (i = bytes; i < 16; i += bytes)
3659 for (j = 0; j < bytes; j++)
3660 if (arr[j] != arr[i + j])
3661 return 0;
3662
3663 val = arr[0];
3664 for (j = 1; j < bytes; j++)
3665 val = (val << 8) | arr[j];
3666
3667 val = trunc_int_for_mode (val, int_mode);
3668
3669 /* Currently, we only handle SFmode.  */
3670 gcc_assert (mode == SFmode);
3671 if (mode == SFmode)
3672 {
3673 int exp = (val >> 23) - 127;
3674 return val > 0 && (val & 0x007fffff) == 0
3675 && exp >= low && exp <= high;
3676 }
3677 return FALSE;
3678 }
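/* Illustrative check: 2.0f has the bit pattern 0x40000000, so val > 0, the
   mantissa bits are zero and exp = (0x40000000 >> 23) - 127 = 1; the constant
   is therefore accepted whenever low <= 1 <= high.  */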
3679
3680 /* Return true if X is a SYMBOL_REF to an __ea qualified variable. */
3681
3682 static int
3683 ea_symbol_ref (rtx *px, void *data ATTRIBUTE_UNUSED)
3684 {
3685 rtx x = *px;
3686 tree decl;
3687
3688 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
3689 {
3690 rtx plus = XEXP (x, 0);
3691 rtx op0 = XEXP (plus, 0);
3692 rtx op1 = XEXP (plus, 1);
3693 if (GET_CODE (op1) == CONST_INT)
3694 x = op0;
3695 }
3696
3697 return (GET_CODE (x) == SYMBOL_REF
3698 && (decl = SYMBOL_REF_DECL (x)) != 0
3699 && TREE_CODE (decl) == VAR_DECL
3700 && TYPE_ADDR_SPACE (TREE_TYPE (decl)));
3701 }
3702
3703 /* We accept:
3704 - any 32-bit constant (SImode, SFmode)
3705 - any constant that can be generated with fsmbi (any mode)
3706 - a 64-bit constant where the high and low bits are identical
3707 (DImode, DFmode)
3708 - a 128-bit constant where the four 32-bit words match. */
3709 int
3710 spu_legitimate_constant_p (rtx x)
3711 {
3712 if (GET_CODE (x) == HIGH)
3713 x = XEXP (x, 0);
3714
3715 /* Reject any __ea qualified reference. These can't appear in
3716 instructions but must be forced to the constant pool. */
3717 if (for_each_rtx (&x, ea_symbol_ref, 0))
3718 return 0;
3719
3720 /* V4SI with all identical symbols is valid. */
3721 if (!flag_pic
3722 && GET_MODE (x) == V4SImode
3723 && (GET_CODE (CONST_VECTOR_ELT (x, 0)) == SYMBOL_REF
3724 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == LABEL_REF
3725 || GET_CODE (CONST_VECTOR_ELT (x, 0)) == CONST))
3726 return CONST_VECTOR_ELT (x, 0) == CONST_VECTOR_ELT (x, 1)
3727 && CONST_VECTOR_ELT (x, 1) == CONST_VECTOR_ELT (x, 2)
3728 && CONST_VECTOR_ELT (x, 2) == CONST_VECTOR_ELT (x, 3);
3729
3730 if (GET_CODE (x) == CONST_VECTOR
3731 && !const_vector_immediate_p (x))
3732 return 0;
3733 return 1;
3734 }
3735
3736 /* Valid addresses are:
3737 - symbol_ref, label_ref, const
3738 - reg
3739 - reg + const_int, where const_int is 16 byte aligned
3740 - reg + reg, alignment doesn't matter
3741 The alignment matters in the reg+const case because lqd and stqd
3742 ignore the 4 least significant bits of the const. We only care about
3743 16 byte modes because the expand phase will change all smaller MEM
3744 references to TImode. */
3745 static bool
3746 spu_legitimate_address_p (enum machine_mode mode,
3747 rtx x, bool reg_ok_strict)
3748 {
3749 int aligned = GET_MODE_SIZE (mode) >= 16;
3750 if (aligned
3751 && GET_CODE (x) == AND
3752 && GET_CODE (XEXP (x, 1)) == CONST_INT
3753 && INTVAL (XEXP (x, 1)) == (HOST_WIDE_INT) - 16)
3754 x = XEXP (x, 0);
3755 switch (GET_CODE (x))
3756 {
3757 case LABEL_REF:
3758 return !TARGET_LARGE_MEM;
3759
3760 case SYMBOL_REF:
3761 case CONST:
3762 /* Keep __ea references until reload so that spu_expand_mov can see them
3763 in MEMs. */
3764 if (ea_symbol_ref (&x, 0))
3765 return !reload_in_progress && !reload_completed;
3766 return !TARGET_LARGE_MEM;
3767
3768 case CONST_INT:
3769 return INTVAL (x) >= 0 && INTVAL (x) <= 0x3ffff;
3770
3771 case SUBREG:
3772 x = XEXP (x, 0);
3773 if (REG_P (x))
3774 return 0;
3775
3776 case REG:
3777 return INT_REG_OK_FOR_BASE_P (x, reg_ok_strict);
3778
3779 case PLUS:
3780 case LO_SUM:
3781 {
3782 rtx op0 = XEXP (x, 0);
3783 rtx op1 = XEXP (x, 1);
3784 if (GET_CODE (op0) == SUBREG)
3785 op0 = XEXP (op0, 0);
3786 if (GET_CODE (op1) == SUBREG)
3787 op1 = XEXP (op1, 0);
3788 if (GET_CODE (op0) == REG
3789 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3790 && GET_CODE (op1) == CONST_INT
3791 && INTVAL (op1) >= -0x2000
3792 && INTVAL (op1) <= 0x1fff
3793 && (!aligned || (INTVAL (op1) & 15) == 0))
3794 return TRUE;
3795 if (GET_CODE (op0) == REG
3796 && INT_REG_OK_FOR_BASE_P (op0, reg_ok_strict)
3797 && GET_CODE (op1) == REG
3798 && INT_REG_OK_FOR_INDEX_P (op1, reg_ok_strict))
3799 return TRUE;
3800 }
3801 break;
3802
3803 default:
3804 break;
3805 }
3806 return FALSE;
3807 }
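/* Illustrative consequences of the checks above (hypothetical operands): for
   a 16-byte access, ($3 + 32) is accepted while ($3 + 8) is rejected because
   the offset is not quadword aligned; for narrower modes the low offset bits
   are not checked here, and ($3 + $4) is accepted whenever both registers are
   valid base/index registers.  */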
3808
3809 /* Like spu_legitimate_address_p, except with named addresses. */
3810 static bool
3811 spu_addr_space_legitimate_address_p (enum machine_mode mode, rtx x,
3812 bool reg_ok_strict, addr_space_t as)
3813 {
3814 if (as == ADDR_SPACE_EA)
3815 return (REG_P (x) && (GET_MODE (x) == EAmode));
3816
3817 else if (as != ADDR_SPACE_GENERIC)
3818 gcc_unreachable ();
3819
3820 return spu_legitimate_address_p (mode, x, reg_ok_strict);
3821 }
3822
3823 /* When the address is reg + const_int, force the const_int into a
3824 register. */
3825 rtx
3826 spu_legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED,
3827 enum machine_mode mode ATTRIBUTE_UNUSED)
3828 {
3829 rtx op0, op1;
3830 /* Make sure both operands are registers. */
3831 if (GET_CODE (x) == PLUS)
3832 {
3833 op0 = XEXP (x, 0);
3834 op1 = XEXP (x, 1);
3835 if (ALIGNED_SYMBOL_REF_P (op0))
3836 {
3837 op0 = force_reg (Pmode, op0);
3838 mark_reg_pointer (op0, 128);
3839 }
3840 else if (GET_CODE (op0) != REG)
3841 op0 = force_reg (Pmode, op0);
3842 if (ALIGNED_SYMBOL_REF_P (op1))
3843 {
3844 op1 = force_reg (Pmode, op1);
3845 mark_reg_pointer (op1, 128);
3846 }
3847 else if (GET_CODE (op1) != REG)
3848 op1 = force_reg (Pmode, op1);
3849 x = gen_rtx_PLUS (Pmode, op0, op1);
3850 }
3851 return x;
3852 }
3853
3854 /* Like spu_legitimate_address, except with named address support. */
3855 static rtx
3856 spu_addr_space_legitimize_address (rtx x, rtx oldx, enum machine_mode mode,
3857 addr_space_t as)
3858 {
3859 if (as != ADDR_SPACE_GENERIC)
3860 return x;
3861
3862 return spu_legitimize_address (x, oldx, mode);
3863 }
3864
3865 /* Handle an attribute requiring a FUNCTION_DECL; arguments as in
3866 struct attribute_spec.handler. */
3867 static tree
3868 spu_handle_fndecl_attribute (tree * node,
3869 tree name,
3870 tree args ATTRIBUTE_UNUSED,
3871 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3872 {
3873 if (TREE_CODE (*node) != FUNCTION_DECL)
3874 {
3875 warning (0, "%qE attribute only applies to functions",
3876 name);
3877 *no_add_attrs = true;
3878 }
3879
3880 return NULL_TREE;
3881 }
3882
3883 /* Handle the "vector" attribute. */
3884 static tree
3885 spu_handle_vector_attribute (tree * node, tree name,
3886 tree args ATTRIBUTE_UNUSED,
3887 int flags ATTRIBUTE_UNUSED, bool * no_add_attrs)
3888 {
3889 tree type = *node, result = NULL_TREE;
3890 enum machine_mode mode;
3891 int unsigned_p;
3892
3893 while (POINTER_TYPE_P (type)
3894 || TREE_CODE (type) == FUNCTION_TYPE
3895 || TREE_CODE (type) == METHOD_TYPE || TREE_CODE (type) == ARRAY_TYPE)
3896 type = TREE_TYPE (type);
3897
3898 mode = TYPE_MODE (type);
3899
3900 unsigned_p = TYPE_UNSIGNED (type);
3901 switch (mode)
3902 {
3903 case DImode:
3904 result = (unsigned_p ? unsigned_V2DI_type_node : V2DI_type_node);
3905 break;
3906 case SImode:
3907 result = (unsigned_p ? unsigned_V4SI_type_node : V4SI_type_node);
3908 break;
3909 case HImode:
3910 result = (unsigned_p ? unsigned_V8HI_type_node : V8HI_type_node);
3911 break;
3912 case QImode:
3913 result = (unsigned_p ? unsigned_V16QI_type_node : V16QI_type_node);
3914 break;
3915 case SFmode:
3916 result = V4SF_type_node;
3917 break;
3918 case DFmode:
3919 result = V2DF_type_node;
3920 break;
3921 default:
3922 break;
3923 }
3924
3925 /* Propagate qualifiers attached to the element type
3926 onto the vector type. */
3927 if (result && result != type && TYPE_QUALS (type))
3928 result = build_qualified_type (result, TYPE_QUALS (type));
3929
3930 *no_add_attrs = true; /* No need to hang on to the attribute. */
3931
3932 if (!result)
3933 warning (0, "%qE attribute ignored", name);
3934 else
3935 *node = lang_hooks.types.reconstruct_complex_type (*node, result);
3936
3937 return NULL_TREE;
3938 }
3939
3940 /* Return nonzero if FUNC is a naked function. */
3941 static int
3942 spu_naked_function_p (tree func)
3943 {
3944 tree a;
3945
3946 if (TREE_CODE (func) != FUNCTION_DECL)
3947 abort ();
3948
3949 a = lookup_attribute ("naked", DECL_ATTRIBUTES (func));
3950 return a != NULL_TREE;
3951 }
3952
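/* Return the initial difference between the specified pair of registers,
   for use in register elimination.  The result depends on the frame size,
   the outgoing argument area and the size of the saved-register area.  */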
3953 int
3954 spu_initial_elimination_offset (int from, int to)
3955 {
3956 int saved_regs_size = spu_saved_regs_size ();
3957 int sp_offset = 0;
3958 if (!current_function_is_leaf || crtl->outgoing_args_size
3959 || get_frame_size () || saved_regs_size)
3960 sp_offset = STACK_POINTER_OFFSET;
3961 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3962 return get_frame_size () + crtl->outgoing_args_size + sp_offset;
3963 else if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3964 return get_frame_size ();
3965 else if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
3966 return sp_offset + crtl->outgoing_args_size
3967 + get_frame_size () + saved_regs_size + STACK_POINTER_OFFSET;
3968 else if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
3969 return get_frame_size () + saved_regs_size + sp_offset;
3970 else
3971 gcc_unreachable ();
3972 }
3973
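/* Return an rtx describing where a value of TYPE is returned.  Small
   aggregates are returned left-justified in one or more return registers,
   expressed as a PARALLEL.  */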
3974 rtx
3975 spu_function_value (const_tree type, const_tree func ATTRIBUTE_UNUSED)
3976 {
3977 enum machine_mode mode = TYPE_MODE (type);
3978 int byte_size = ((mode == BLKmode)
3979 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
3980
3981 /* Make sure small structs are left justified in a register. */
3982 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
3983 && byte_size <= UNITS_PER_WORD * MAX_REGISTER_RETURN && byte_size > 0)
3984 {
3985 enum machine_mode smode;
3986 rtvec v;
3987 int i;
3988 int nregs = (byte_size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3989 int n = byte_size / UNITS_PER_WORD;
3990 v = rtvec_alloc (nregs);
3991 for (i = 0; i < n; i++)
3992 {
3993 RTVEC_ELT (v, i) = gen_rtx_EXPR_LIST (VOIDmode,
3994 gen_rtx_REG (TImode,
3995 FIRST_RETURN_REGNUM
3996 + i),
3997 GEN_INT (UNITS_PER_WORD * i));
3998 byte_size -= UNITS_PER_WORD;
3999 }
4000
4001 if (n < nregs)
4002 {
4003 if (byte_size < 4)
4004 byte_size = 4;
4005 smode =
4006 smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4007 RTVEC_ELT (v, n) =
4008 gen_rtx_EXPR_LIST (VOIDmode,
4009 gen_rtx_REG (smode, FIRST_RETURN_REGNUM + n),
4010 GEN_INT (UNITS_PER_WORD * n));
4011 }
4012 return gen_rtx_PARALLEL (mode, v);
4013 }
4014 return gen_rtx_REG (mode, FIRST_RETURN_REGNUM);
4015 }
4016
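/* Return an rtx for the register in which the argument described by MODE
   and TYPE is passed, or zero if it is passed on the stack.  Small
   aggregates are wrapped in a PARALLEL so they are left-justified in
   their register.  */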
4017 static rtx
4018 spu_function_arg (CUMULATIVE_ARGS *cum,
4019 enum machine_mode mode,
4020 const_tree type, bool named ATTRIBUTE_UNUSED)
4021 {
4022 int byte_size;
4023
4024 if (cum >= MAX_REGISTER_ARGS)
4025 return 0;
4026
4027 byte_size = ((mode == BLKmode)
4028 ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
4029
4030 /* The ABI does not allow parameters to be passed partially in
4031 registers and partially on the stack. */
4032 if ((cum + (byte_size + 15) / 16) > MAX_REGISTER_ARGS)
4033 return 0;
4034
4035 /* Make sure small structs are left justified in a register. */
4036 if ((mode == BLKmode || (type && AGGREGATE_TYPE_P (type)))
4037 && byte_size < UNITS_PER_WORD && byte_size > 0)
4038 {
4039 enum machine_mode smode;
4040 rtx gr_reg;
4041 if (byte_size < 4)
4042 byte_size = 4;
4043 smode = smallest_mode_for_size (byte_size * BITS_PER_UNIT, MODE_INT);
4044 gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
4045 gen_rtx_REG (smode, FIRST_ARG_REGNUM + cum),
4046 const0_rtx);
4047 return gen_rtx_PARALLEL (mode, gen_rtvec (1, gr_reg));
4048 }
4049 else
4050 return gen_rtx_REG (mode, FIRST_ARG_REGNUM + cum);
4051 }
4052
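/* Advance CUM past the argument just processed: one slot for variable-sized
   arguments (which are passed by reference) and for VOIDmode, one slot per
   16 bytes for BLKmode aggregates, and HARD_REGNO_NREGS slots otherwise.  */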
4053 static void
4054 spu_function_arg_advance (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4055 const_tree type, bool named ATTRIBUTE_UNUSED)
4056 {
4057 *cum += (type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
4058 ? 1
4059 : mode == BLKmode
4060 ? ((int_size_in_bytes (type) + 15) / 16)
4061 : mode == VOIDmode
4062 ? 1
4063 : HARD_REGNO_NREGS (cum, mode));
4064 }
4065
4066 /* Variable sized types are passed by reference. */
4067 static bool
4068 spu_pass_by_reference (CUMULATIVE_ARGS * cum ATTRIBUTE_UNUSED,
4069 enum machine_mode mode ATTRIBUTE_UNUSED,
4070 const_tree type, bool named ATTRIBUTE_UNUSED)
4071 {
4072 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
4073 }
4074 \f
4075
4076 /* Var args. */
4077
4078 /* Create and return the va_list datatype.
4079
4080 On SPU, va_list is an array type equivalent to
4081
4082 typedef struct __va_list_tag
4083 {
4084 void *__args __attribute__((__aligned(16)));
4085 void *__skip __attribute__((__aligned(16)));
4086
4087 } va_list[1];
4088
4089 where __args points to the arg that will be returned by the next
4090 va_arg(), and __skip points to the previous stack frame such that
4091 when __args == __skip we should advance __args by 32 bytes. */
4092 static tree
4093 spu_build_builtin_va_list (void)
4094 {
4095 tree f_args, f_skip, record, type_decl;
4096 bool owp;
4097
4098 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4099
4100 type_decl =
4101 build_decl (BUILTINS_LOCATION,
4102 TYPE_DECL, get_identifier ("__va_list_tag"), record);
4103
4104 f_args = build_decl (BUILTINS_LOCATION,
4105 FIELD_DECL, get_identifier ("__args"), ptr_type_node);
4106 f_skip = build_decl (BUILTINS_LOCATION,
4107 FIELD_DECL, get_identifier ("__skip"), ptr_type_node);
4108
4109 DECL_FIELD_CONTEXT (f_args) = record;
4110 DECL_ALIGN (f_args) = 128;
4111 DECL_USER_ALIGN (f_args) = 1;
4112
4113 DECL_FIELD_CONTEXT (f_skip) = record;
4114 DECL_ALIGN (f_skip) = 128;
4115 DECL_USER_ALIGN (f_skip) = 1;
4116
4117 TREE_CHAIN (record) = type_decl;
4118 TYPE_NAME (record) = type_decl;
4119 TYPE_FIELDS (record) = f_args;
4120 DECL_CHAIN (f_args) = f_skip;
4121
4122 /* We know this is being padded and we want it to be. It is an internal
4123 type so hide the warnings from the user. */
4124 owp = warn_padded;
4125 warn_padded = false;
4126
4127 layout_type (record);
4128
4129 warn_padded = owp;
4130
4131 /* The correct type is an array type of one element. */
4132 return build_array_type (record, build_index_type (size_zero_node));
4133 }
4134
4135 /* Implement va_start by filling the va_list structure VALIST.
4136 NEXTARG points to the first anonymous stack argument.
4137
4138 The following global variables are used to initialize
4139 the va_list structure:
4140
4141 crtl->args.info;
4142 the CUMULATIVE_ARGS for this function
4143
4144 crtl->args.arg_offset_rtx:
4145 holds the offset of the first anonymous stack argument
4146 (relative to the virtual arg pointer). */
4147
4148 static void
4149 spu_va_start (tree valist, rtx nextarg)
4150 {
4151 tree f_args, f_skip;
4152 tree args, skip, t;
4153
4154 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4155 f_skip = DECL_CHAIN (f_args);
4156
4157 valist = build_va_arg_indirect_ref (valist);
4158 args =
4159 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4160 skip =
4161 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4162
4163 /* Find the __args area. */
4164 t = make_tree (TREE_TYPE (args), nextarg);
4165 if (crtl->args.pretend_args_size > 0)
4166 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (args), t,
4167 size_int (-STACK_POINTER_OFFSET));
4168 t = build2 (MODIFY_EXPR, TREE_TYPE (args), args, t);
4169 TREE_SIDE_EFFECTS (t) = 1;
4170 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4171
4172 /* Find the __skip area. */
4173 t = make_tree (TREE_TYPE (skip), virtual_incoming_args_rtx);
4174 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (skip), t,
4175 size_int (crtl->args.pretend_args_size
4176 - STACK_POINTER_OFFSET));
4177 t = build2 (MODIFY_EXPR, TREE_TYPE (skip), skip, t);
4178 TREE_SIDE_EFFECTS (t) = 1;
4179 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4180 }
4181
4182 /* Gimplify va_arg by updating the va_list structure
4183 VALIST as required to retrieve an argument of type
4184 TYPE, and returning that argument.
4185
4186 ret = va_arg(VALIST, TYPE);
4187
4188 generates code equivalent to:
4189
4190 paddedsize = (sizeof(TYPE) + 15) & -16;
4191 if (VALIST.__args + paddedsize > VALIST.__skip
4192 && VALIST.__args <= VALIST.__skip)
4193 addr = VALIST.__skip + 32;
4194 else
4195 addr = VALIST.__args;
4196 VALIST.__args = addr + paddedsize;
4197 ret = *(TYPE *)addr;
4198 */
4199 static tree
4200 spu_gimplify_va_arg_expr (tree valist, tree type, gimple_seq * pre_p,
4201 gimple_seq * post_p ATTRIBUTE_UNUSED)
4202 {
4203 tree f_args, f_skip;
4204 tree args, skip;
4205 HOST_WIDE_INT size, rsize;
4206 tree paddedsize, addr, tmp;
4207 bool pass_by_reference_p;
4208
4209 f_args = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4210 f_skip = DECL_CHAIN (f_args);
4211
4212 valist = build_simple_mem_ref (valist);
4213 args =
4214 build3 (COMPONENT_REF, TREE_TYPE (f_args), valist, f_args, NULL_TREE);
4215 skip =
4216 build3 (COMPONENT_REF, TREE_TYPE (f_skip), valist, f_skip, NULL_TREE);
4217
4218 addr = create_tmp_var (ptr_type_node, "va_arg");
4219
4220 /* If an object is dynamically sized, a pointer to it is passed
4221 instead of the object itself. */
4222 pass_by_reference_p = spu_pass_by_reference (NULL, TYPE_MODE (type), type,
4223 false);
4224 if (pass_by_reference_p)
4225 type = build_pointer_type (type);
4226 size = int_size_in_bytes (type);
4227 rsize = ((size + UNITS_PER_WORD - 1) / UNITS_PER_WORD) * UNITS_PER_WORD;
4228
4229 /* build conditional expression to calculate addr. The expression
4230 will be gimplified later. */
4231 paddedsize = size_int (rsize);
4232 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (args), paddedsize);
4233 tmp = build2 (TRUTH_AND_EXPR, boolean_type_node,
4234 build2 (GT_EXPR, boolean_type_node, tmp, unshare_expr (skip)),
4235 build2 (LE_EXPR, boolean_type_node, unshare_expr (args),
4236 unshare_expr (skip)));
4237
4238 tmp = build3 (COND_EXPR, ptr_type_node, tmp,
4239 build2 (POINTER_PLUS_EXPR, ptr_type_node, unshare_expr (skip),
4240 size_int (32)), unshare_expr (args));
4241
4242 gimplify_assign (addr, tmp, pre_p);
4243
4244 /* update VALIST.__args */
4245 tmp = build2 (POINTER_PLUS_EXPR, ptr_type_node, addr, paddedsize);
4246 gimplify_assign (unshare_expr (args), tmp, pre_p);
4247
4248 addr = fold_convert (build_pointer_type_for_mode (type, ptr_mode, true),
4249 addr);
4250
4251 if (pass_by_reference_p)
4252 addr = build_va_arg_indirect_ref (addr);
4253
4254 return build_va_arg_indirect_ref (addr);
4255 }
4256
4257 /* Save parameter registers starting with the register that corresponds
4258 to the first unnamed parameter. If the first unnamed parameter is
4259 in the stack then save no registers. Set pretend_args_size to the
4260 amount of space needed to save the registers. */
4261 void
4262 spu_setup_incoming_varargs (CUMULATIVE_ARGS * cum, enum machine_mode mode,
4263 tree type, int *pretend_size, int no_rtl)
4264 {
4265 if (!no_rtl)
4266 {
4267 rtx tmp;
4268 int regno;
4269 int offset;
4270 int ncum = *cum;
4271
4272 /* cum currently points to the last named argument; we want to
4273 start at the next argument. */
4274 spu_function_arg_advance (&ncum, mode, type, true);
4275
4276 offset = -STACK_POINTER_OFFSET;
4277 for (regno = ncum; regno < MAX_REGISTER_ARGS; regno++)
4278 {
4279 tmp = gen_frame_mem (V4SImode,
4280 plus_constant (virtual_incoming_args_rtx,
4281 offset));
4282 emit_move_insn (tmp,
4283 gen_rtx_REG (V4SImode, FIRST_ARG_REGNUM + regno));
4284 offset += 16;
4285 }
4286 *pretend_size = offset + STACK_POINTER_OFFSET;
4287 }
4288 }
4289 \f
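/* When compiling position-independent code, reserve the PIC register by
   marking it fixed and call-used.  */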
4290 void
4291 spu_conditional_register_usage (void)
4292 {
4293 if (flag_pic)
4294 {
4295 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4296 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
4297 }
4298 }
4299
4300 /* This is called any time we inspect the alignment of a register for
4301 addresses. */
4302 static int
4303 reg_aligned_for_addr (rtx x)
4304 {
4305 int regno =
4306 REGNO (x) < FIRST_PSEUDO_REGISTER ? ORIGINAL_REGNO (x) : REGNO (x);
4307 return REGNO_POINTER_ALIGN (regno) >= 128;
4308 }
4309
4310 /* Encode symbol attributes (local vs. global, tls model) of a SYMBOL_REF
4311 into its SYMBOL_REF_FLAGS. */
4312 static void
4313 spu_encode_section_info (tree decl, rtx rtl, int first)
4314 {
4315 default_encode_section_info (decl, rtl, first);
4316
4317 /* If a variable has a forced alignment to < 16 bytes, mark it with
4318 SYMBOL_FLAG_ALIGN1. */
4319 if (TREE_CODE (decl) == VAR_DECL
4320 && DECL_USER_ALIGN (decl) && DECL_ALIGN (decl) < 128)
4321 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_ALIGN1;
4322 }
4323
4324 /* Return TRUE if we are certain the mem refers to a complete object
4325 which is both 16-byte aligned and padded to a 16-byte boundary. This
4326 would make it safe to store with a single instruction.
4327 We guarantee the alignment and padding for static objects by aligning
4328 all of them to 16-bytes. (DATA_ALIGNMENT and CONSTANT_ALIGNMENT.)
4329 FIXME: We currently cannot guarantee this for objects on the stack
4330 because assign_parm_setup_stack calls assign_stack_local with the
4331 alignment of the parameter mode and in that case the alignment never
4332 gets adjusted by LOCAL_ALIGNMENT. */
4333 static int
4334 store_with_one_insn_p (rtx mem)
4335 {
4336 enum machine_mode mode = GET_MODE (mem);
4337 rtx addr = XEXP (mem, 0);
4338 if (mode == BLKmode)
4339 return 0;
4340 if (GET_MODE_SIZE (mode) >= 16)
4341 return 1;
4342 /* Only static objects. */
4343 if (GET_CODE (addr) == SYMBOL_REF)
4344 {
4345 /* We use the associated declaration to make sure the access is
4346 referring to the whole object.
4347 We check both MEM_EXPR and SYMBOL_REF_DECL. I'm not sure
4348 if it is necessary. Will there be cases where one exists, and
4349 the other does not? Will there be cases where both exist, but
4350 have different types? */
4351 tree decl = MEM_EXPR (mem);
4352 if (decl
4353 && TREE_CODE (decl) == VAR_DECL
4354 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4355 return 1;
4356 decl = SYMBOL_REF_DECL (addr);
4357 if (decl
4358 && TREE_CODE (decl) == VAR_DECL
4359 && GET_MODE (mem) == TYPE_MODE (TREE_TYPE (decl)))
4360 return 1;
4361 }
4362 return 0;
4363 }
4364
4365 /* Return 1 when the address is not valid for a simple load and store as
4366 required by the '_mov*' patterns. We could make this less strict
4367 for loads, but we prefer mem's to look the same so they are more
4368 likely to be merged. */
4369 static int
4370 address_needs_split (rtx mem)
4371 {
4372 if (GET_MODE_SIZE (GET_MODE (mem)) < 16
4373 && (GET_MODE_SIZE (GET_MODE (mem)) < 4
4374 || !(store_with_one_insn_p (mem)
4375 || mem_is_padded_component_ref (mem))))
4376 return 1;
4377
4378 return 0;
4379 }
4380
4381 static GTY(()) rtx cache_fetch; /* __cache_fetch function */
4382 static GTY(()) rtx cache_fetch_dirty; /* __cache_fetch_dirty function */
4383 static alias_set_type ea_alias_set = -1; /* alias set for __ea memory */
4384
4385 /* MEM is known to be an __ea qualified memory access. Emit a call to
4386 fetch the ppu memory to local store, and return its address in local
4387 store. */
4388
4389 static void
4390 ea_load_store (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4391 {
4392 if (is_store)
4393 {
4394 rtx ndirty = GEN_INT (GET_MODE_SIZE (GET_MODE (mem)));
4395 if (!cache_fetch_dirty)
4396 cache_fetch_dirty = init_one_libfunc ("__cache_fetch_dirty");
4397 emit_library_call_value (cache_fetch_dirty, data_addr, LCT_NORMAL, Pmode,
4398 2, ea_addr, EAmode, ndirty, SImode);
4399 }
4400 else
4401 {
4402 if (!cache_fetch)
4403 cache_fetch = init_one_libfunc ("__cache_fetch");
4404 emit_library_call_value (cache_fetch, data_addr, LCT_NORMAL, Pmode,
4405 1, ea_addr, EAmode);
4406 }
4407 }
4408
4409 /* Like ea_load_store, but do the cache tag comparison and, for stores,
4410 dirty bit marking, inline.
4411
4412 The cache control data structure is an array of
4413
4414 struct __cache_tag_array
4415 {
4416 unsigned int tag_lo[4];
4417 unsigned int tag_hi[4];
4418 void *data_pointer[4];
4419 int reserved[4];
4420 vector unsigned short dirty_bits[4];
4421 } */
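/* To illustrate the sequence emitted below (the array size here is only an
   example): each entry above occupies 128 bytes, so with a 4KB
   __cache_tag_array the mask (__cache_tag_array_size - 128) keeps EA bits
   7..11 and selects one of 32 four-way sets; (EA & 127) is the offset
   within the 128-byte cache line; and (EA & -128) is the tag compared
   against tag_lo (and, for the 64-bit __ea model, tag_hi).  */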
4422
4423 static void
4424 ea_load_store_inline (rtx mem, bool is_store, rtx ea_addr, rtx data_addr)
4425 {
4426 rtx ea_addr_si;
4427 HOST_WIDE_INT v;
4428 rtx tag_size_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array_size");
4429 rtx tag_arr_sym = gen_rtx_SYMBOL_REF (Pmode, "__cache_tag_array");
4430 rtx index_mask = gen_reg_rtx (SImode);
4431 rtx tag_arr = gen_reg_rtx (Pmode);
4432 rtx splat_mask = gen_reg_rtx (TImode);
4433 rtx splat = gen_reg_rtx (V4SImode);
4434 rtx splat_hi = NULL_RTX;
4435 rtx tag_index = gen_reg_rtx (Pmode);
4436 rtx block_off = gen_reg_rtx (SImode);
4437 rtx tag_addr = gen_reg_rtx (Pmode);
4438 rtx tag = gen_reg_rtx (V4SImode);
4439 rtx cache_tag = gen_reg_rtx (V4SImode);
4440 rtx cache_tag_hi = NULL_RTX;
4441 rtx cache_ptrs = gen_reg_rtx (TImode);
4442 rtx cache_ptrs_si = gen_reg_rtx (SImode);
4443 rtx tag_equal = gen_reg_rtx (V4SImode);
4444 rtx tag_equal_hi = NULL_RTX;
4445 rtx tag_eq_pack = gen_reg_rtx (V4SImode);
4446 rtx tag_eq_pack_si = gen_reg_rtx (SImode);
4447 rtx eq_index = gen_reg_rtx (SImode);
4448 rtx bcomp, hit_label, hit_ref, cont_label, insn;
4449
4450 if (spu_ea_model != 32)
4451 {
4452 splat_hi = gen_reg_rtx (V4SImode);
4453 cache_tag_hi = gen_reg_rtx (V4SImode);
4454 tag_equal_hi = gen_reg_rtx (V4SImode);
4455 }
4456
4457 emit_move_insn (index_mask, plus_constant (tag_size_sym, -128));
4458 emit_move_insn (tag_arr, tag_arr_sym);
4459 v = 0x0001020300010203LL;
4460 emit_move_insn (splat_mask, immed_double_const (v, v, TImode));
4461 ea_addr_si = ea_addr;
4462 if (spu_ea_model != 32)
4463 ea_addr_si = convert_to_mode (SImode, ea_addr, 1);
4464
4465 /* tag_index = ea_addr & (tag_array_size - 128) */
4466 emit_insn (gen_andsi3 (tag_index, ea_addr_si, index_mask));
4467
4468 /* splat ea_addr to all 4 slots. */
4469 emit_insn (gen_shufb (splat, ea_addr_si, ea_addr_si, splat_mask));
4470 /* Similarly for high 32 bits of ea_addr. */
4471 if (spu_ea_model != 32)
4472 emit_insn (gen_shufb (splat_hi, ea_addr, ea_addr, splat_mask));
4473
4474 /* block_off = ea_addr & 127 */
4475 emit_insn (gen_andsi3 (block_off, ea_addr_si, spu_const (SImode, 127)));
4476
4477 /* tag_addr = tag_arr + tag_index */
4478 emit_insn (gen_addsi3 (tag_addr, tag_arr, tag_index));
4479
4480 /* Read cache tags. */
4481 emit_move_insn (cache_tag, gen_rtx_MEM (V4SImode, tag_addr));
4482 if (spu_ea_model != 32)
4483 emit_move_insn (cache_tag_hi, gen_rtx_MEM (V4SImode,
4484 plus_constant (tag_addr, 16)));
4485
4486 /* tag = ea_addr & -128 */
4487 emit_insn (gen_andv4si3 (tag, splat, spu_const (V4SImode, -128)));
4488
4489 /* Read all four cache data pointers. */
4490 emit_move_insn (cache_ptrs, gen_rtx_MEM (TImode,
4491 plus_constant (tag_addr, 32)));
4492
4493 /* Compare tags. */
4494 emit_insn (gen_ceq_v4si (tag_equal, tag, cache_tag));
4495 if (spu_ea_model != 32)
4496 {
4497 emit_insn (gen_ceq_v4si (tag_equal_hi, splat_hi, cache_tag_hi));
4498 emit_insn (gen_andv4si3 (tag_equal, tag_equal, tag_equal_hi));
4499 }
4500
4501 /* At most one of the tags compare equal, so tag_equal has one
4502 32-bit slot set to all 1's, with the other slots all zero.
4503 gbb picks off low bit from each byte in the 128-bit registers,
4504 so tag_eq_pack is one of 0xf000, 0x0f00, 0x00f0, 0x000f, assuming
4505 we have a hit. */
4506 emit_insn (gen_spu_gbb (tag_eq_pack, spu_gen_subreg (V16QImode, tag_equal)));
4507 emit_insn (gen_spu_convert (tag_eq_pack_si, tag_eq_pack));
4508
4509 /* So counting leading zeros will set eq_index to 16, 20, 24 or 28. */
4510 emit_insn (gen_clzsi2 (eq_index, tag_eq_pack_si));
4511
4512 /* This allows us to rotate the corresponding cache data pointer into
4513 slot 0 (rotating by eq_index mod 16 bytes). */
4514 emit_insn (gen_rotqby_ti (cache_ptrs, cache_ptrs, eq_index));
4515 emit_insn (gen_spu_convert (cache_ptrs_si, cache_ptrs));
4516
4517 /* Add block offset to form final data address. */
4518 emit_insn (gen_addsi3 (data_addr, cache_ptrs_si, block_off));
4519
4520 /* Check that we did hit. */
4521 hit_label = gen_label_rtx ();
4522 hit_ref = gen_rtx_LABEL_REF (VOIDmode, hit_label);
4523 bcomp = gen_rtx_NE (SImode, tag_eq_pack_si, const0_rtx);
4524 insn = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx,
4525 gen_rtx_IF_THEN_ELSE (VOIDmode, bcomp,
4526 hit_ref, pc_rtx)));
4527 /* Say that this branch is very likely to happen. */
4528 v = REG_BR_PROB_BASE - REG_BR_PROB_BASE / 100 - 1;
4529 add_reg_note (insn, REG_BR_PROB, GEN_INT (v));
4530
4531 ea_load_store (mem, is_store, ea_addr, data_addr);
4532 cont_label = gen_label_rtx ();
4533 emit_jump_insn (gen_jump (cont_label));
4534 emit_barrier ();
4535
4536 emit_label (hit_label);
4537
4538 if (is_store)
4539 {
4540 HOST_WIDE_INT v_hi;
4541 rtx dirty_bits = gen_reg_rtx (TImode);
4542 rtx dirty_off = gen_reg_rtx (SImode);
4543 rtx dirty_128 = gen_reg_rtx (TImode);
4544 rtx neg_block_off = gen_reg_rtx (SImode);
4545
4546 /* Set up mask with one dirty bit per byte of the mem we are
4547 writing, starting from top bit. */
4548 v_hi = v = -1;
4549 v <<= (128 - GET_MODE_SIZE (GET_MODE (mem))) & 63;
4550 if ((128 - GET_MODE_SIZE (GET_MODE (mem))) >= 64)
4551 {
4552 v_hi = v;
4553 v = 0;
4554 }
4555 emit_move_insn (dirty_bits, immed_double_const (v, v_hi, TImode));
4556
4557 /* Form index into cache dirty_bits. eq_index is one of
4558 0x10, 0x14, 0x18 or 0x1c. Multiplying by 4 gives us
4559 0x40, 0x50, 0x60 or 0x70 which just happens to be the
4560 offset to each of the four dirty_bits elements. */
4561 emit_insn (gen_ashlsi3 (dirty_off, eq_index, spu_const (SImode, 2)));
4562
4563 emit_insn (gen_spu_lqx (dirty_128, tag_addr, dirty_off));
4564
4565 /* Rotate bit mask to proper bit. */
4566 emit_insn (gen_negsi2 (neg_block_off, block_off));
4567 emit_insn (gen_rotqbybi_ti (dirty_bits, dirty_bits, neg_block_off));
4568 emit_insn (gen_rotqbi_ti (dirty_bits, dirty_bits, neg_block_off));
4569
4570 /* Or in the new dirty bits. */
4571 emit_insn (gen_iorti3 (dirty_128, dirty_bits, dirty_128));
4572
4573 /* Store. */
4574 emit_insn (gen_spu_stqx (dirty_128, tag_addr, dirty_off));
4575 }
4576
4577 emit_label (cont_label);
4578 }
4579
4580 static rtx
4581 expand_ea_mem (rtx mem, bool is_store)
4582 {
4583 rtx ea_addr;
4584 rtx data_addr = gen_reg_rtx (Pmode);
4585 rtx new_mem;
4586
4587 ea_addr = force_reg (EAmode, XEXP (mem, 0));
4588 if (optimize_size || optimize == 0)
4589 ea_load_store (mem, is_store, ea_addr, data_addr);
4590 else
4591 ea_load_store_inline (mem, is_store, ea_addr, data_addr);
4592
4593 if (ea_alias_set == -1)
4594 ea_alias_set = new_alias_set ();
4595
4596 /* We generate a new MEM RTX to refer to the copy of the data
4597 in the cache. We do not copy memory attributes (except the
4598 alignment) from the original MEM, as they may no longer apply
4599 to the cache copy. */
4600 new_mem = gen_rtx_MEM (GET_MODE (mem), data_addr);
4601 set_mem_alias_set (new_mem, ea_alias_set);
4602 set_mem_align (new_mem, MIN (MEM_ALIGN (mem), 128 * 8));
4603
4604 return new_mem;
4605 }
4606
4607 int
4608 spu_expand_mov (rtx * ops, enum machine_mode mode)
4609 {
4610 if (GET_CODE (ops[0]) == SUBREG && !valid_subreg (ops[0]))
4611 abort ();
4612
4613 if (GET_CODE (ops[1]) == SUBREG && !valid_subreg (ops[1]))
4614 {
4615 rtx from = SUBREG_REG (ops[1]);
4616 enum machine_mode imode = int_mode_for_mode (GET_MODE (from));
4617
4618 gcc_assert (GET_MODE_CLASS (mode) == MODE_INT
4619 && GET_MODE_CLASS (imode) == MODE_INT
4620 && subreg_lowpart_p (ops[1]));
4621
4622 if (GET_MODE_SIZE (imode) < 4)
4623 imode = SImode;
4624 if (imode != GET_MODE (from))
4625 from = gen_rtx_SUBREG (imode, from, 0);
4626
4627 if (GET_MODE_SIZE (mode) < GET_MODE_SIZE (imode))
4628 {
4629 enum insn_code icode = convert_optab_handler (trunc_optab,
4630 mode, imode);
4631 emit_insn (GEN_FCN (icode) (ops[0], from));
4632 }
4633 else
4634 emit_insn (gen_extend_insn (ops[0], from, mode, imode, 1));
4635 return 1;
4636 }
4637
4638 /* At least one of the operands needs to be a register. */
4639 if ((reload_in_progress | reload_completed) == 0
4640 && !register_operand (ops[0], mode) && !register_operand (ops[1], mode))
4641 {
4642 rtx temp = force_reg (mode, ops[1]);
4643 emit_move_insn (ops[0], temp);
4644 return 1;
4645 }
4646 if (reload_in_progress || reload_completed)
4647 {
4648 if (CONSTANT_P (ops[1]))
4649 return spu_split_immediate (ops);
4650 return 0;
4651 }
4652
4653 /* Catch the SImode immediates greater than 0x7fffffff, and sign
4654 extend them. */
4655 if (GET_CODE (ops[1]) == CONST_INT)
4656 {
4657 HOST_WIDE_INT val = trunc_int_for_mode (INTVAL (ops[1]), mode);
4658 if (val != INTVAL (ops[1]))
4659 {
4660 emit_move_insn (ops[0], GEN_INT (val));
4661 return 1;
4662 }
4663 }
4664 if (MEM_P (ops[0]))
4665 {
4666 if (MEM_ADDR_SPACE (ops[0]))
4667 ops[0] = expand_ea_mem (ops[0], true);
4668 return spu_split_store (ops);
4669 }
4670 if (MEM_P (ops[1]))
4671 {
4672 if (MEM_ADDR_SPACE (ops[1]))
4673 ops[1] = expand_ea_mem (ops[1], false);
4674 return spu_split_load (ops);
4675 }
4676
4677 return 0;
4678 }
4679
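/* Copy the value in the preferred slot of the TImode register SRC to DST,
   which has a narrower mode; the slot is extracted with a right shift and
   a truncate.  */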
4680 static void
4681 spu_convert_move (rtx dst, rtx src)
4682 {
4683 enum machine_mode mode = GET_MODE (dst);
4684 enum machine_mode int_mode = mode_for_size (GET_MODE_BITSIZE (mode), MODE_INT, 0);
4685 rtx reg;
4686 gcc_assert (GET_MODE (src) == TImode);
4687 reg = int_mode != mode ? gen_reg_rtx (int_mode) : dst;
4688 emit_insn (gen_rtx_SET (VOIDmode, reg,
4689 gen_rtx_TRUNCATE (int_mode,
4690 gen_rtx_LSHIFTRT (TImode, src,
4691 GEN_INT (int_mode == DImode ? 64 : 96)))));
4692 if (int_mode != mode)
4693 {
4694 reg = simplify_gen_subreg (mode, reg, int_mode, 0);
4695 emit_move_insn (dst, reg);
4696 }
4697 }
4698
4699 /* Load TImode values into DST0 and DST1 (when it is non-NULL) using
4700 the address from SRC and SRC+16. Return a REG or CONST_INT that
4701 specifies how many bytes to rotate the loaded registers, plus any
4702 extra from EXTRA_ROTQBY. The address and rotate amounts are
4703 normalized to improve merging of loads and rotate computations. */
4704 static rtx
4705 spu_expand_load (rtx dst0, rtx dst1, rtx src, int extra_rotby)
4706 {
4707 rtx addr = XEXP (src, 0);
4708 rtx p0, p1, rot, addr0, addr1;
4709 int rot_amt;
4710
4711 rot = 0;
4712 rot_amt = 0;
4713
4714 if (MEM_ALIGN (src) >= 128)
4715 /* Address is already aligned; simply perform a TImode load. */ ;
4716 else if (GET_CODE (addr) == PLUS)
4717 {
4718 /* 8 cases:
4719 aligned reg + aligned reg => lqx
4720 aligned reg + unaligned reg => lqx, rotqby
4721 aligned reg + aligned const => lqd
4722 aligned reg + unaligned const => lqd, rotqbyi
4723 unaligned reg + aligned reg => lqx, rotqby
4724 unaligned reg + unaligned reg => lqx, a, rotqby (1 scratch)
4725 unaligned reg + aligned const => lqd, rotqby
4726 unaligned reg + unaligned const -> not allowed by legitimate address
4727 */
4728 p0 = XEXP (addr, 0);
4729 p1 = XEXP (addr, 1);
4730 if (!reg_aligned_for_addr (p0))
4731 {
4732 if (REG_P (p1) && !reg_aligned_for_addr (p1))
4733 {
4734 rot = gen_reg_rtx (SImode);
4735 emit_insn (gen_addsi3 (rot, p0, p1));
4736 }
4737 else if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4738 {
4739 if (INTVAL (p1) > 0
4740 && REG_POINTER (p0)
4741 && INTVAL (p1) * BITS_PER_UNIT
4742 < REGNO_POINTER_ALIGN (REGNO (p0)))
4743 {
4744 rot = gen_reg_rtx (SImode);
4745 emit_insn (gen_addsi3 (rot, p0, p1));
4746 addr = p0;
4747 }
4748 else
4749 {
4750 rtx x = gen_reg_rtx (SImode);
4751 emit_move_insn (x, p1);
4752 if (!spu_arith_operand (p1, SImode))
4753 p1 = x;
4754 rot = gen_reg_rtx (SImode);
4755 emit_insn (gen_addsi3 (rot, p0, p1));
4756 addr = gen_rtx_PLUS (Pmode, p0, x);
4757 }
4758 }
4759 else
4760 rot = p0;
4761 }
4762 else
4763 {
4764 if (GET_CODE (p1) == CONST_INT && (INTVAL (p1) & 15))
4765 {
4766 rot_amt = INTVAL (p1) & 15;
4767 if (INTVAL (p1) & -16)
4768 {
4769 p1 = GEN_INT (INTVAL (p1) & -16);
4770 addr = gen_rtx_PLUS (SImode, p0, p1);
4771 }
4772 else
4773 addr = p0;
4774 }
4775 else if (REG_P (p1) && !reg_aligned_for_addr (p1))
4776 rot = p1;
4777 }
4778 }
4779 else if (REG_P (addr))
4780 {
4781 if (!reg_aligned_for_addr (addr))
4782 rot = addr;
4783 }
4784 else if (GET_CODE (addr) == CONST)
4785 {
4786 if (GET_CODE (XEXP (addr, 0)) == PLUS
4787 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4788 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4789 {
4790 rot_amt = INTVAL (XEXP (XEXP (addr, 0), 1));
4791 if (rot_amt & -16)
4792 addr = gen_rtx_CONST (Pmode,
4793 gen_rtx_PLUS (Pmode,
4794 XEXP (XEXP (addr, 0), 0),
4795 GEN_INT (rot_amt & -16)));
4796 else
4797 addr = XEXP (XEXP (addr, 0), 0);
4798 }
4799 else
4800 {
4801 rot = gen_reg_rtx (Pmode);
4802 emit_move_insn (rot, addr);
4803 }
4804 }
4805 else if (GET_CODE (addr) == CONST_INT)
4806 {
4807 rot_amt = INTVAL (addr);
4808 addr = GEN_INT (rot_amt & -16);
4809 }
4810 else if (!ALIGNED_SYMBOL_REF_P (addr))
4811 {
4812 rot = gen_reg_rtx (Pmode);
4813 emit_move_insn (rot, addr);
4814 }
4815
4816 rot_amt += extra_rotby;
4817
4818 rot_amt &= 15;
4819
4820 if (rot && rot_amt)
4821 {
4822 rtx x = gen_reg_rtx (SImode);
4823 emit_insn (gen_addsi3 (x, rot, GEN_INT (rot_amt)));
4824 rot = x;
4825 rot_amt = 0;
4826 }
4827 if (!rot && rot_amt)
4828 rot = GEN_INT (rot_amt);
4829
4830 addr0 = copy_rtx (addr);
4831 addr0 = gen_rtx_AND (SImode, copy_rtx (addr), GEN_INT (-16));
4832 emit_insn (gen__movti (dst0, change_address (src, TImode, addr0)));
4833
4834 if (dst1)
4835 {
4836 addr1 = plus_constant (copy_rtx (addr), 16);
4837 addr1 = gen_rtx_AND (SImode, addr1, GEN_INT (-16));
4838 emit_insn (gen__movti (dst1, change_address (src, TImode, addr1)));
4839 }
4840
4841 return rot;
4842 }
4843
4844 int
4845 spu_split_load (rtx * ops)
4846 {
4847 enum machine_mode mode = GET_MODE (ops[0]);
4848 rtx addr, load, rot;
4849 int rot_amt;
4850
4851 if (GET_MODE_SIZE (mode) >= 16)
4852 return 0;
4853
4854 addr = XEXP (ops[1], 0);
4855 gcc_assert (GET_CODE (addr) != AND);
4856
4857 if (!address_needs_split (ops[1]))
4858 {
4859 ops[1] = change_address (ops[1], TImode, addr);
4860 load = gen_reg_rtx (TImode);
4861 emit_insn (gen__movti (load, ops[1]));
4862 spu_convert_move (ops[0], load);
4863 return 1;
4864 }
4865
4866 rot_amt = GET_MODE_SIZE (mode) < 4 ? GET_MODE_SIZE (mode) - 4 : 0;
4867
4868 load = gen_reg_rtx (TImode);
4869 rot = spu_expand_load (load, 0, ops[1], rot_amt);
4870
4871 if (rot)
4872 emit_insn (gen_rotqby_ti (load, load, rot));
4873
4874 spu_convert_move (ops[0], load);
4875 return 1;
4876 }
4877
4878 int
4879 spu_split_store (rtx * ops)
4880 {
4881 enum machine_mode mode = GET_MODE (ops[0]);
4882 rtx reg;
4883 rtx addr, p0, p1, p1_lo, smem;
4884 int aform;
4885 int scalar;
4886
4887 if (GET_MODE_SIZE (mode) >= 16)
4888 return 0;
4889
4890 addr = XEXP (ops[0], 0);
4891 gcc_assert (GET_CODE (addr) != AND);
4892
4893 if (!address_needs_split (ops[0]))
4894 {
4895 reg = gen_reg_rtx (TImode);
4896 emit_insn (gen_spu_convert (reg, ops[1]));
4897 ops[0] = change_address (ops[0], TImode, addr);
4898 emit_move_insn (ops[0], reg);
4899 return 1;
4900 }
4901
4902 if (GET_CODE (addr) == PLUS)
4903 {
4904 /* 8 cases:
4905 aligned reg + aligned reg => lqx, c?x, shuf, stqx
4906 aligned reg + unaligned reg => lqx, c?x, shuf, stqx
4907 aligned reg + aligned const => lqd, c?d, shuf, stqx
4908 aligned reg + unaligned const => lqd, c?d, shuf, stqx
4909 unaligned reg + aligned reg => lqx, c?x, shuf, stqx
4910 unaligned reg + unaligned reg => lqx, c?x, shuf, stqx
4911 unaligned reg + aligned const => lqd, c?d, shuf, stqx
4912 unaligned reg + unaligned const -> lqx, c?d, shuf, stqx
4913 */
4914 aform = 0;
4915 p0 = XEXP (addr, 0);
4916 p1 = p1_lo = XEXP (addr, 1);
4917 if (REG_P (p0) && GET_CODE (p1) == CONST_INT)
4918 {
4919 p1_lo = GEN_INT (INTVAL (p1) & 15);
4920 if (reg_aligned_for_addr (p0))
4921 {
4922 p1 = GEN_INT (INTVAL (p1) & -16);
4923 if (p1 == const0_rtx)
4924 addr = p0;
4925 else
4926 addr = gen_rtx_PLUS (SImode, p0, p1);
4927 }
4928 else
4929 {
4930 rtx x = gen_reg_rtx (SImode);
4931 emit_move_insn (x, p1);
4932 addr = gen_rtx_PLUS (SImode, p0, x);
4933 }
4934 }
4935 }
4936 else if (REG_P (addr))
4937 {
4938 aform = 0;
4939 p0 = addr;
4940 p1 = p1_lo = const0_rtx;
4941 }
4942 else
4943 {
4944 aform = 1;
4945 p0 = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
4946 p1 = 0; /* aform doesn't use p1 */
4947 p1_lo = addr;
4948 if (ALIGNED_SYMBOL_REF_P (addr))
4949 p1_lo = const0_rtx;
4950 else if (GET_CODE (addr) == CONST
4951 && GET_CODE (XEXP (addr, 0)) == PLUS
4952 && ALIGNED_SYMBOL_REF_P (XEXP (XEXP (addr, 0), 0))
4953 && GET_CODE (XEXP (XEXP (addr, 0), 1)) == CONST_INT)
4954 {
4955 HOST_WIDE_INT v = INTVAL (XEXP (XEXP (addr, 0), 1));
4956 if ((v & -16) != 0)
4957 addr = gen_rtx_CONST (Pmode,
4958 gen_rtx_PLUS (Pmode,
4959 XEXP (XEXP (addr, 0), 0),
4960 GEN_INT (v & -16)));
4961 else
4962 addr = XEXP (XEXP (addr, 0), 0);
4963 p1_lo = GEN_INT (v & 15);
4964 }
4965 else if (GET_CODE (addr) == CONST_INT)
4966 {
4967 p1_lo = GEN_INT (INTVAL (addr) & 15);
4968 addr = GEN_INT (INTVAL (addr) & -16);
4969 }
4970 else
4971 {
4972 p1_lo = gen_reg_rtx (SImode);
4973 emit_move_insn (p1_lo, addr);
4974 }
4975 }
4976
4977 reg = gen_reg_rtx (TImode);
4978
4979 scalar = store_with_one_insn_p (ops[0]);
4980 if (!scalar)
4981 {
4982 /* We could copy the flags from the ops[0] MEM to lmem here; we
4983 don't, because we want this load to be optimized away if
4984 possible, and copying the flags will prevent that in certain
4985 cases, e.g. consider the volatile flag. */
4986
4987 rtx pat = gen_reg_rtx (TImode);
4988 rtx lmem = change_address (ops[0], TImode, copy_rtx (addr));
4989 set_mem_alias_set (lmem, 0);
4990 emit_insn (gen_movti (reg, lmem));
4991
4992 if (!p0 || reg_aligned_for_addr (p0))
4993 p0 = stack_pointer_rtx;
4994 if (!p1_lo)
4995 p1_lo = const0_rtx;
4996
4997 emit_insn (gen_cpat (pat, p0, p1_lo, GEN_INT (GET_MODE_SIZE (mode))));
4998 emit_insn (gen_shufb (reg, ops[1], reg, pat));
4999 }
5000 else
5001 {
5002 if (GET_CODE (ops[1]) == REG)
5003 emit_insn (gen_spu_convert (reg, ops[1]));
5004 else if (GET_CODE (ops[1]) == SUBREG)
5005 emit_insn (gen_spu_convert (reg, SUBREG_REG (ops[1])));
5006 else
5007 abort ();
5008 }
5009
5010 if (GET_MODE_SIZE (mode) < 4 && scalar)
5011 emit_insn (gen_ashlti3
5012 (reg, reg, GEN_INT (32 - GET_MODE_BITSIZE (mode))));
5013
5014 smem = change_address (ops[0], TImode, copy_rtx (addr));
5015 /* We can't use the previous alias set because the memory has changed
5016 size and can potentially overlap objects of other types. */
5017 set_mem_alias_set (smem, 0);
5018
5019 emit_insn (gen_movti (smem, reg));
5020 return 1;
5021 }
5022
5023 /* Return TRUE if X is MEM which is a struct member reference
5024 and the member can safely be loaded and stored with a single
5025 instruction because it is padded. */
5026 static int
5027 mem_is_padded_component_ref (rtx x)
5028 {
5029 tree t = MEM_EXPR (x);
5030 tree r;
5031 if (!t || TREE_CODE (t) != COMPONENT_REF)
5032 return 0;
5033 t = TREE_OPERAND (t, 1);
5034 if (!t || TREE_CODE (t) != FIELD_DECL
5035 || DECL_ALIGN (t) < 128 || AGGREGATE_TYPE_P (TREE_TYPE (t)))
5036 return 0;
5037 /* Only do this for RECORD_TYPEs, not UNION_TYPEs. */
5038 r = DECL_FIELD_CONTEXT (t);
5039 if (!r || TREE_CODE (r) != RECORD_TYPE)
5040 return 0;
5041 /* Make sure they are the same mode */
5042 if (GET_MODE (x) != TYPE_MODE (TREE_TYPE (t)))
5043 return 0;
5044 /* If there are no following fields then the field alignment assures
5045 the structure is padded to the alignment, which means this field is
5046 padded too. */
5047 if (TREE_CHAIN (t) == 0)
5048 return 1;
5049 /* If the following field is also aligned then this field will be
5050 padded. */
5051 t = TREE_CHAIN (t);
5052 if (TREE_CODE (t) == FIELD_DECL && DECL_ALIGN (t) >= 128)
5053 return 1;
5054 return 0;
5055 }
5056
5057 /* Parse the -mfixed-range= option string. */
5058 static void
5059 fix_range (const char *const_str)
5060 {
5061 int i, first, last;
5062 char *str, *dash, *comma;
5063
5064 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
5065 REG2 are either register names or register numbers. The effect
5066 of this option is to mark the registers in the range from REG1 to
5067 REG2 as ``fixed'' so they won't be used by the compiler. */
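  /* For example, "-mfixed-range=80-86,100-110" marks registers 80-86 and
     100-110 as fixed (register numbers or names are both accepted, as
     noted above).  */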
5068
5069 i = strlen (const_str);
5070 str = (char *) alloca (i + 1);
5071 memcpy (str, const_str, i + 1);
5072
5073 while (1)
5074 {
5075 dash = strchr (str, '-');
5076 if (!dash)
5077 {
5078 warning (0, "value of -mfixed-range must have form REG1-REG2");
5079 return;
5080 }
5081 *dash = '\0';
5082 comma = strchr (dash + 1, ',');
5083 if (comma)
5084 *comma = '\0';
5085
5086 first = decode_reg_name (str);
5087 if (first < 0)
5088 {
5089 warning (0, "unknown register name: %s", str);
5090 return;
5091 }
5092
5093 last = decode_reg_name (dash + 1);
5094 if (last < 0)
5095 {
5096 warning (0, "unknown register name: %s", dash + 1);
5097 return;
5098 }
5099
5100 *dash = '-';
5101
5102 if (first > last)
5103 {
5104 warning (0, "%s-%s is an empty range", str, dash + 1);
5105 return;
5106 }
5107
5108 for (i = first; i <= last; ++i)
5109 fixed_regs[i] = call_used_regs[i] = 1;
5110
5111 if (!comma)
5112 break;
5113
5114 *comma = ',';
5115 str = comma + 1;
5116 }
5117 }
5118
5119 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5120 can be generated using the fsmbi instruction. */
5121 int
5122 fsmbi_const_p (rtx x)
5123 {
5124 if (CONSTANT_P (x))
5125 {
5126 /* We can always choose TImode for CONST_INT because the high bits
5127 of an SImode will always be all 1s, i.e., valid for fsmbi. */
5128 enum immediate_class c = classify_immediate (x, TImode);
5129 return c == IC_FSMBI || (!epilogue_completed && c == IC_FSMBI2);
5130 }
5131 return 0;
5132 }
5133
5134 /* Return TRUE if x is a CONST_INT, CONST_DOUBLE or CONST_VECTOR that
5135 can be generated using the cbd, chd, cwd or cdd instruction. */
5136 int
5137 cpat_const_p (rtx x, enum machine_mode mode)
5138 {
5139 if (CONSTANT_P (x))
5140 {
5141 enum immediate_class c = classify_immediate (x, mode);
5142 return c == IC_CPAT;
5143 }
5144 return 0;
5145 }
5146
5147 rtx
5148 gen_cpat_const (rtx * ops)
5149 {
5150 unsigned char dst[16];
5151 int i, offset, shift, isize;
5152 if (GET_CODE (ops[3]) != CONST_INT
5153 || GET_CODE (ops[2]) != CONST_INT
5154 || (GET_CODE (ops[1]) != CONST_INT
5155 && GET_CODE (ops[1]) != REG))
5156 return 0;
5157 if (GET_CODE (ops[1]) == REG
5158 && (!REG_POINTER (ops[1])
5159 || REGNO_POINTER_ALIGN (ORIGINAL_REGNO (ops[1])) < 128))
5160 return 0;
5161
5162 for (i = 0; i < 16; i++)
5163 dst[i] = i + 16;
5164 isize = INTVAL (ops[3]);
5165 if (isize == 1)
5166 shift = 3;
5167 else if (isize == 2)
5168 shift = 2;
5169 else
5170 shift = 0;
5171 offset = (INTVAL (ops[2]) +
5172 (GET_CODE (ops[1]) ==
5173 CONST_INT ? INTVAL (ops[1]) : 0)) & 15;
5174 for (i = 0; i < isize; i++)
5175 dst[offset + i] = i + shift;
5176 return array_to_constant (TImode, dst);
5177 }
5178
5179 /* Convert a CONST_INT, CONST_DOUBLE, or CONST_VECTOR into a 16 byte
5180 array. Use MODE for CONST_INT's. When the constant's mode is smaller
5181 than 16 bytes, the value is repeated across the rest of the array. */
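/* For example, an SImode (const_int 0x12345678) becomes
   { 0x12, 0x34, 0x56, 0x78 } repeated four times across the 16 bytes.  */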
5182 void
5183 constant_to_array (enum machine_mode mode, rtx x, unsigned char arr[16])
5184 {
5185 HOST_WIDE_INT val;
5186 int i, j, first;
5187
5188 memset (arr, 0, 16);
5189 mode = GET_MODE (x) != VOIDmode ? GET_MODE (x) : mode;
5190 if (GET_CODE (x) == CONST_INT
5191 || (GET_CODE (x) == CONST_DOUBLE
5192 && (mode == SFmode || mode == DFmode)))
5193 {
5194 gcc_assert (mode != VOIDmode && mode != BLKmode);
5195
5196 if (GET_CODE (x) == CONST_DOUBLE)
5197 val = const_double_to_hwint (x);
5198 else
5199 val = INTVAL (x);
5200 first = GET_MODE_SIZE (mode) - 1;
5201 for (i = first; i >= 0; i--)
5202 {
5203 arr[i] = val & 0xff;
5204 val >>= 8;
5205 }
5206 /* Splat the constant across the whole array. */
5207 for (j = 0, i = first + 1; i < 16; i++)
5208 {
5209 arr[i] = arr[j];
5210 j = (j == first) ? 0 : j + 1;
5211 }
5212 }
5213 else if (GET_CODE (x) == CONST_DOUBLE)
5214 {
5215 val = CONST_DOUBLE_LOW (x);
5216 for (i = 15; i >= 8; i--)
5217 {
5218 arr[i] = val & 0xff;
5219 val >>= 8;
5220 }
5221 val = CONST_DOUBLE_HIGH (x);
5222 for (i = 7; i >= 0; i--)
5223 {
5224 arr[i] = val & 0xff;
5225 val >>= 8;
5226 }
5227 }
5228 else if (GET_CODE (x) == CONST_VECTOR)
5229 {
5230 int units;
5231 rtx elt;
5232 mode = GET_MODE_INNER (mode);
5233 units = CONST_VECTOR_NUNITS (x);
5234 for (i = 0; i < units; i++)
5235 {
5236 elt = CONST_VECTOR_ELT (x, i);
5237 if (GET_CODE (elt) == CONST_INT || GET_CODE (elt) == CONST_DOUBLE)
5238 {
5239 if (GET_CODE (elt) == CONST_DOUBLE)
5240 val = const_double_to_hwint (elt);
5241 else
5242 val = INTVAL (elt);
5243 first = GET_MODE_SIZE (mode) - 1;
5244 if (first + i * GET_MODE_SIZE (mode) > 16)
5245 abort ();
5246 for (j = first; j >= 0; j--)
5247 {
5248 arr[j + i * GET_MODE_SIZE (mode)] = val & 0xff;
5249 val >>= 8;
5250 }
5251 }
5252 }
5253 }
5254 else
5255 gcc_unreachable();
5256 }
5257
5258 /* Convert a 16 byte array to a constant of mode MODE. When MODE is
5259 smaller than 16 bytes, use the bytes that would represent that value
5260 in a register, e.g., for QImode return the value of arr[3]. */
5261 rtx
5262 array_to_constant (enum machine_mode mode, const unsigned char arr[16])
5263 {
5264 enum machine_mode inner_mode;
5265 rtvec v;
5266 int units, size, i, j, k;
5267 HOST_WIDE_INT val;
5268
5269 if (GET_MODE_CLASS (mode) == MODE_INT
5270 && GET_MODE_BITSIZE (mode) <= HOST_BITS_PER_WIDE_INT)
5271 {
5272 j = GET_MODE_SIZE (mode);
5273 i = j < 4 ? 4 - j : 0;
5274 for (val = 0; i < j; i++)
5275 val = (val << 8) | arr[i];
5276 val = trunc_int_for_mode (val, mode);
5277 return GEN_INT (val);
5278 }
5279
5280 if (mode == TImode)
5281 {
5282 HOST_WIDE_INT high;
5283 for (i = high = 0; i < 8; i++)
5284 high = (high << 8) | arr[i];
5285 for (i = 8, val = 0; i < 16; i++)
5286 val = (val << 8) | arr[i];
5287 return immed_double_const (val, high, TImode);
5288 }
5289 if (mode == SFmode)
5290 {
5291 val = (arr[0] << 24) | (arr[1] << 16) | (arr[2] << 8) | arr[3];
5292 val = trunc_int_for_mode (val, SImode);
5293 return hwint_to_const_double (SFmode, val);
5294 }
5295 if (mode == DFmode)
5296 {
5297 for (i = 0, val = 0; i < 8; i++)
5298 val = (val << 8) | arr[i];
5299 return hwint_to_const_double (DFmode, val);
5300 }
5301
5302 if (!VECTOR_MODE_P (mode))
5303 abort ();
5304
5305 units = GET_MODE_NUNITS (mode);
5306 size = GET_MODE_UNIT_SIZE (mode);
5307 inner_mode = GET_MODE_INNER (mode);
5308 v = rtvec_alloc (units);
5309
5310 for (k = i = 0; i < units; ++i)
5311 {
5312 val = 0;
5313 for (j = 0; j < size; j++, k++)
5314 val = (val << 8) | arr[k];
5315
5316 if (GET_MODE_CLASS (inner_mode) == MODE_FLOAT)
5317 RTVEC_ELT (v, i) = hwint_to_const_double (inner_mode, val);
5318 else
5319 RTVEC_ELT (v, i) = GEN_INT (trunc_int_for_mode (val, inner_mode));
5320 }
5321 if (k > 16)
5322 abort ();
5323
5324 return gen_rtx_CONST_VECTOR (mode, v);
5325 }
5326
5327 static void
5328 reloc_diagnostic (rtx x)
5329 {
5330 tree decl = 0;
5331 if (!flag_pic || !(TARGET_WARN_RELOC || TARGET_ERROR_RELOC))
5332 return;
5333
5334 if (GET_CODE (x) == SYMBOL_REF)
5335 decl = SYMBOL_REF_DECL (x);
5336 else if (GET_CODE (x) == CONST
5337 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
5338 decl = SYMBOL_REF_DECL (XEXP (XEXP (x, 0), 0));
5339
5340 /* SYMBOL_REF_DECL is not necessarily a DECL. */
5341 if (decl && !DECL_P (decl))
5342 decl = 0;
5343
5344 /* The decl could be a string constant. */
5345 if (decl && DECL_P (decl))
5346 {
5347 location_t loc;
5348 /* We use last_assemble_variable_decl to get line information. It's
5349 not always going to be right and might not even be close, but will
5350 be right for the more common cases. */
5351 if (!last_assemble_variable_decl || in_section == ctors_section)
5352 loc = DECL_SOURCE_LOCATION (decl);
5353 else
5354 loc = DECL_SOURCE_LOCATION (last_assemble_variable_decl);
5355
5356 if (TARGET_WARN_RELOC)
5357 warning_at (loc, 0,
5358 "creating run-time relocation for %qD", decl);
5359 else
5360 error_at (loc,
5361 "creating run-time relocation for %qD", decl);
5362 }
5363 else
5364 {
5365 if (TARGET_WARN_RELOC)
5366 warning_at (input_location, 0, "creating run-time relocation");
5367 else
5368 error_at (input_location, "creating run-time relocation");
5369 }
5370 }
5371
5372 /* Hook into assemble_integer so we can generate an error for run-time
5373 relocations. The SPU ABI disallows them. */
5374 static bool
5375 spu_assemble_integer (rtx x, unsigned int size, int aligned_p)
5376 {
5377 /* By default run-time relocations aren't supported, but we allow them
5378 in case users support them in their own run-time loader, and we
5379 provide a warning for those users that don't. */
5380 if ((GET_CODE (x) == SYMBOL_REF)
5381 || GET_CODE (x) == LABEL_REF || GET_CODE (x) == CONST)
5382 reloc_diagnostic (x);
5383
5384 return default_assemble_integer (x, size, aligned_p);
5385 }
5386
5387 static void
5388 spu_asm_globalize_label (FILE * file, const char *name)
5389 {
5390 fputs ("\t.global\t", file);
5391 assemble_name (file, name);
5392 fputs ("\n", file);
5393 }
5394
5395 static bool
5396 spu_rtx_costs (rtx x, int code, int outer_code ATTRIBUTE_UNUSED, int *total,
5397 bool speed ATTRIBUTE_UNUSED)
5398 {
5399 enum machine_mode mode = GET_MODE (x);
5400 int cost = COSTS_N_INSNS (2);
5401
5402 /* Folding to a CONST_VECTOR will use extra space but there might
5403 be only a small savings in cycles. We'd like to use a CONST_VECTOR
5404 only if it allows us to fold away multiple insns. Changing the cost
5405 of a CONST_VECTOR here (or in CONST_COSTS) doesn't help though
5406 because this cost will only be compared against a single insn.
5407 if (code == CONST_VECTOR)
5408 return (LEGITIMATE_CONSTANT_P(x)) ? cost : COSTS_N_INSNS(6);
5409 */
5410
5411 /* Use defaults for float operations. Not accurate but good enough. */
5412 if (mode == DFmode)
5413 {
5414 *total = COSTS_N_INSNS (13);
5415 return true;
5416 }
5417 if (mode == SFmode)
5418 {
5419 *total = COSTS_N_INSNS (6);
5420 return true;
5421 }
5422 switch (code)
5423 {
5424 case CONST_INT:
5425 if (satisfies_constraint_K (x))
5426 *total = 0;
5427 else if (INTVAL (x) >= -0x80000000ll && INTVAL (x) <= 0xffffffffll)
5428 *total = COSTS_N_INSNS (1);
5429 else
5430 *total = COSTS_N_INSNS (3);
5431 return true;
5432
5433 case CONST:
5434 *total = COSTS_N_INSNS (3);
5435 return true;
5436
5437 case LABEL_REF:
5438 case SYMBOL_REF:
5439 *total = COSTS_N_INSNS (0);
5440 return true;
5441
5442 case CONST_DOUBLE:
5443 *total = COSTS_N_INSNS (5);
5444 return true;
5445
5446 case FLOAT_EXTEND:
5447 case FLOAT_TRUNCATE:
5448 case FLOAT:
5449 case UNSIGNED_FLOAT:
5450 case FIX:
5451 case UNSIGNED_FIX:
5452 *total = COSTS_N_INSNS (7);
5453 return true;
5454
5455 case PLUS:
5456 if (mode == TImode)
5457 {
5458 *total = COSTS_N_INSNS (9);
5459 return true;
5460 }
5461 break;
5462
5463 case MULT:
5464 cost =
5465 GET_CODE (XEXP (x, 0)) ==
5466 REG ? COSTS_N_INSNS (12) : COSTS_N_INSNS (7);
5467 if (mode == SImode && GET_CODE (XEXP (x, 0)) == REG)
5468 {
5469 if (GET_CODE (XEXP (x, 1)) == CONST_INT)
5470 {
5471 HOST_WIDE_INT val = INTVAL (XEXP (x, 1));
5472 cost = COSTS_N_INSNS (14);
5473 if ((val & 0xffff) == 0)
5474 cost = COSTS_N_INSNS (9);
5475 else if (val > 0 && val < 0x10000)
5476 cost = COSTS_N_INSNS (11);
5477 }
5478 }
5479 *total = cost;
5480 return true;
5481 case DIV:
5482 case UDIV:
5483 case MOD:
5484 case UMOD:
5485 *total = COSTS_N_INSNS (20);
5486 return true;
5487 case ROTATE:
5488 case ROTATERT:
5489 case ASHIFT:
5490 case ASHIFTRT:
5491 case LSHIFTRT:
5492 *total = COSTS_N_INSNS (4);
5493 return true;
5494 case UNSPEC:
5495 if (XINT (x, 1) == UNSPEC_CONVERT)
5496 *total = COSTS_N_INSNS (0);
5497 else
5498 *total = COSTS_N_INSNS (4);
5499 return true;
5500 }
5501 /* Scale cost by mode size. Except when initializing (cfun->decl == 0). */
5502 if (GET_MODE_CLASS (mode) == MODE_INT
5503 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SImode) && cfun && cfun->decl)
5504 cost = cost * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode))
5505 * (GET_MODE_SIZE (mode) / GET_MODE_SIZE (SImode));
5506 *total = cost;
5507 return true;
5508 }
5509
5510 static enum machine_mode
5511 spu_unwind_word_mode (void)
5512 {
5513 return SImode;
5514 }
5515
5516 /* Decide whether we can make a sibling call to a function. DECL is the
5517 declaration of the function being targeted by the call and EXP is the
5518 CALL_EXPR representing the call. */
5519 static bool
5520 spu_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
5521 {
5522 return decl && !TARGET_LARGE_MEM;
5523 }
5524
5525 /* We need to correctly update the back chain pointer and the Available
5526 Stack Size (which is in the second slot of the sp register). */
5527 void
5528 spu_allocate_stack (rtx op0, rtx op1)
5529 {
5530 HOST_WIDE_INT v;
5531 rtx chain = gen_reg_rtx (V4SImode);
5532 rtx stack_bot = gen_frame_mem (V4SImode, stack_pointer_rtx);
5533 rtx sp = gen_reg_rtx (V4SImode);
5534 rtx splatted = gen_reg_rtx (V4SImode);
5535 rtx pat = gen_reg_rtx (TImode);
5536
5537 /* copy the back chain so we can save it back again. */
5538 emit_move_insn (chain, stack_bot);
5539
5540 op1 = force_reg (SImode, op1);
5541
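  /* Build a shufb pattern that replicates the preferred (first) word of
     op1 into all four slots, so the requested size can be subtracted from
     the stack pointer and the Available Stack Size in one V4SI operation.  */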
5542 v = 0x1020300010203ll;
5543 emit_move_insn (pat, immed_double_const (v, v, TImode));
5544 emit_insn (gen_shufb (splatted, op1, op1, pat));
5545
5546 emit_insn (gen_spu_convert (sp, stack_pointer_rtx));
5547 emit_insn (gen_subv4si3 (sp, sp, splatted));
5548
5549 if (flag_stack_check)
5550 {
5551 rtx avail = gen_reg_rtx (SImode);
5552 rtx result = gen_reg_rtx (SImode);
5553 emit_insn (gen_vec_extractv4si (avail, sp, GEN_INT (1)));
5554 emit_insn (gen_cgt_si (result, avail, GEN_INT (-1)));
5555 emit_insn (gen_spu_heq (result, GEN_INT (0)));
5556 }
5557
5558 emit_insn (gen_spu_convert (stack_pointer_rtx, sp));
5559
5560 emit_move_insn (stack_bot, chain);
5561
5562 emit_move_insn (op0, virtual_stack_dynamic_rtx);
5563 }
5564
5565 void
5566 spu_restore_stack_nonlocal (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5567 {
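  /* arr is a shufb pattern that replicates the preferred (first) word of
     its first operand into all four slots.  */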
5568 static unsigned char arr[16] =
5569 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5570 rtx temp = gen_reg_rtx (SImode);
5571 rtx temp2 = gen_reg_rtx (SImode);
5572 rtx temp3 = gen_reg_rtx (V4SImode);
5573 rtx temp4 = gen_reg_rtx (V4SImode);
5574 rtx pat = gen_reg_rtx (TImode);
5575 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5576
5577 /* Restore the backchain from the first word, sp from the second. */
5578 emit_move_insn (temp2, adjust_address_nv (op1, SImode, 0));
5579 emit_move_insn (temp, adjust_address_nv (op1, SImode, 4));
5580
5581 emit_move_insn (pat, array_to_constant (TImode, arr));
5582
5583 /* Compute Available Stack Size for sp */
5584 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5585 emit_insn (gen_shufb (temp3, temp, temp, pat));
5586
5587 /* Compute Available Stack Size for back chain */
5588 emit_insn (gen_subsi3 (temp2, temp2, stack_pointer_rtx));
5589 emit_insn (gen_shufb (temp4, temp2, temp2, pat));
5590 emit_insn (gen_addv4si3 (temp4, sp, temp4));
5591
5592 emit_insn (gen_addv4si3 (sp, sp, temp3));
5593 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp4);
5594 }
5595
5596 static void
5597 spu_init_libfuncs (void)
5598 {
5599 set_optab_libfunc (smul_optab, DImode, "__muldi3");
5600 set_optab_libfunc (sdiv_optab, DImode, "__divdi3");
5601 set_optab_libfunc (smod_optab, DImode, "__moddi3");
5602 set_optab_libfunc (udiv_optab, DImode, "__udivdi3");
5603 set_optab_libfunc (umod_optab, DImode, "__umoddi3");
5604 set_optab_libfunc (udivmod_optab, DImode, "__udivmoddi4");
5605 set_optab_libfunc (ffs_optab, DImode, "__ffsdi2");
5606 set_optab_libfunc (clz_optab, DImode, "__clzdi2");
5607 set_optab_libfunc (ctz_optab, DImode, "__ctzdi2");
5608 set_optab_libfunc (popcount_optab, DImode, "__popcountdi2");
5609 set_optab_libfunc (parity_optab, DImode, "__paritydi2");
5610
5611 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__float_unssidf");
5612 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__float_unsdidf");
5613
5614 set_optab_libfunc (smul_optab, TImode, "__multi3");
5615 set_optab_libfunc (sdiv_optab, TImode, "__divti3");
5616 set_optab_libfunc (smod_optab, TImode, "__modti3");
5617 set_optab_libfunc (udiv_optab, TImode, "__udivti3");
5618 set_optab_libfunc (umod_optab, TImode, "__umodti3");
5619 set_optab_libfunc (udivmod_optab, TImode, "__udivmodti4");
5620 }
5621
5622 /* Make a subreg, stripping any existing subreg. We could possibly just
5623 call simplify_subreg, but in this case we know what we want. */
5624 rtx
5625 spu_gen_subreg (enum machine_mode mode, rtx x)
5626 {
5627 if (GET_CODE (x) == SUBREG)
5628 x = SUBREG_REG (x);
5629 if (GET_MODE (x) == mode)
5630 return x;
5631 return gen_rtx_SUBREG (mode, x, 0);
5632 }
5633
5634 static bool
5635 spu_return_in_memory (const_tree type, const_tree fntype ATTRIBUTE_UNUSED)
5636 {
5637 return (TYPE_MODE (type) == BLKmode
5638 && ((type) == 0
5639 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST
5640 || int_size_in_bytes (type) >
5641 (MAX_REGISTER_RETURN * UNITS_PER_WORD)));
5642 }
5643 \f
5644 /* Create the built-in types and functions */
5645
5646 enum spu_function_code
5647 {
5648 #define DEF_BUILTIN(fcode, icode, name, type, params) fcode,
5649 #include "spu-builtins.def"
5650 #undef DEF_BUILTIN
5651 NUM_SPU_BUILTINS
5652 };
5653
5654 extern GTY(()) struct spu_builtin_description spu_builtins[NUM_SPU_BUILTINS];
5655
5656 struct spu_builtin_description spu_builtins[] = {
5657 #define DEF_BUILTIN(fcode, icode, name, type, params) \
5658 {fcode, icode, name, type, params},
5659 #include "spu-builtins.def"
5660 #undef DEF_BUILTIN
5661 };
5662
5663 static GTY(()) tree spu_builtin_decls[NUM_SPU_BUILTINS];
5664
5665 /* Returns the spu builtin decl for CODE. */
5666
5667 static tree
5668 spu_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
5669 {
5670 if (code >= NUM_SPU_BUILTINS)
5671 return error_mark_node;
5672
5673 return spu_builtin_decls[code];
5674 }
5675
5676
5677 static void
5678 spu_init_builtins (void)
5679 {
5680 struct spu_builtin_description *d;
5681 unsigned int i;
5682
5683 V16QI_type_node = build_vector_type (intQI_type_node, 16);
5684 V8HI_type_node = build_vector_type (intHI_type_node, 8);
5685 V4SI_type_node = build_vector_type (intSI_type_node, 4);
5686 V2DI_type_node = build_vector_type (intDI_type_node, 2);
5687 V4SF_type_node = build_vector_type (float_type_node, 4);
5688 V2DF_type_node = build_vector_type (double_type_node, 2);
5689
5690 unsigned_V16QI_type_node = build_vector_type (unsigned_intQI_type_node, 16);
5691 unsigned_V8HI_type_node = build_vector_type (unsigned_intHI_type_node, 8);
5692 unsigned_V4SI_type_node = build_vector_type (unsigned_intSI_type_node, 4);
5693 unsigned_V2DI_type_node = build_vector_type (unsigned_intDI_type_node, 2);
5694
5695 spu_builtin_types[SPU_BTI_QUADWORD] = V16QI_type_node;
5696
5697 spu_builtin_types[SPU_BTI_7] = global_trees[TI_INTSI_TYPE];
5698 spu_builtin_types[SPU_BTI_S7] = global_trees[TI_INTSI_TYPE];
5699 spu_builtin_types[SPU_BTI_U7] = global_trees[TI_INTSI_TYPE];
5700 spu_builtin_types[SPU_BTI_S10] = global_trees[TI_INTSI_TYPE];
5701 spu_builtin_types[SPU_BTI_S10_4] = global_trees[TI_INTSI_TYPE];
5702 spu_builtin_types[SPU_BTI_U14] = global_trees[TI_INTSI_TYPE];
5703 spu_builtin_types[SPU_BTI_16] = global_trees[TI_INTSI_TYPE];
5704 spu_builtin_types[SPU_BTI_S16] = global_trees[TI_INTSI_TYPE];
5705 spu_builtin_types[SPU_BTI_S16_2] = global_trees[TI_INTSI_TYPE];
5706 spu_builtin_types[SPU_BTI_U16] = global_trees[TI_INTSI_TYPE];
5707 spu_builtin_types[SPU_BTI_U16_2] = global_trees[TI_INTSI_TYPE];
5708 spu_builtin_types[SPU_BTI_U18] = global_trees[TI_INTSI_TYPE];
5709
5710 spu_builtin_types[SPU_BTI_INTQI] = global_trees[TI_INTQI_TYPE];
5711 spu_builtin_types[SPU_BTI_INTHI] = global_trees[TI_INTHI_TYPE];
5712 spu_builtin_types[SPU_BTI_INTSI] = global_trees[TI_INTSI_TYPE];
5713 spu_builtin_types[SPU_BTI_INTDI] = global_trees[TI_INTDI_TYPE];
5714 spu_builtin_types[SPU_BTI_UINTQI] = global_trees[TI_UINTQI_TYPE];
5715 spu_builtin_types[SPU_BTI_UINTHI] = global_trees[TI_UINTHI_TYPE];
5716 spu_builtin_types[SPU_BTI_UINTSI] = global_trees[TI_UINTSI_TYPE];
5717 spu_builtin_types[SPU_BTI_UINTDI] = global_trees[TI_UINTDI_TYPE];
5718
5719 spu_builtin_types[SPU_BTI_FLOAT] = global_trees[TI_FLOAT_TYPE];
5720 spu_builtin_types[SPU_BTI_DOUBLE] = global_trees[TI_DOUBLE_TYPE];
5721
5722 spu_builtin_types[SPU_BTI_VOID] = global_trees[TI_VOID_TYPE];
5723
5724 spu_builtin_types[SPU_BTI_PTR] =
5725 build_pointer_type (build_qualified_type
5726 (void_type_node,
5727 TYPE_QUAL_CONST | TYPE_QUAL_VOLATILE));
5728
5729 /* For each builtin we build a new prototype. The tree code will make
5730 sure nodes are shared. */
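/* An illustrative sketch (not from the original sources): for a builtin
   whose d->parm[] were { SPU_BTI_UV4SI, SPU_BTI_UV4SI, SPU_BTI_UV4SI,
   SPU_BTI_END_OF_PARAMS }, the loop walks the parameters back to front,
   building the list (uv4si, uv4si, void_list_node), and
   build_function_type then yields the prototype
   "vector unsigned int f (vector unsigned int, vector unsigned int)". */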
5731 for (i = 0, d = spu_builtins; i < NUM_SPU_BUILTINS; i++, d++)
5732 {
5733 tree p;
5734 char name[64]; /* add_builtin_function will make a copy. */
5735 int parm;
5736
5737 if (d->name == 0)
5738 continue;
5739
5740 /* Find last parm. */
5741 for (parm = 1; d->parm[parm] != SPU_BTI_END_OF_PARAMS; parm++)
5742 ;
5743
5744 p = void_list_node;
5745 while (parm > 1)
5746 p = tree_cons (NULL_TREE, spu_builtin_types[d->parm[--parm]], p);
5747
5748 p = build_function_type (spu_builtin_types[d->parm[0]], p);
5749
5750 sprintf (name, "__builtin_%s", d->name);
5751 spu_builtin_decls[i] =
5752 add_builtin_function (name, p, i, BUILT_IN_MD, NULL, NULL_TREE);
5753 if (d->fcode == SPU_MASK_FOR_LOAD)
5754 TREE_READONLY (spu_builtin_decls[i]) = 1;
5755
5756 /* These builtins don't throw. */
5757 TREE_NOTHROW (spu_builtin_decls[i]) = 1;
5758 }
5759 }
5760
5761 void
5762 spu_restore_stack_block (rtx op0 ATTRIBUTE_UNUSED, rtx op1)
5763 {
5764 static unsigned char arr[16] =
5765 { 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3 };
5766
5767 rtx temp = gen_reg_rtx (Pmode);
5768 rtx temp2 = gen_reg_rtx (V4SImode);
5769 rtx temp3 = gen_reg_rtx (V4SImode);
5770 rtx pat = gen_reg_rtx (TImode);
5771 rtx sp = gen_rtx_REG (V4SImode, STACK_POINTER_REGNUM);
5772
5773 emit_move_insn (pat, array_to_constant (TImode, arr));
5774
5775 /* Restore the sp. */
5776 emit_move_insn (temp, op1);
5777 emit_move_insn (temp2, gen_frame_mem (V4SImode, stack_pointer_rtx));
5778
5779 /* Compute available stack size for sp. */
5780 emit_insn (gen_subsi3 (temp, temp, stack_pointer_rtx));
5781 emit_insn (gen_shufb (temp3, temp, temp, pat));
5782
5783 emit_insn (gen_addv4si3 (sp, sp, temp3));
5784 emit_move_insn (gen_frame_mem (V4SImode, stack_pointer_rtx), temp2);
5785 }
5786
5787 int
5788 spu_safe_dma (HOST_WIDE_INT channel)
5789 {
5790 return TARGET_SAFE_DMA && channel >= 21 && channel <= 27;
5791 }
5792
5793 void
5794 spu_builtin_splats (rtx ops[])
5795 {
5796 enum machine_mode mode = GET_MODE (ops[0]);
5797 if (GET_CODE (ops[1]) == CONST_INT || GET_CODE (ops[1]) == CONST_DOUBLE)
5798 {
5799 unsigned char arr[16];
5800 constant_to_array (GET_MODE_INNER (mode), ops[1], arr);
5801 emit_move_insn (ops[0], array_to_constant (mode, arr));
5802 }
5803 else
5804 {
5805 rtx reg = gen_reg_rtx (TImode);
5806 rtx shuf;
5807 if (GET_CODE (ops[1]) != REG
5808 && GET_CODE (ops[1]) != SUBREG)
5809 ops[1] = force_reg (GET_MODE_INNER (mode), ops[1]);
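/* The constants below are shufb control words.  Each control byte selects
   one byte of the 32-byte concatenation of the two source registers
   (0x00-0x0f from the first, 0x10-0x1f from the second); since both
   sources are ops[1] here, each pattern simply replicates the scalar's
   preferred slot: bytes 0-3 for SI/SF, bytes 2-3 for HI, byte 3 for QI,
   and bytes 0-7 for DI/DF (whose second half, 0x10-0x17, reads the same
   bytes through the second source operand). */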
5810 switch (mode)
5811 {
5812 case V2DImode:
5813 case V2DFmode:
5814 shuf =
5815 immed_double_const (0x0001020304050607ll, 0x1011121314151617ll,
5816 TImode);
5817 break;
5818 case V4SImode:
5819 case V4SFmode:
5820 shuf =
5821 immed_double_const (0x0001020300010203ll, 0x0001020300010203ll,
5822 TImode);
5823 break;
5824 case V8HImode:
5825 shuf =
5826 immed_double_const (0x0203020302030203ll, 0x0203020302030203ll,
5827 TImode);
5828 break;
5829 case V16QImode:
5830 shuf =
5831 immed_double_const (0x0303030303030303ll, 0x0303030303030303ll,
5832 TImode);
5833 break;
5834 default:
5835 abort ();
5836 }
5837 emit_move_insn (reg, shuf);
5838 emit_insn (gen_shufb (ops[0], ops[1], ops[1], reg));
5839 }
5840 }
5841
5842 void
5843 spu_builtin_extract (rtx ops[])
5844 {
5845 enum machine_mode mode;
5846 rtx rot, from, tmp;
5847
5848 mode = GET_MODE (ops[1]);
5849
5850 if (GET_CODE (ops[2]) == CONST_INT)
5851 {
5852 switch (mode)
5853 {
5854 case V16QImode:
5855 emit_insn (gen_vec_extractv16qi (ops[0], ops[1], ops[2]));
5856 break;
5857 case V8HImode:
5858 emit_insn (gen_vec_extractv8hi (ops[0], ops[1], ops[2]));
5859 break;
5860 case V4SFmode:
5861 emit_insn (gen_vec_extractv4sf (ops[0], ops[1], ops[2]));
5862 break;
5863 case V4SImode:
5864 emit_insn (gen_vec_extractv4si (ops[0], ops[1], ops[2]));
5865 break;
5866 case V2DImode:
5867 emit_insn (gen_vec_extractv2di (ops[0], ops[1], ops[2]));
5868 break;
5869 case V2DFmode:
5870 emit_insn (gen_vec_extractv2df (ops[0], ops[1], ops[2]));
5871 break;
5872 default:
5873 abort ();
5874 }
5875 return;
5876 }
5877
5878 from = spu_gen_subreg (TImode, ops[1]);
5879 rot = gen_reg_rtx (TImode);
5880 tmp = gen_reg_rtx (SImode);
5881
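/* For a variable index, rotate the quadword left so that the selected
   element lands in the scalar's preferred slot, then convert.  The byte
   counts computed below are index - 3 for QI (preferred slot is byte 3),
   2*index - 2 for HI (bytes 2-3), 4*index for SI/SF (bytes 0-3), and
   8*index for DI/DF (bytes 0-7). */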
5882 switch (mode)
5883 {
5884 case V16QImode:
5885 emit_insn (gen_addsi3 (tmp, ops[2], GEN_INT (-3)));
5886 break;
5887 case V8HImode:
5888 emit_insn (gen_addsi3 (tmp, ops[2], ops[2]));
5889 emit_insn (gen_addsi3 (tmp, tmp, GEN_INT (-2)));
5890 break;
5891 case V4SFmode:
5892 case V4SImode:
5893 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (2)));
5894 break;
5895 case V2DImode:
5896 case V2DFmode:
5897 emit_insn (gen_ashlsi3 (tmp, ops[2], GEN_INT (3)));
5898 break;
5899 default:
5900 abort ();
5901 }
5902 emit_insn (gen_rotqby_ti (rot, from, tmp));
5903
5904 emit_insn (gen_spu_convert (ops[0], rot));
5905 }
5906
5907 void
5908 spu_builtin_insert (rtx ops[])
5909 {
5910 enum machine_mode mode = GET_MODE (ops[0]);
5911 enum machine_mode imode = GET_MODE_INNER (mode);
5912 rtx mask = gen_reg_rtx (TImode);
5913 rtx offset;
5914
5915 if (GET_CODE (ops[3]) == CONST_INT)
5916 offset = GEN_INT (INTVAL (ops[3]) * GET_MODE_SIZE (imode));
5917 else
5918 {
5919 offset = gen_reg_rtx (SImode);
5920 emit_insn (gen_mulsi3
5921 (offset, ops[3], GEN_INT (GET_MODE_SIZE (imode))));
5922 }
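/* gen_cpat builds a shuffle control in the style of the cwd/cdd
   "generate controls for insertion" instructions: at the byte offset of
   element ops[3] it selects the scalar ops[2] from its preferred slot,
   and everywhere else it passes the corresponding byte of ops[1]
   through, so the shufb below performs the element insertion. */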
5923 emit_insn (gen_cpat
5924 (mask, stack_pointer_rtx, offset,
5925 GEN_INT (GET_MODE_SIZE (imode))));
5926 emit_insn (gen_shufb (ops[0], ops[1], ops[2], mask));
5927 }
5928
5929 void
5930 spu_builtin_promote (rtx ops[])
5931 {
5932 enum machine_mode mode, imode;
5933 rtx rot, from, offset;
5934 HOST_WIDE_INT pos;
5935
5936 mode = GET_MODE (ops[0]);
5937 imode = GET_MODE_INNER (mode);
5938
5939 from = gen_reg_rtx (TImode);
5940 rot = spu_gen_subreg (TImode, ops[0]);
5941
5942 emit_insn (gen_spu_convert (from, ops[1]));
5943
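/* The scalar now sits in the preferred slot of FROM.  Rotating left by
   (-element_size * ops[2]) & 15 bytes (with an adjustment for sub-word
   types, whose preferred slot does not start at byte 0) moves it into
   the byte positions of element ops[2]; the other bytes of the result
   are whatever the rotate produces, which should be acceptable since
   spu_promote leaves the remaining elements unspecified. */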
5944 if (GET_CODE (ops[2]) == CONST_INT)
5945 {
5946 pos = -GET_MODE_SIZE (imode) * INTVAL (ops[2]);
5947 if (GET_MODE_SIZE (imode) < 4)
5948 pos += 4 - GET_MODE_SIZE (imode);
5949 offset = GEN_INT (pos & 15);
5950 }
5951 else
5952 {
5953 offset = gen_reg_rtx (SImode);
5954 switch (mode)
5955 {
5956 case V16QImode:
5957 emit_insn (gen_subsi3 (offset, GEN_INT (3), ops[2]));
5958 break;
5959 case V8HImode:
5960 emit_insn (gen_subsi3 (offset, GEN_INT (1), ops[2]));
5961 emit_insn (gen_addsi3 (offset, offset, offset));
5962 break;
5963 case V4SFmode:
5964 case V4SImode:
5965 emit_insn (gen_subsi3 (offset, GEN_INT (0), ops[2]));
5966 emit_insn (gen_ashlsi3 (offset, offset, GEN_INT (2)));
5967 break;
5968 case V2DImode:
5969 case V2DFmode:
5970 emit_insn (gen_ashlsi3 (offset, ops[2], GEN_INT (3)));
5971 break;
5972 default:
5973 abort ();
5974 }
5975 }
5976 emit_insn (gen_rotqby_ti (rot, from, offset));
5977 }
5978
5979 static void
5980 spu_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
5981 {
5982 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
5983 rtx shuf = gen_reg_rtx (V4SImode);
5984 rtx insn = gen_reg_rtx (V4SImode);
5985 rtx shufc;
5986 rtx insnc;
5987 rtx mem;
5988
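/* A rough sketch of the generated trampoline (an interpretation of the
   instruction templates below, not original commentary): for small
   memory the two emitted words appear to be "ila $STATIC_CHAIN_REGNUM,cxt"
   and "bra fnaddr", with shufb/ior splicing the operand fields into the
   templates.  For -mlarge-mem the 16-byte template holds four
   immediate-load instructions that materialize fnaddr into register 79
   and cxt into the static chain register, and the word stored at offset
   16 (0x35000000 | 79 << 7) appears to encode "bi $79". */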
5989 fnaddr = force_reg (SImode, fnaddr);
5990 cxt = force_reg (SImode, cxt);
5991
5992 if (TARGET_LARGE_MEM)
5993 {
5994 rtx rotl = gen_reg_rtx (V4SImode);
5995 rtx mask = gen_reg_rtx (V4SImode);
5996 rtx bi = gen_reg_rtx (SImode);
5997 static unsigned char const shufa[16] = {
5998 2, 3, 0, 1, 18, 19, 16, 17,
5999 0, 1, 2, 3, 16, 17, 18, 19
6000 };
6001 static unsigned char const insna[16] = {
6002 0x41, 0, 0, 79,
6003 0x41, 0, 0, STATIC_CHAIN_REGNUM,
6004 0x60, 0x80, 0, 79,
6005 0x60, 0x80, 0, STATIC_CHAIN_REGNUM
6006 };
6007
6008 shufc = force_reg (TImode, array_to_constant (TImode, shufa));
6009 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6010
6011 emit_insn (gen_shufb (shuf, fnaddr, cxt, shufc));
6012 emit_insn (gen_vrotlv4si3 (rotl, shuf, spu_const (V4SImode, 7)));
6013 emit_insn (gen_movv4si (mask, spu_const (V4SImode, 0xffff << 7)));
6014 emit_insn (gen_selb (insn, insnc, rotl, mask));
6015
6016 mem = adjust_address (m_tramp, V4SImode, 0);
6017 emit_move_insn (mem, insn);
6018
6019 emit_move_insn (bi, GEN_INT (0x35000000 + (79 << 7)));
6020 mem = adjust_address (m_tramp, Pmode, 16);
6021 emit_move_insn (mem, bi);
6022 }
6023 else
6024 {
6025 rtx scxt = gen_reg_rtx (SImode);
6026 rtx sfnaddr = gen_reg_rtx (SImode);
6027 static unsigned char const insna[16] = {
6028 0x42, 0, 0, STATIC_CHAIN_REGNUM,
6029 0x30, 0, 0, 0,
6030 0, 0, 0, 0,
6031 0, 0, 0, 0
6032 };
6033
6034 shufc = gen_reg_rtx (TImode);
6035 insnc = force_reg (V4SImode, array_to_constant (V4SImode, insna));
6036
6037 /* By or'ing all of cxt with the ila opcode we are assuming cxt
6038 fits 18 bits and the last 4 are zeros. This will be true if
6039 the stack pointer is initialized to 0x3fff0 at program start;
6040 otherwise the ila instruction will be garbage. */
6041
6042 emit_insn (gen_ashlsi3 (scxt, cxt, GEN_INT (7)));
6043 emit_insn (gen_ashlsi3 (sfnaddr, fnaddr, GEN_INT (5)));
6044 emit_insn (gen_cpat
6045 (shufc, stack_pointer_rtx, GEN_INT (4), GEN_INT (4)));
6046 emit_insn (gen_shufb (shuf, sfnaddr, scxt, shufc));
6047 emit_insn (gen_iorv4si3 (insn, insnc, shuf));
6048
6049 mem = adjust_address (m_tramp, V4SImode, 0);
6050 emit_move_insn (mem, insn);
6051 }
6052 emit_insn (gen_sync ());
6053 }
6054
6055 void
6056 spu_expand_sign_extend (rtx ops[])
6057 {
6058 unsigned char arr[16];
6059 rtx pat = gen_reg_rtx (TImode);
6060 rtx sign, c;
6061 int i, last;
6062 last = GET_MODE (ops[0]) == DImode ? 7 : 15;
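/* The shuffle pattern built below takes sign bytes from SIGN and value
   bytes from ops[1]: the 0x10 control bytes pick the most significant
   byte of SIGN, which consists of sign bits, while the trailing entries
   address the value's preferred slot in ops[1] (0x00-0x03 for SImode,
   0x02-0x03 for HImode, and so on).  QImode goes through an HImode
   extension first, hence the 0x12/0x13 codes, both taken from SIGN. */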
6063 if (GET_MODE (ops[1]) == QImode)
6064 {
6065 sign = gen_reg_rtx (HImode);
6066 emit_insn (gen_extendqihi2 (sign, ops[1]));
6067 for (i = 0; i < 16; i++)
6068 arr[i] = 0x12;
6069 arr[last] = 0x13;
6070 }
6071 else
6072 {
6073 for (i = 0; i < 16; i++)
6074 arr[i] = 0x10;
6075 switch (GET_MODE (ops[1]))
6076 {
6077 case HImode:
6078 sign = gen_reg_rtx (SImode);
6079 emit_insn (gen_extendhisi2 (sign, ops[1]));
6080 arr[last] = 0x03;
6081 arr[last - 1] = 0x02;
6082 break;
6083 case SImode:
6084 sign = gen_reg_rtx (SImode);
6085 emit_insn (gen_ashrsi3 (sign, ops[1], GEN_INT (31)));
6086 for (i = 0; i < 4; i++)
6087 arr[last - i] = 3 - i;
6088 break;
6089 case DImode:
6090 sign = gen_reg_rtx (SImode);
6091 c = gen_reg_rtx (SImode);
6092 emit_insn (gen_spu_convert (c, ops[1]));
6093 emit_insn (gen_ashrsi3 (sign, c, GEN_INT (31)));
6094 for (i = 0; i < 8; i++)
6095 arr[last - i] = 7 - i;
6096 break;
6097 default:
6098 abort ();
6099 }
6100 }
6101 emit_move_insn (pat, array_to_constant (TImode, arr));
6102 emit_insn (gen_shufb (ops[0], ops[1], sign, pat));
6103 }
6104
6105 /* Expand vector initialization. If there are any constant parts,
6106 load the constant parts first; then load any non-constant parts. */
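/* Illustrative example (not from the original sources): for a V4SImode
   initializer { x, 1, 2, 3 } with non-constant x, the code first emits
   the constant vector { 1, 1, 2, 3 } (the variable slot is filled with
   the first constant so a splat remains possible), then patches slot 0
   with x via spu_builtin_insert. */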
6107 void
6108 spu_expand_vector_init (rtx target, rtx vals)
6109 {
6110 enum machine_mode mode = GET_MODE (target);
6111 int n_elts = GET_MODE_NUNITS (mode);
6112 int n_var = 0;
6113 bool all_same = true;
6114 rtx first, x = NULL_RTX, first_constant = NULL_RTX;
6115 int i;
6116
6117 first = XVECEXP (vals, 0, 0);
6118 for (i = 0; i < n_elts; ++i)
6119 {
6120 x = XVECEXP (vals, 0, i);
6121 if (!(CONST_INT_P (x)
6122 || GET_CODE (x) == CONST_DOUBLE
6123 || GET_CODE (x) == CONST_FIXED))
6124 ++n_var;
6125 else
6126 {
6127 if (first_constant == NULL_RTX)
6128 first_constant = x;
6129 }
6130 if (i > 0 && !rtx_equal_p (x, first))
6131 all_same = false;
6132 }
6133
6134 /* If all elements are the same, use splats to replicate the element. */
6135 if (all_same)
6136 {
6137 if (!CONSTANT_P (first)
6138 && !register_operand (first, GET_MODE (x)))
6139 first = force_reg (GET_MODE (first), first);
6140 emit_insn (gen_spu_splats (target, first));
6141 return;
6142 }
6143
6144 /* Load the constant parts. */
6145 if (n_var != n_elts)
6146 {
6147 if (n_var == 0)
6148 {
6149 emit_move_insn (target,
6150 gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
6151 }
6152 else
6153 {
6154 rtx constant_parts_rtx = copy_rtx (vals);
6155
6156 gcc_assert (first_constant != NULL_RTX);
6157 /* Fill empty slots with the first constant; this increases
6158 our chance of using splats in the recursive call below. */
6159 for (i = 0; i < n_elts; ++i)
6160 {
6161 x = XVECEXP (constant_parts_rtx, 0, i);
6162 if (!(CONST_INT_P (x)
6163 || GET_CODE (x) == CONST_DOUBLE
6164 || GET_CODE (x) == CONST_FIXED))
6165 XVECEXP (constant_parts_rtx, 0, i) = first_constant;
6166 }
6167
6168 spu_expand_vector_init (target, constant_parts_rtx);
6169 }
6170 }
6171
6172 /* Load the variable parts. */
6173 if (n_var != 0)
6174 {
6175 rtx insert_operands[4];
6176
6177 insert_operands[0] = target;
6178 insert_operands[2] = target;
6179 for (i = 0; i < n_elts; ++i)
6180 {
6181 x = XVECEXP (vals, 0, i);
6182 if (!(CONST_INT_P (x)
6183 || GET_CODE (x) == CONST_DOUBLE
6184 || GET_CODE (x) == CONST_FIXED))
6185 {
6186 if (!register_operand (x, GET_MODE (x)))
6187 x = force_reg (GET_MODE (x), x);
6188 insert_operands[1] = x;
6189 insert_operands[3] = GEN_INT (i);
6190 spu_builtin_insert (insert_operands);
6191 }
6192 }
6193 }
6194 }
6195
6196 /* Return the insn code for the vector compare instruction for the given CODE,
6197 DEST_MODE and OP_MODE. Return -1 if no valid insn is available. */
6198
6199 static int
6200 get_vec_cmp_insn (enum rtx_code code,
6201 enum machine_mode dest_mode,
6202 enum machine_mode op_mode)
6203
6204 {
6205 switch (code)
6206 {
6207 case EQ:
6208 if (dest_mode == V16QImode && op_mode == V16QImode)
6209 return CODE_FOR_ceq_v16qi;
6210 if (dest_mode == V8HImode && op_mode == V8HImode)
6211 return CODE_FOR_ceq_v8hi;
6212 if (dest_mode == V4SImode && op_mode == V4SImode)
6213 return CODE_FOR_ceq_v4si;
6214 if (dest_mode == V4SImode && op_mode == V4SFmode)
6215 return CODE_FOR_ceq_v4sf;
6216 if (dest_mode == V2DImode && op_mode == V2DFmode)
6217 return CODE_FOR_ceq_v2df;
6218 break;
6219 case GT:
6220 if (dest_mode == V16QImode && op_mode == V16QImode)
6221 return CODE_FOR_cgt_v16qi;
6222 if (dest_mode == V8HImode && op_mode == V8HImode)
6223 return CODE_FOR_cgt_v8hi;
6224 if (dest_mode == V4SImode && op_mode == V4SImode)
6225 return CODE_FOR_cgt_v4si;
6226 if (dest_mode == V4SImode && op_mode == V4SFmode)
6227 return CODE_FOR_cgt_v4sf;
6228 if (dest_mode == V2DImode && op_mode == V2DFmode)
6229 return CODE_FOR_cgt_v2df;
6230 break;
6231 case GTU:
6232 if (dest_mode == V16QImode && op_mode == V16QImode)
6233 return CODE_FOR_clgt_v16qi;
6234 if (dest_mode == V8HImode && op_mode == V8HImode)
6235 return CODE_FOR_clgt_v8hi;
6236 if (dest_mode == V4SImode && op_mode == V4SImode)
6237 return CODE_FOR_clgt_v4si;
6238 break;
6239 default:
6240 break;
6241 }
6242 return -1;
6243 }
6244
6245 /* Emit vector compare for operands OP0 and OP1 using code RCODE.
6246 DMODE is the expected destination mode. This is a recursive function. */
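/* For example, there is no direct V4SImode LT pattern: LT is handled by
   swapping the operands and using GT, NE is emitted as EQ followed by a
   one's complement, and GE/GEU/LE/LEU are formed by IORing the strict
   comparison with EQ. */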
6247
6248 static rtx
6249 spu_emit_vector_compare (enum rtx_code rcode,
6250 rtx op0, rtx op1,
6251 enum machine_mode dmode)
6252 {
6253 int vec_cmp_insn;
6254 rtx mask;
6255 enum machine_mode dest_mode;
6256 enum machine_mode op_mode = GET_MODE (op1);
6257
6258 gcc_assert (GET_MODE (op0) == GET_MODE (op1));
6259
6260 /* Single-precision floating-point vector compares use destination mode
6261 V4SImode and double-precision ones use V2DImode; the result is moved
6262 to the requested mode later. */
6263 if (dmode == V4SFmode)
6264 dest_mode = V4SImode;
6265 else if (dmode == V2DFmode)
6266 dest_mode = V2DImode;
6267 else
6268 dest_mode = dmode;
6269
6270 mask = gen_reg_rtx (dest_mode);
6271 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6272
6273 if (vec_cmp_insn == -1)
6274 {
6275 bool swap_operands = false;
6276 bool try_again = false;
6277 switch (rcode)
6278 {
6279 case LT:
6280 rcode = GT;
6281 swap_operands = true;
6282 try_again = true;
6283 break;
6284 case LTU:
6285 rcode = GTU;
6286 swap_operands = true;
6287 try_again = true;
6288 break;
6289 case NE:
6290 /* Treat A != B as ~(A==B). */
6291 {
6292 enum insn_code nor_code;
6293 rtx eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6294 nor_code = optab_handler (one_cmpl_optab, dest_mode);
6295 gcc_assert (nor_code != CODE_FOR_nothing);
6296 emit_insn (GEN_FCN (nor_code) (mask, eq_rtx));
6297 if (dmode != dest_mode)
6298 {
6299 rtx temp = gen_reg_rtx (dest_mode);
6300 convert_move (temp, mask, 0);
6301 return temp;
6302 }
6303 return mask;
6304 }
6305 break;
6306 case GE:
6307 case GEU:
6308 case LE:
6309 case LEU:
6310 /* Try GT/GTU/LT/LTU OR EQ */
6311 {
6312 rtx c_rtx, eq_rtx;
6313 enum insn_code ior_code;
6314 enum rtx_code new_code;
6315
6316 switch (rcode)
6317 {
6318 case GE: new_code = GT; break;
6319 case GEU: new_code = GTU; break;
6320 case LE: new_code = LT; break;
6321 case LEU: new_code = LTU; break;
6322 default:
6323 gcc_unreachable ();
6324 }
6325
6326 c_rtx = spu_emit_vector_compare (new_code, op0, op1, dest_mode);
6327 eq_rtx = spu_emit_vector_compare (EQ, op0, op1, dest_mode);
6328
6329 ior_code = optab_handler (ior_optab, dest_mode);
6330 gcc_assert (ior_code != CODE_FOR_nothing);
6331 emit_insn (GEN_FCN (ior_code) (mask, c_rtx, eq_rtx));
6332 if (dmode != dest_mode)
6333 {
6334 rtx temp = gen_reg_rtx (dest_mode);
6335 convert_move (temp, mask, 0);
6336 return temp;
6337 }
6338 return mask;
6339 }
6340 break;
6341 default:
6342 gcc_unreachable ();
6343 }
6344
6345 /* You only get two chances. */
6346 if (try_again)
6347 vec_cmp_insn = get_vec_cmp_insn (rcode, dest_mode, op_mode);
6348
6349 gcc_assert (vec_cmp_insn != -1);
6350
6351 if (swap_operands)
6352 {
6353 rtx tmp;
6354 tmp = op0;
6355 op0 = op1;
6356 op1 = tmp;
6357 }
6358 }
6359
6360 emit_insn (GEN_FCN (vec_cmp_insn) (mask, op0, op1));
6361 if (dmode != dest_mode)
6362 {
6363 rtx temp = gen_reg_rtx (dest_mode);
6364 convert_move (temp, mask, 0);
6365 return temp;
6366 }
6367 return mask;
6368 }
6369
6370
6371 /* Emit vector conditional expression.
6372 DEST is destination. OP1 and OP2 are two VEC_COND_EXPR operands.
6373 CC_OP0 and CC_OP1 are the two operands for the relation operation COND. */
6374
6375 int
6376 spu_emit_vector_cond_expr (rtx dest, rtx op1, rtx op2,
6377 rtx cond, rtx cc_op0, rtx cc_op1)
6378 {
6379 enum machine_mode dest_mode = GET_MODE (dest);
6380 enum rtx_code rcode = GET_CODE (cond);
6381 rtx mask;
6382
6383 /* Get the vector mask for the given relational operation. */
6384 mask = spu_emit_vector_compare (rcode, cc_op0, cc_op1, dest_mode);
6385
6386 emit_insn (gen_selb (dest, op2, op1, mask));
6387
6388 return 1;
6389 }
6390
6391 static rtx
6392 spu_force_reg (enum machine_mode mode, rtx op)
6393 {
6394 rtx x, r;
6395 if (GET_MODE (op) == VOIDmode || GET_MODE (op) == BLKmode)
6396 {
6397 if ((SCALAR_INT_MODE_P (mode) && GET_CODE (op) == CONST_INT)
6398 || GET_MODE (op) == BLKmode)
6399 return force_reg (mode, convert_to_mode (mode, op, 0));
6400 abort ();
6401 }
6402
6403 r = force_reg (GET_MODE (op), op);
6404 if (GET_MODE_SIZE (GET_MODE (op)) == GET_MODE_SIZE (mode))
6405 {
6406 x = simplify_gen_subreg (mode, r, GET_MODE (op), 0);
6407 if (x)
6408 return x;
6409 }
6410
6411 x = gen_reg_rtx (mode);
6412 emit_insn (gen_spu_convert (x, r));
6413 return x;
6414 }
6415
6416 static void
6417 spu_check_builtin_parm (struct spu_builtin_description *d, rtx op, int p)
6418 {
6419 HOST_WIDE_INT v = 0;
6420 int lsbits;
6421 /* Check the range of immediate operands. */
6422 if (p >= SPU_BTI_7 && p <= SPU_BTI_U18)
6423 {
6424 int range = p - SPU_BTI_7;
6425
6426 if (!CONSTANT_P (op))
6427 error ("%s expects an integer literal in the range [%d, %d].",
6428 d->name,
6429 spu_builtin_range[range].low, spu_builtin_range[range].high);
6430
6431 if (GET_CODE (op) == CONST
6432 && (GET_CODE (XEXP (op, 0)) == PLUS
6433 || GET_CODE (XEXP (op, 0)) == MINUS))
6434 {
6435 v = INTVAL (XEXP (XEXP (op, 0), 1));
6436 op = XEXP (XEXP (op, 0), 0);
6437 }
6438 else if (GET_CODE (op) == CONST_INT)
6439 v = INTVAL (op);
6440 else if (GET_CODE (op) == CONST_VECTOR
6441 && GET_CODE (CONST_VECTOR_ELT (op, 0)) == CONST_INT)
6442 v = INTVAL (CONST_VECTOR_ELT (op, 0));
6443
6444 /* The default for v is 0, which is valid in every range. */
6445 if (v < spu_builtin_range[range].low
6446 || v > spu_builtin_range[range].high)
6447 error ("%s expects an integer literal in the range [%d, %d]. ("
6448 HOST_WIDE_INT_PRINT_DEC ")",
6449 d->name,
6450 spu_builtin_range[range].low, spu_builtin_range[range].high,
6451 v);
6452
6453 switch (p)
6454 {
6455 case SPU_BTI_S10_4:
6456 lsbits = 4;
6457 break;
6458 case SPU_BTI_U16_2:
6459 /* This is only used by lqa and stqa. Even though the insns
6460 encode 16 bits of the address (all but the 2 least
6461 significant), only 14 bits are used because it is masked to
6462 be 16-byte aligned. */
6463 lsbits = 4;
6464 break;
6465 case SPU_BTI_S16_2:
6466 /* This is used for lqr and stqr. */
6467 lsbits = 2;
6468 break;
6469 default:
6470 lsbits = 0;
6471 }
6472
6473 if (GET_CODE (op) == LABEL_REF
6474 || (GET_CODE (op) == SYMBOL_REF
6475 && SYMBOL_REF_FUNCTION_P (op))
6476 || (v & ((1 << lsbits) - 1)) != 0)
6477 warning (0, "%d least significant bits of %s are ignored.", lsbits,
6478 d->name);
6479 }
6480 }
6481
6482
6483 static int
6484 expand_builtin_args (struct spu_builtin_description *d, tree exp,
6485 rtx target, rtx ops[])
6486 {
6487 enum insn_code icode = (enum insn_code) d->icode;
6488 int i = 0, a;
6489
6490 /* Expand the arguments into rtl. */
6491
6492 if (d->parm[0] != SPU_BTI_VOID)
6493 ops[i++] = target;
6494
6495 for (a = 0; d->parm[a+1] != SPU_BTI_END_OF_PARAMS; i++, a++)
6496 {
6497 tree arg = CALL_EXPR_ARG (exp, a);
6498 if (arg == 0)
6499 abort ();
6500 ops[i] = expand_expr (arg, NULL_RTX, VOIDmode, EXPAND_NORMAL);
6501 }
6502
6503 /* The insn pattern may have additional operands (SCRATCH).
6504 Return the number of actual non-SCRATCH operands. */
6505 gcc_assert (i <= insn_data[icode].n_operands);
6506 return i;
6507 }
6508
6509 static rtx
6510 spu_expand_builtin_1 (struct spu_builtin_description *d,
6511 tree exp, rtx target)
6512 {
6513 rtx pat;
6514 rtx ops[8];
6515 enum insn_code icode = (enum insn_code) d->icode;
6516 enum machine_mode mode, tmode;
6517 int i, p;
6518 int n_operands;
6519 tree return_type;
6520
6521 /* Set up ops[] with values from arglist. */
6522 n_operands = expand_builtin_args (d, exp, target, ops);
6523
6524 /* Handle the target operand which must be operand 0. */
6525 i = 0;
6526 if (d->parm[0] != SPU_BTI_VOID)
6527 {
6528
6529 /* We prefer the mode specified for the match_operand; otherwise
6530 use the mode from the builtin function prototype. */
6531 tmode = insn_data[d->icode].operand[0].mode;
6532 if (tmode == VOIDmode)
6533 tmode = TYPE_MODE (spu_builtin_types[d->parm[0]]);
6534
6535 /* Try to use TARGET, because not using it can lead to extra copies,
6536 and when we are using all of the registers, extra copies lead
6537 to extra spills. */
6538 if (target && GET_CODE (target) == REG && GET_MODE (target) == tmode)
6539 ops[0] = target;
6540 else
6541 target = ops[0] = gen_reg_rtx (tmode);
6542
6543 if (!(*insn_data[icode].operand[0].predicate) (ops[0], tmode))
6544 abort ();
6545
6546 i++;
6547 }
6548
6549 if (d->fcode == SPU_MASK_FOR_LOAD)
6550 {
6551 enum machine_mode mode = insn_data[icode].operand[1].mode;
6552 tree arg;
6553 rtx addr, op, pat;
6554
6555 /* Get the address. */
6556 arg = CALL_EXPR_ARG (exp, 0);
6557 gcc_assert (POINTER_TYPE_P (TREE_TYPE (arg)));
6558 op = expand_expr (arg, NULL_RTX, Pmode, EXPAND_NORMAL);
6559 addr = memory_address (mode, op);
6560
6561 /* Negate the address. */
6562 op = gen_reg_rtx (GET_MODE (addr));
6563 emit_insn (gen_rtx_SET (VOIDmode, op,
6564 gen_rtx_NEG (GET_MODE (addr), addr)));
6565 op = gen_rtx_MEM (mode, op);
6566
6567 pat = GEN_FCN (icode) (target, op);
6568 if (!pat)
6569 return 0;
6570 emit_insn (pat);
6571 return target;
6572 }
6573
6574 /* Ignore align_hint, but still expand its args in case they have
6575 side effects. */
6576 if (icode == CODE_FOR_spu_align_hint)
6577 return 0;
6578
6579 /* Handle the rest of the operands. */
6580 for (p = 1; i < n_operands; i++, p++)
6581 {
6582 if (insn_data[d->icode].operand[i].mode != VOIDmode)
6583 mode = insn_data[d->icode].operand[i].mode;
6584 else
6585 mode = TYPE_MODE (spu_builtin_types[d->parm[i]]);
6586
6587 /* MODE can be VOIDmode here for labels. */
6588
6589 /* For specific intrinsics with an immediate operand, e.g.,
6590 si_ai(), we sometimes need to convert the scalar argument to a
6591 vector argument by splatting the scalar. */
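/* A hypothetical example: an immediate such as 10 is widened to the
   vector constant { 10, 10, 10, 10 } via spu_const, while a non-constant
   scalar is converted to the vector's inner mode and splatted with
   spu_splats. */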
6592 if (VECTOR_MODE_P (mode)
6593 && (GET_CODE (ops[i]) == CONST_INT
6594 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_INT
6595 || GET_MODE_CLASS (GET_MODE (ops[i])) == MODE_FLOAT))
6596 {
6597 if (GET_CODE (ops[i]) == CONST_INT)
6598 ops[i] = spu_const (mode, INTVAL (ops[i]));
6599 else
6600 {
6601 rtx reg = gen_reg_rtx (mode);
6602 enum machine_mode imode = GET_MODE_INNER (mode);
6603 if (!spu_nonmem_operand (ops[i], GET_MODE (ops[i])))
6604 ops[i] = force_reg (GET_MODE (ops[i]), ops[i]);
6605 if (imode != GET_MODE (ops[i]))
6606 ops[i] = convert_to_mode (imode, ops[i],
6607 TYPE_UNSIGNED (spu_builtin_types
6608 [d->parm[i]]));
6609 emit_insn (gen_spu_splats (reg, ops[i]));
6610 ops[i] = reg;
6611 }
6612 }
6613
6614 spu_check_builtin_parm (d, ops[i], d->parm[p]);
6615
6616 if (!(*insn_data[icode].operand[i].predicate) (ops[i], mode))
6617 ops[i] = spu_force_reg (mode, ops[i]);
6618 }
6619
6620 switch (n_operands)
6621 {
6622 case 0:
6623 pat = GEN_FCN (icode) (0);
6624 break;
6625 case 1:
6626 pat = GEN_FCN (icode) (ops[0]);
6627 break;
6628 case 2:
6629 pat = GEN_FCN (icode) (ops[0], ops[1]);
6630 break;
6631 case 3:
6632 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2]);
6633 break;
6634 case 4:
6635 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3]);
6636 break;
6637 case 5:
6638 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4]);
6639 break;
6640 case 6:
6641 pat = GEN_FCN (icode) (ops[0], ops[1], ops[2], ops[3], ops[4], ops[5]);
6642 break;
6643 default:
6644 abort ();
6645 }
6646
6647 if (!pat)
6648 abort ();
6649
6650 if (d->type == B_CALL || d->type == B_BISLED)
6651 emit_call_insn (pat);
6652 else if (d->type == B_JUMP)
6653 {
6654 emit_jump_insn (pat);
6655 emit_barrier ();
6656 }
6657 else
6658 emit_insn (pat);
6659
6660 return_type = spu_builtin_types[d->parm[0]];
6661 if (d->parm[0] != SPU_BTI_VOID
6662 && GET_MODE (target) != TYPE_MODE (return_type))
6663 {
6664 /* TARGET is the return value. It should always have the mode of
6665 the builtin function prototype. */
6666 target = spu_force_reg (TYPE_MODE (return_type), target);
6667 }
6668
6669 return target;
6670 }
6671
6672 rtx
6673 spu_expand_builtin (tree exp,
6674 rtx target,
6675 rtx subtarget ATTRIBUTE_UNUSED,
6676 enum machine_mode mode ATTRIBUTE_UNUSED,
6677 int ignore ATTRIBUTE_UNUSED)
6678 {
6679 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
6680 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
6681 struct spu_builtin_description *d;
6682
6683 if (fcode < NUM_SPU_BUILTINS)
6684 {
6685 d = &spu_builtins[fcode];
6686
6687 return spu_expand_builtin_1 (d, exp, target);
6688 }
6689 abort ();
6690 }
6691
6692 /* Implement targetm.vectorize.builtin_mul_widen_even. */
6693 static tree
6694 spu_builtin_mul_widen_even (tree type)
6695 {
6696 switch (TYPE_MODE (type))
6697 {
6698 case V8HImode:
6699 if (TYPE_UNSIGNED (type))
6700 return spu_builtin_decls[SPU_MULE_0];
6701 else
6702 return spu_builtin_decls[SPU_MULE_1];
6703 break;
6704 default:
6705 return NULL_TREE;
6706 }
6707 }
6708
6709 /* Implement targetm.vectorize.builtin_mul_widen_odd. */
6710 static tree
6711 spu_builtin_mul_widen_odd (tree type)
6712 {
6713 switch (TYPE_MODE (type))
6714 {
6715 case V8HImode:
6716 if (TYPE_UNSIGNED (type))
6717 return spu_builtin_decls[SPU_MULO_1];
6718 else
6719 return spu_builtin_decls[SPU_MULO_0];
6720 break;
6721 default:
6722 return NULL_TREE;
6723 }
6724 }
6725
6726 /* Implement targetm.vectorize.builtin_mask_for_load. */
6727 static tree
6728 spu_builtin_mask_for_load (void)
6729 {
6730 return spu_builtin_decls[SPU_MASK_FOR_LOAD];
6731 }
6732
6733 /* Implement targetm.vectorize.builtin_vectorization_cost. */
6734 static int
6735 spu_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
6736 tree vectype ATTRIBUTE_UNUSED,
6737 int misalign ATTRIBUTE_UNUSED)
6738 {
6739 switch (type_of_cost)
6740 {
6741 case scalar_stmt:
6742 case vector_stmt:
6743 case vector_load:
6744 case vector_store:
6745 case vec_to_scalar:
6746 case scalar_to_vec:
6747 case cond_branch_not_taken:
6748 case vec_perm:
6749 return 1;
6750
6751 case scalar_store:
6752 return 10;
6753
6754 case scalar_load:
6755 /* Load + rotate. */
6756 return 2;
6757
6758 case unaligned_load:
6759 return 2;
6760
6761 case cond_branch_taken:
6762 return 6;
6763
6764 default:
6765 gcc_unreachable ();
6766 }
6767 }
6768
6769 /* Return true iff a data reference of TYPE can reach vector alignment (16)
6770 after applying N iterations. This routine does not determine
6771 how many iterations are required to reach the desired alignment. */
6772
6773 static bool
6774 spu_vector_alignment_reachable (const_tree type ATTRIBUTE_UNUSED, bool is_packed)
6775 {
6776 if (is_packed)
6777 return false;
6778
6779 /* All other types are naturally aligned. */
6780 return true;
6781 }
6782
6783 /* Implement targetm.vectorize.builtin_vec_perm. */
6784 tree
6785 spu_builtin_vec_perm (tree type, tree *mask_element_type)
6786 {
6787 *mask_element_type = unsigned_char_type_node;
6788
6789 switch (TYPE_MODE (type))
6790 {
6791 case V16QImode:
6792 if (TYPE_UNSIGNED (type))
6793 return spu_builtin_decls[SPU_SHUFFLE_0];
6794 else
6795 return spu_builtin_decls[SPU_SHUFFLE_1];
6796
6797 case V8HImode:
6798 if (TYPE_UNSIGNED (type))
6799 return spu_builtin_decls[SPU_SHUFFLE_2];
6800 else
6801 return spu_builtin_decls[SPU_SHUFFLE_3];
6802
6803 case V4SImode:
6804 if (TYPE_UNSIGNED (type))
6805 return spu_builtin_decls[SPU_SHUFFLE_4];
6806 else
6807 return spu_builtin_decls[SPU_SHUFFLE_5];
6808
6809 case V2DImode:
6810 if (TYPE_UNSIGNED (type))
6811 return spu_builtin_decls[SPU_SHUFFLE_6];
6812 else
6813 return spu_builtin_decls[SPU_SHUFFLE_7];
6814
6815 case V4SFmode:
6816 return spu_builtin_decls[SPU_SHUFFLE_8];
6817
6818 case V2DFmode:
6819 return spu_builtin_decls[SPU_SHUFFLE_9];
6820
6821 default:
6822 return NULL_TREE;
6823 }
6824 }
6825
6826 /* Return the appropriate mode for a named address pointer. */
6827 static enum machine_mode
6828 spu_addr_space_pointer_mode (addr_space_t addrspace)
6829 {
6830 switch (addrspace)
6831 {
6832 case ADDR_SPACE_GENERIC:
6833 return ptr_mode;
6834 case ADDR_SPACE_EA:
6835 return EAmode;
6836 default:
6837 gcc_unreachable ();
6838 }
6839 }
6840
6841 /* Return the appropriate mode for a named address address. */
6842 static enum machine_mode
6843 spu_addr_space_address_mode (addr_space_t addrspace)
6844 {
6845 switch (addrspace)
6846 {
6847 case ADDR_SPACE_GENERIC:
6848 return Pmode;
6849 case ADDR_SPACE_EA:
6850 return EAmode;
6851 default:
6852 gcc_unreachable ();
6853 }
6854 }
6855
6856 /* Determine if one named address space is a subset of another. */
6857
6858 static bool
6859 spu_addr_space_subset_p (addr_space_t subset, addr_space_t superset)
6860 {
6861 gcc_assert (subset == ADDR_SPACE_GENERIC || subset == ADDR_SPACE_EA);
6862 gcc_assert (superset == ADDR_SPACE_GENERIC || superset == ADDR_SPACE_EA);
6863
6864 if (subset == superset)
6865 return true;
6866
6867 /* If we have -mno-address-space-conversion, treat __ea and generic as not
6868 being subsets but instead as disjoint address spaces. */
6869 else if (!TARGET_ADDRESS_SPACE_CONVERSION)
6870 return false;
6871
6872 else
6873 return (subset == ADDR_SPACE_GENERIC && superset == ADDR_SPACE_EA);
6874 }
6875
6876 /* Convert from one address space to another. */
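/* A note on the mechanics below (an interpretation, not original
   commentary): __ea pointers are effective addresses while generic
   pointers are local-store offsets, so the conversion adds or subtracts
   the value of the external symbol __ea_local_store; the conditional
   move keeps a NULL pointer NULL in both directions instead of biasing
   it by the local-store base. */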
6877 static rtx
6878 spu_addr_space_convert (rtx op, tree from_type, tree to_type)
6879 {
6880 addr_space_t from_as = TYPE_ADDR_SPACE (TREE_TYPE (from_type));
6881 addr_space_t to_as = TYPE_ADDR_SPACE (TREE_TYPE (to_type));
6882
6883 gcc_assert (from_as == ADDR_SPACE_GENERIC || from_as == ADDR_SPACE_EA);
6884 gcc_assert (to_as == ADDR_SPACE_GENERIC || to_as == ADDR_SPACE_EA);
6885
6886 if (to_as == ADDR_SPACE_GENERIC && from_as == ADDR_SPACE_EA)
6887 {
6888 rtx result, ls;
6889
6890 ls = gen_const_mem (DImode,
6891 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6892 set_mem_align (ls, 128);
6893
6894 result = gen_reg_rtx (Pmode);
6895 ls = force_reg (Pmode, convert_modes (Pmode, DImode, ls, 1));
6896 op = force_reg (Pmode, convert_modes (Pmode, EAmode, op, 1));
6897 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6898 ls, const0_rtx, Pmode, 1);
6899
6900 emit_insn (gen_subsi3 (result, op, ls));
6901
6902 return result;
6903 }
6904
6905 else if (to_as == ADDR_SPACE_EA && from_as == ADDR_SPACE_GENERIC)
6906 {
6907 rtx result, ls;
6908
6909 ls = gen_const_mem (DImode,
6910 gen_rtx_SYMBOL_REF (Pmode, "__ea_local_store"));
6911 set_mem_align (ls, 128);
6912
6913 result = gen_reg_rtx (EAmode);
6914 ls = force_reg (EAmode, convert_modes (EAmode, DImode, ls, 1));
6915 op = force_reg (Pmode, op);
6916 ls = emit_conditional_move (ls, NE, op, const0_rtx, Pmode,
6917 ls, const0_rtx, EAmode, 1);
6918 op = force_reg (EAmode, convert_modes (EAmode, Pmode, op, 1));
6919
6920 if (EAmode == SImode)
6921 emit_insn (gen_addsi3 (result, op, ls));
6922 else
6923 emit_insn (gen_adddi3 (result, op, ls));
6924
6925 return result;
6926 }
6927
6928 else
6929 gcc_unreachable ();
6930 }
6931
6932
6933 /* Count the total number of instructions in each pipe and return the
6934 maximum, which is used as the Minimum Iteration Interval (MII)
6935 in the modulo scheduler. get_pipe () returns -2, -1, 0, or 1;
6936 a value of -2 means the instruction can go in either pipe0 or pipe1. */
6937 static int
6938 spu_sms_res_mii (struct ddg *g)
6939 {
6940 int i;
6941 unsigned t[4] = {0, 0, 0, 0};
6942
6943 for (i = 0; i < g->num_nodes; i++)
6944 {
6945 rtx insn = g->nodes[i].insn;
6946 int p = get_pipe (insn) + 2;
6947
6948 assert (p >= 0);
6949 assert (p < 4);
6950
6951 t[p]++;
6952 if (dump_file && INSN_P (insn))
6953 fprintf (dump_file, "i%d %s %d %d\n",
6954 INSN_UID (insn),
6955 insn_data[INSN_CODE(insn)].name,
6956 p, t[p]);
6957 }
6958 if (dump_file)
6959 fprintf (dump_file, "%d %d %d %d\n", t[0], t[1], t[2], t[3]);
6960
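/* t[2] and t[3] count pipe0-only and pipe1-only instructions, and t[0]
   counts those that can issue in either pipe, so the bound below is the
   larger of each single pipe's load and the dual-issue total rounded up,
   i.e. ceil ((t[0] + t[2] + t[3]) / 2). */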
6961 return MAX ((t[0] + t[2] + t[3] + 1) / 2, MAX (t[2], t[3]));
6962 }
6963
6964
6965 void
6966 spu_init_expanders (void)
6967 {
6968 if (cfun)
6969 {
6970 rtx r0, r1;
6971 /* The hard frame pointer register is only 128-bit aligned when
6972 frame_pointer_needed is true. We don't know that until we're
6973 expanding the prologue. */
6974 REGNO_POINTER_ALIGN (HARD_FRAME_POINTER_REGNUM) = 8;
6975
6976 /* A number of passes use LAST_VIRTUAL_REGISTER+1 and
6977 LAST_VIRTUAL_REGISTER+2 to test the back-end. We want them
6978 to be treated as aligned, so generate them here. */
6979 r0 = gen_reg_rtx (SImode);
6980 r1 = gen_reg_rtx (SImode);
6981 mark_reg_pointer (r0, 128);
6982 mark_reg_pointer (r1, 128);
6983 gcc_assert (REGNO (r0) == LAST_VIRTUAL_REGISTER + 1
6984 && REGNO (r1) == LAST_VIRTUAL_REGISTER + 2);
6985 }
6986 }
6987
6988 static enum machine_mode
6989 spu_libgcc_cmp_return_mode (void)
6990 {
6991
6992 /* For SPU, word mode is TImode, so it is better to use SImode
6993 for compare returns. */
6994 return SImode;
6995 }
6996
6997 static enum machine_mode
6998 spu_libgcc_shift_count_mode (void)
6999 {
7000 /* For SPU, word mode is TImode, so it is better to use SImode
7001 for shift counts. */
7002 return SImode;
7003 }
7004
7005 /* An early place to adjust some flags after GCC has finished processing
7006 them. */
7007 static void
7008 asm_file_start (void)
7009 {
7010 /* Variable tracking should be run after all optimizations which
7011 change order of insns. It also needs a valid CFG. */
7012 spu_flag_var_tracking = flag_var_tracking;
7013 flag_var_tracking = 0;
7014
7015 default_file_start ();
7016 }
7017
7018 /* Implement targetm.section_type_flags. */
7019 static unsigned int
7020 spu_section_type_flags (tree decl, const char *name, int reloc)
7021 {
7022 /* .toe needs to have type @nobits. */
7023 if (strcmp (name, ".toe") == 0)
7024 return SECTION_BSS;
7025 /* Don't load _ea into the current address space. */
7026 if (strcmp (name, "._ea") == 0)
7027 return SECTION_WRITE | SECTION_DEBUG;
7028 return default_section_type_flags (decl, name, reloc);
7029 }
7030
7031 /* Implement targetm.select_section. */
7032 static section *
7033 spu_select_section (tree decl, int reloc, unsigned HOST_WIDE_INT align)
7034 {
7035 /* Variables and constants defined in the __ea address space
7036 go into a special section named "._ea". */
7037 if (TREE_TYPE (decl) != error_mark_node
7038 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) == ADDR_SPACE_EA)
7039 {
7040 /* We might get called with string constants, but get_named_section
7041 doesn't like them as they are not DECLs. Also, we need to set
7042 flags in that case. */
7043 if (!DECL_P (decl))
7044 return get_section ("._ea", SECTION_WRITE | SECTION_DEBUG, NULL);
7045
7046 return get_named_section (decl, "._ea", reloc);
7047 }
7048
7049 return default_elf_select_section (decl, reloc, align);
7050 }
7051
7052 /* Implement targetm.unique_section. */
7053 static void
7054 spu_unique_section (tree decl, int reloc)
7055 {
7056 /* We don't support unique section names in the __ea address
7057 space for now. */
7058 if (TREE_TYPE (decl) != error_mark_node
7059 && TYPE_ADDR_SPACE (TREE_TYPE (decl)) != 0)
7060 return;
7061
7062 default_unique_section (decl, reloc);
7063 }
7064
7065 /* Generate a constant or register which contains 2^SCALE. We assume
7066 the result is valid for MODE. Currently, MODE must be V4SFmode and
7067 SCALE must be SImode. */
7068 rtx
7069 spu_gen_exp2 (enum machine_mode mode, rtx scale)
7070 {
7071 gcc_assert (mode == V4SFmode);
7072 gcc_assert (GET_MODE (scale) == SImode || GET_CODE (scale) == CONST_INT);
7073 if (GET_CODE (scale) != CONST_INT)
7074 {
7075 /* unsigned int exp = (127 + scale) << 23;
7076 __vector float m = (__vector float) spu_splats (exp); */
7077 rtx reg = force_reg (SImode, scale);
7078 rtx exp = gen_reg_rtx (SImode);
7079 rtx mul = gen_reg_rtx (mode);
7080 emit_insn (gen_addsi3 (exp, reg, GEN_INT (127)));
7081 emit_insn (gen_ashlsi3 (exp, exp, GEN_INT (23)));
7082 emit_insn (gen_spu_splats (mul, gen_rtx_SUBREG (GET_MODE_INNER (mode), exp, 0)));
7083 return mul;
7084 }
7085 else
7086 {
7087 HOST_WIDE_INT exp = 127 + INTVAL (scale);
7088 unsigned char arr[16];
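/* Build the big-endian byte image of the V4SFmode constant whose every
   element has the bit pattern exp << 23: byte 0 of each word gets the
   upper seven exponent bits (exp >> 1), byte 1 gets the remaining
   exponent bit in its top position (exp << 7, truncated to a byte), and
   the mantissa bytes stay zero. */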
7089 arr[0] = arr[4] = arr[8] = arr[12] = exp >> 1;
7090 arr[1] = arr[5] = arr[9] = arr[13] = exp << 7;
7091 arr[2] = arr[6] = arr[10] = arr[14] = 0;
7092 arr[3] = arr[7] = arr[11] = arr[15] = 0;
7093 return array_to_constant (mode, arr);
7094 }
7095 }
7096
7097 /* After reload, just change the convert into a move instruction
7098 or a dead instruction. */
7099 void
7100 spu_split_convert (rtx ops[])
7101 {
7102 if (REGNO (ops[0]) == REGNO (ops[1]))
7103 emit_note (NOTE_INSN_DELETED);
7104 else
7105 {
7106 /* Always use TImode, as this might help hard register copy propagation. */
7107 rtx op0 = gen_rtx_REG (TImode, REGNO (ops[0]));
7108 rtx op1 = gen_rtx_REG (TImode, REGNO (ops[1]));
7109 emit_insn (gen_move_insn (op0, op1));
7110 }
7111 }
7112
7113 void
7114 spu_function_profiler (FILE * file, int labelno)
7115 {
7116 fprintf (file, "# profile\n");
7117 fprintf (file, "brsl $75, _mcount\n");
7118 }
7119
7120 #include "gt-spu.h"