1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
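/* Illustrative sketch (annotation, not in the original source): MODE_INDEX is
   how the per-mode cost arrays below are consulted.  Assuming the mult_init
   and divide field names from the processor_costs layout in i386.h, an SImode
   multiply or divide cost would be looked up roughly as:

       int mul_cost = ix86_cost->mult_init[MODE_INDEX (SImode)];
       int div_cost = ix86_cost->divide[MODE_INDEX (SImode)];

   i.e. index 2 for SImode, with index 4 ("other") covering wider modes.  */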
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
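/* Worked example (annotation, not in the original source): with
   COSTS_N_INSNS (N) assumed to be (N) * 4, as stated above, and
   COSTS_N_BYTES (N) being (N) * 2 here, the size_cost table below expresses
   byte counts on the same numeric scale that the speed tables use for
   instruction counts, e.g.

       COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1)

   so a 2-byte add when optimizing for size weighs the same as a one-insn add
   when optimizing for speed.  */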
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
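/* Reading note (annotation, not in the original source), assuming the
   stringop_algs layout from i386.h: each entry has the form
   {alg_for_unknown_size, {{max, alg}, ...}}, where the {max, alg} pairs pick
   an algorithm for known block sizes up to "max" bytes and a max of -1
   terminates the list.  The two such members at the end of each
   processor_costs record describe memcpy and memset respectively, each with
   a 32-bit and a 64-bit variant; DUMMY_STRINGOP_ALGS simply falls back to a
   libcall for a variant that is never used.  */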
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we
 354 ensure the alignment). For small blocks an inline loop is still a noticeable win;
 355 for bigger blocks either rep movsl or rep movsb is the way to go. Rep movsb
 356 apparently has a more expensive startup time in the CPU, but after 4K the
 357 difference is down in the noise. */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
 533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
 534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
 584 /* New AMD processors never drop prefetches; if they cannot be performed
 585 immediately, they are queued. We set the number of simultaneous prefetches
 586 to a large constant to reflect this (it is probably not a good idea to leave
 587 the number of prefetches entirely unlimited, as their execution also takes
 588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
 598 small blocks it is better to use a loop. For large blocks, the libcall can do
 599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
 657 /* New AMD processors never drop prefetches; if they cannot be performed
 658 immediately, they are queued. We set the number of simultaneous prefetches
 659 to a large constant to reflect this (it is probably not a good idea to leave
 660 the number of prefetches entirely unlimited, as their execution also takes
 661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
 671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 672 very small blocks it is better to use a loop. For large blocks, the libcall
 673 can do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
 828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
 862 /* On all chips taken into consideration, lea is 2 cycles or more. With
 863 this cost, however, our current implementation of synth_mult results in
 864 the use of unnecessary temporary registers, causing regressions on several
 865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
 907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
 908 value is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
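/* Usage note (annotation, not in the original source): ix86_cost points at the
   Pentium table here only as a default; override_options is expected to
   repoint it at the table matching -mtune (or at size_cost under -Os) before
   any costs are queried.  */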
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
 1003 /* Generic instruction choice should be a common subset of the supported CPUs
 1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
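/* Usage sketch (annotation, not in the original source): the x86_* feature
   words below are consulted by masking them with the active tuning or arch
   bit.  Assuming ix86_tune_mask is initialized as (1 << ix86_tune) in
   override_options, a tuning test looks roughly like:

       if (x86_use_leave & ix86_tune_mask)
         ;   (emit "leave" in the epilogue)

   The actual TARGET_* wrapper macros live in i386.h; this only shows how the
   bitmask encoding is consumed.  */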
1006
 1007 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
 1008 Generic64 seems like a good code size tradeoff. We can't enable it for 32-bit
 1009 generic because it does not work well with PPro-based chips. */
1010 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
1011 | m_GENERIC64;
1012 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1013 | m_NOCONA | m_CORE2 | m_GENERIC;
1014 const int x86_zero_extend_with_and = m_486 | m_PENT;
 1015 /* Enable zero extension of integer registers to avoid partial dependencies. */
1016 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1017 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
1018 const int x86_double_with_add = ~m_386;
1019 const int x86_use_bit_test = m_386;
1020 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
1021 | m_K6 | m_CORE2 | m_GENERIC;
1022 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1023 | m_NOCONA;
1024 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
1025 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
1026 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 1027 /* Branch hints were put into the P4 based on simulation results. But
 1028 after the P4 was made, no performance benefit was observed from
 1029 branch hints; they also increase code size. As a result,
 1030 icc never generates branch hints. */
1031 const int x86_branch_hints = 0;
1032 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
1033 /*m_GENERIC | m_ATHLON_K8 ? */
 1034 /* We probably ought to watch for partial register stalls for the Generic32
 1035 compilation setting as well. However, in the current implementation the
 1036 partial register stalls are not eliminated very well - they can
 1037 be introduced via subregs synthesized by combine and can happen
 1038 in caller/callee saving sequences.
 1039 Because this option pays back little on PPro-based chips and conflicts
 1040 with the partial register dependencies used by Athlon/P4-based chips, it is
 1041 better to leave it off for generic32 for now. */
1042 const int x86_partial_reg_stall = m_PPRO;
1043 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
1044 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
1045 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
1046 | m_CORE2 | m_GENERIC);
1047 const int x86_use_mov0 = m_K6;
1048 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
1049 /* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1050 const int x86_use_xchgb = m_PENT4;
1051 const int x86_read_modify_write = ~m_PENT;
1052 const int x86_read_modify = ~(m_PENT | m_PPRO);
1053 const int x86_split_long_moves = m_PPRO;
1054 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
1055 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1056 /* m_PENT4 ? */
1057 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
1058 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
1059 const int x86_qimode_math = ~(0);
1060 const int x86_promote_qi_regs = 0;
 1061 /* On PPro this flag is meant to avoid partial register stalls. Just like
 1062 x86_partial_reg_stall, this option might be considered for Generic32
 1063 if our scheme for avoiding partial stalls were more effective. */
1064 const int x86_himode_math = ~(m_PPRO);
1065 const int x86_promote_hi_regs = m_PPRO;
1066 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1067 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1068 | m_CORE2 | m_GENERIC;
1069 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1070 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1071 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
1072 | m_CORE2 | m_GENERIC;
1073 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1074 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1075 /* Enable if integer moves are preferred for DFmode copies */
1076 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1077 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
1078 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1079 | m_CORE2 | m_GENERIC;
1080 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1081 | m_CORE2 | m_GENERIC;
1082 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1083 for outgoing arguments will be computed and placed into the variable
1084 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1085 for each call; instead, the function prologue should increase the stack frame
1086 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1087 not proper. */
1088 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
1089 | m_NOCONA | m_PPRO | m_CORE2
1090 | m_GENERIC;
1091 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1092 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1093 const int x86_shift1 = ~m_486;
1094 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
1095 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1096 | m_NOCONA | m_CORE2 | m_GENERIC;
 1097 /* In the Generic model we have a conflict here between PPro/Pentium4-based chips
 1098 that treat 128-bit SSE registers as single units and K8-based chips that
 1099 split SSE registers into two 64-bit halves.
 1100 x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
 1101 to allow register renaming on 128-bit SSE units, but it usually results in one
 1102 extra microop on 64-bit SSE units. Experimental results show that disabling
 1103 this option on P4 brings over a 20% SPECfp regression, while enabling it on
 1104 K8 brings roughly a 2.4% regression that can be partly masked by careful
 1105 scheduling of moves. */
1106 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1107 | m_GENERIC | m_AMDFAM10;
 1108 /* Set for machines where types and dependencies are resolved on SSE
 1109 register parts instead of whole registers, so we may maintain just the
 1110 lower part of scalar values in the proper format, leaving the upper part
 1111 undefined. */
1112 const int x86_sse_split_regs = m_ATHLON_K8;
1113 /* Code generation for scalar reg-reg moves of single and double precision data:
1114 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1115 movaps reg, reg
1116 else
1117 movss reg, reg
1118 if (x86_sse_partial_reg_dependency == true)
1119 movapd reg, reg
1120 else
1121 movsd reg, reg
1122
1123 Code generation for scalar loads of double precision data:
1124 if (x86_sse_split_regs == true)
1125 movlpd mem, reg (gas syntax)
1126 else
1127 movsd mem, reg
1128
1129 Code generation for unaligned packed loads of single precision data
1130 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1131 if (x86_sse_unaligned_move_optimal)
1132 movups mem, reg
1133
1134 if (x86_sse_partial_reg_dependency == true)
1135 {
1136 xorps reg, reg
1137 movlps mem, reg
1138 movhps mem+8, reg
1139 }
1140 else
1141 {
1142 movlps mem, reg
1143 movhps mem+8, reg
1144 }
1145
1146 Code generation for unaligned packed loads of double precision data
1147 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1148 if (x86_sse_unaligned_move_optimal)
1149 movupd mem, reg
1150
1151 if (x86_sse_split_regs == true)
1152 {
1153 movlpd mem, reg
1154 movhpd mem+8, reg
1155 }
1156 else
1157 {
1158 movsd mem, reg
1159 movhpd mem+8, reg
1160 }
1161 */
1162 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1163 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1164 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1165 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1166 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1167
1168 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
1169
1170 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1171 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1172 /* Some CPU cores are not able to predict more than 4 branch instructions in
1173 the 16 byte window. */
1174 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1175 | m_NOCONA | m_CORE2 | m_GENERIC;
1176 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1177 | m_CORE2 | m_GENERIC;
1178 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1179 /* Compare and exchange was added for 80486. */
1180 const int x86_cmpxchg = ~m_386;
 1181 /* Compare and exchange 8 bytes was added for the Pentium. */
1182 const int x86_cmpxchg8b = ~(m_386 | m_486);
1183 /* Exchange and add was added for 80486. */
1184 const int x86_xadd = ~m_386;
1185 /* Byteswap was added for 80486. */
1186 const int x86_bswap = ~m_386;
1187 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1188
1189 static enum stringop_alg stringop_alg = no_stringop;
1190
 1191 /* If the average insn count for a single function invocation is
 1192 lower than this constant, emit fast (but longer) prologue and
 1193 epilogue code. */
1194 #define FAST_PROLOGUE_INSN_COUNT 20
1195
1196 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1197 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1198 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1199 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1200
1201 /* Array of the smallest class containing reg number REGNO, indexed by
1202 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1203
1204 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1205 {
1206 /* ax, dx, cx, bx */
1207 AREG, DREG, CREG, BREG,
1208 /* si, di, bp, sp */
1209 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1210 /* FP registers */
1211 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1212 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1213 /* arg pointer */
1214 NON_Q_REGS,
1215 /* flags, fpsr, fpcr, frame */
1216 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1217 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1218 SSE_REGS, SSE_REGS,
1219 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1220 MMX_REGS, MMX_REGS,
1221 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1222 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1223 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1224 SSE_REGS, SSE_REGS,
1225 };
1226
1227 /* The "default" register map used in 32bit mode. */
1228
1229 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1230 {
1231 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1232 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1233 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1234 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1235 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1236 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1237 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1238 };
1239
1240 static int const x86_64_int_parameter_registers[6] =
1241 {
1242 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1243 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1244 };
1245
1246 static int const x86_64_int_return_registers[4] =
1247 {
 1248 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1249 };
1250
1251 /* The "default" register map used in 64bit mode. */
1252 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1253 {
1254 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1255 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1256 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1257 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1258 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1259 8,9,10,11,12,13,14,15, /* extended integer registers */
1260 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1261 };
1262
1263 /* Define the register numbers to be used in Dwarf debugging information.
1264 The SVR4 reference port C compiler uses the following register numbers
1265 in its Dwarf output code:
1266 0 for %eax (gcc regno = 0)
1267 1 for %ecx (gcc regno = 2)
1268 2 for %edx (gcc regno = 1)
1269 3 for %ebx (gcc regno = 3)
1270 4 for %esp (gcc regno = 7)
1271 5 for %ebp (gcc regno = 6)
1272 6 for %esi (gcc regno = 4)
1273 7 for %edi (gcc regno = 5)
1274 The following three DWARF register numbers are never generated by
1275 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1276 believes these numbers have these meanings.
1277 8 for %eip (no gcc equivalent)
1278 9 for %eflags (gcc regno = 17)
1279 10 for %trapno (no gcc equivalent)
1280 It is not at all clear how we should number the FP stack registers
1281 for the x86 architecture. If the version of SDB on x86/svr4 were
1282 a bit less brain dead with respect to floating-point then we would
1283 have a precedent to follow with respect to DWARF register numbers
1284 for x86 FP registers, but the SDB on x86/svr4 is so completely
1285 broken with respect to FP registers that it is hardly worth thinking
1286 of it as something to strive for compatibility with.
1287 The version of x86/svr4 SDB I have at the moment does (partially)
1288 seem to believe that DWARF register number 11 is associated with
1289 the x86 register %st(0), but that's about all. Higher DWARF
1290 register numbers don't seem to be associated with anything in
1291 particular, and even for DWARF regno 11, SDB only seems to under-
1292 stand that it should say that a variable lives in %st(0) (when
1293 asked via an `=' command) if we said it was in DWARF regno 11,
1294 but SDB still prints garbage when asked for the value of the
1295 variable in question (via a `/' command).
1296 (Also note that the labels SDB prints for various FP stack regs
1297 when doing an `x' command are all wrong.)
1298 Note that these problems generally don't affect the native SVR4
1299 C compiler because it doesn't allow the use of -O with -g and
1300 because when it is *not* optimizing, it allocates a memory
1301 location for each floating-point variable, and the memory
1302 location is what gets described in the DWARF AT_location
1303 attribute for the variable in question.
1304 Regardless of the severe mental illness of the x86/svr4 SDB, we
1305 do something sensible here and we use the following DWARF
1306 register numbers. Note that these are all stack-top-relative
1307 numbers.
1308 11 for %st(0) (gcc regno = 8)
1309 12 for %st(1) (gcc regno = 9)
1310 13 for %st(2) (gcc regno = 10)
1311 14 for %st(3) (gcc regno = 11)
1312 15 for %st(4) (gcc regno = 12)
1313 16 for %st(5) (gcc regno = 13)
1314 17 for %st(6) (gcc regno = 14)
1315 18 for %st(7) (gcc regno = 15)
1316 */
1317 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1318 {
1319 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1320 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1321 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1322 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1323 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1324 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1325 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1326 };
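/* For example, with this mapping GCC regno 2 (%ecx) is emitted as DWARF
   register 1, and GCC regno 8 (%st(0)) as DWARF register 11, exactly as
   listed in the comment above.  */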
1327
1328 /* Test and compare insns in i386.md store the information needed to
1329 generate branch and scc insns here. */
1330
1331 rtx ix86_compare_op0 = NULL_RTX;
1332 rtx ix86_compare_op1 = NULL_RTX;
1333 rtx ix86_compare_emitted = NULL_RTX;
1334
1335 /* Size of the register save area. */
1336 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
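/* Illustration only, assuming the usual 64-bit values REGPARM_MAX == 6,
   SSE_REGPARM_MAX == 8 and UNITS_PER_WORD == 8:
       X86_64_VARARGS_SIZE = 6 * 8 + 8 * 16 = 176 bytes of register save area.  */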
1337
1338 /* Define the structure for the machine field in struct function. */
1339
1340 struct stack_local_entry GTY(())
1341 {
1342 unsigned short mode;
1343 unsigned short n;
1344 rtx rtl;
1345 struct stack_local_entry *next;
1346 };
1347
1348 /* Structure describing stack frame layout.
1349 Stack grows downward:
1350
1351 [arguments]
1352 <- ARG_POINTER
1353 saved pc
1354
1355 saved frame pointer if frame_pointer_needed
1356 <- HARD_FRAME_POINTER
1357 [saved regs]
1358
1359 [padding1] \
1360 )
1361 [va_arg registers] (
1362 > to_allocate <- FRAME_POINTER
1363 [frame] (
1364 )
1365 [padding2] /
1366 */
1367 struct ix86_frame
1368 {
1369 int nregs;
1370 int padding1;
1371 int va_arg_size;
1372 HOST_WIDE_INT frame;
1373 int padding2;
1374 int outgoing_arguments_size;
1375 int red_zone_size;
1376
1377 HOST_WIDE_INT to_allocate;
1378 /* The offsets relative to ARG_POINTER. */
1379 HOST_WIDE_INT frame_pointer_offset;
1380 HOST_WIDE_INT hard_frame_pointer_offset;
1381 HOST_WIDE_INT stack_pointer_offset;
1382
1383 /* When save_regs_using_mov is set, emit prologue using
1384 move instead of push instructions. */
1385 bool save_regs_using_mov;
1386 };
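/* Reading the diagram above together with this structure: as drawn,
   to_allocate covers padding1, the va_arg register save area, the frame
   proper and padding2, while frame_pointer_offset, hard_frame_pointer_offset
   and stack_pointer_offset locate FRAME_POINTER, HARD_FRAME_POINTER and the
   final stack pointer, all measured from ARG_POINTER.  */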
1387
1388 /* Code model option. */
1389 enum cmodel ix86_cmodel;
1390 /* Asm dialect. */
1391 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1392 /* TLS dialects. */
1393 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1394
1395 /* Which unit we are generating floating point math for. */
1396 enum fpmath_unit ix86_fpmath;
1397
1398 /* Which cpu are we scheduling for. */
1399 enum processor_type ix86_tune;
1400 int ix86_tune_mask;
1401
1402 /* Which instruction set architecture to use. */
1403 enum processor_type ix86_arch;
1404 int ix86_arch_mask;
1405
1406 /* True if the SSE prefetch instruction is not a NOP.  */
1407 int x86_prefetch_sse;
1408
1409 /* True if cmpxchg16b is supported.  */
1410 int x86_cmpxchg16b;
1411
1412 /* ix86_regparm_string as a number */
1413 static int ix86_regparm;
1414
1415 /* -mstackrealign option */
1416 extern int ix86_force_align_arg_pointer;
1417 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1418
1419 /* Preferred alignment for stack boundary in bits. */
1420 unsigned int ix86_preferred_stack_boundary;
1421
1422 /* Values 1-5: see jump.c */
1423 int ix86_branch_cost;
1424
1425 /* Variables this size or smaller are put in the normal data/bss
1426 sections; larger ones go into the ldata/lbss sections. */
1427
1428 int ix86_section_threshold = 65536;
1429
1430 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1431 char internal_label_prefix[16];
1432 int internal_label_prefix_len;
1433 \f
1434 static bool ix86_handle_option (size_t, const char *, int);
1435 static void output_pic_addr_const (FILE *, rtx, int);
1436 static void put_condition_code (enum rtx_code, enum machine_mode,
1437 int, int, FILE *);
1438 static const char *get_some_local_dynamic_name (void);
1439 static int get_some_local_dynamic_name_1 (rtx *, void *);
1440 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1441 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1442 rtx *);
1443 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1444 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1445 enum machine_mode);
1446 static rtx get_thread_pointer (int);
1447 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1448 static void get_pc_thunk_name (char [32], unsigned int);
1449 static rtx gen_push (rtx);
1450 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1451 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1452 static struct machine_function * ix86_init_machine_status (void);
1453 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1454 static int ix86_nsaved_regs (void);
1455 static void ix86_emit_save_regs (void);
1456 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1457 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1458 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1459 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1460 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1461 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1462 static int ix86_issue_rate (void);
1463 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1464 static int ia32_multipass_dfa_lookahead (void);
1465 static void ix86_init_mmx_sse_builtins (void);
1466 static rtx x86_this_parameter (tree);
1467 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1468 HOST_WIDE_INT, tree);
1469 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1470 static void x86_file_start (void);
1471 static void ix86_reorg (void);
1472 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1473 static tree ix86_build_builtin_va_list (void);
1474 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1475 tree, int *, int);
1476 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1477 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1478 static bool ix86_vector_mode_supported_p (enum machine_mode);
1479
1480 static int ix86_address_cost (rtx);
1481 static bool ix86_cannot_force_const_mem (rtx);
1482 static rtx ix86_delegitimize_address (rtx);
1483
1484 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1485
1486 struct builtin_description;
1487 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1488 tree, rtx);
1489 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1490 tree, rtx);
1491 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1492 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1493 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1494 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1495 static rtx safe_vector_operand (rtx, enum machine_mode);
1496 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1497 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1498 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1499 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1500 static int ix86_fp_comparison_cost (enum rtx_code code);
1501 static unsigned int ix86_select_alt_pic_regnum (void);
1502 static int ix86_save_reg (unsigned int, int);
1503 static void ix86_compute_frame_layout (struct ix86_frame *);
1504 static int ix86_comp_type_attributes (tree, tree);
1505 static int ix86_function_regparm (tree, tree);
1506 const struct attribute_spec ix86_attribute_table[];
1507 static bool ix86_function_ok_for_sibcall (tree, tree);
1508 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1509 static int ix86_value_regno (enum machine_mode, tree, tree);
1510 static bool contains_128bit_aligned_vector_p (tree);
1511 static rtx ix86_struct_value_rtx (tree, int);
1512 static bool ix86_ms_bitfield_layout_p (tree);
1513 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1514 static int extended_reg_mentioned_1 (rtx *, void *);
1515 static bool ix86_rtx_costs (rtx, int, int, int *);
1516 static int min_insn_size (rtx);
1517 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1518 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1519 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1520 tree, bool);
1521 static void ix86_init_builtins (void);
1522 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1523 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1524 static tree ix86_builtin_conversion (enum tree_code, tree);
1525 static const char *ix86_mangle_fundamental_type (tree);
1526 static tree ix86_stack_protect_fail (void);
1527 static rtx ix86_internal_arg_pointer (void);
1528 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1529 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1530 rtx, rtx, int);
1531
1532 /* This function is only used on Solaris. */
1533 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1534 ATTRIBUTE_UNUSED;
1535
1536 /* Register class used for passing a given 64-bit part of the argument.
1537 These represent classes as documented by the psABI, with the exception
1538 of the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1539 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1540 
1541 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1542 whenever possible (the upper half then contains only padding).
1543 */
1544 enum x86_64_reg_class
1545 {
1546 X86_64_NO_CLASS,
1547 X86_64_INTEGER_CLASS,
1548 X86_64_INTEGERSI_CLASS,
1549 X86_64_SSE_CLASS,
1550 X86_64_SSESF_CLASS,
1551 X86_64_SSEDF_CLASS,
1552 X86_64_SSEUP_CLASS,
1553 X86_64_X87_CLASS,
1554 X86_64_X87UP_CLASS,
1555 X86_64_COMPLEX_X87_CLASS,
1556 X86_64_MEMORY_CLASS
1557 };
1558 static const char * const x86_64_reg_class_name[] = {
1559 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1560 "sseup", "x87", "x87up", "cplx87", "no"
1561 };
1562
1563 #define MAX_CLASSES 4
1564
1565 /* Table of constants used by fldpi, fldln2, etc.... */
1566 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1567 static bool ext_80387_constants_init = 0;
1568 static void init_ext_80387_constants (void);
1569 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1570 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1571 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1572 static section *x86_64_elf_select_section (tree decl, int reloc,
1573 unsigned HOST_WIDE_INT align)
1574 ATTRIBUTE_UNUSED;
1575 \f
1576 /* Initialize the GCC target structure. */
1577 #undef TARGET_ATTRIBUTE_TABLE
1578 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1579 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1580 # undef TARGET_MERGE_DECL_ATTRIBUTES
1581 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1582 #endif
1583
1584 #undef TARGET_COMP_TYPE_ATTRIBUTES
1585 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1586
1587 #undef TARGET_INIT_BUILTINS
1588 #define TARGET_INIT_BUILTINS ix86_init_builtins
1589 #undef TARGET_EXPAND_BUILTIN
1590 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1591
1592 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1593 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1594 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1595 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1596
1597 #undef TARGET_ASM_FUNCTION_EPILOGUE
1598 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1599
1600 #undef TARGET_ENCODE_SECTION_INFO
1601 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1602 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1603 #else
1604 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1605 #endif
1606
1607 #undef TARGET_ASM_OPEN_PAREN
1608 #define TARGET_ASM_OPEN_PAREN ""
1609 #undef TARGET_ASM_CLOSE_PAREN
1610 #define TARGET_ASM_CLOSE_PAREN ""
1611
1612 #undef TARGET_ASM_ALIGNED_HI_OP
1613 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1614 #undef TARGET_ASM_ALIGNED_SI_OP
1615 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1616 #ifdef ASM_QUAD
1617 #undef TARGET_ASM_ALIGNED_DI_OP
1618 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1619 #endif
1620
1621 #undef TARGET_ASM_UNALIGNED_HI_OP
1622 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1623 #undef TARGET_ASM_UNALIGNED_SI_OP
1624 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1625 #undef TARGET_ASM_UNALIGNED_DI_OP
1626 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1627
1628 #undef TARGET_SCHED_ADJUST_COST
1629 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1630 #undef TARGET_SCHED_ISSUE_RATE
1631 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1632 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1633 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1634 ia32_multipass_dfa_lookahead
1635
1636 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1637 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1638
1639 #ifdef HAVE_AS_TLS
1640 #undef TARGET_HAVE_TLS
1641 #define TARGET_HAVE_TLS true
1642 #endif
1643 #undef TARGET_CANNOT_FORCE_CONST_MEM
1644 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1645 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1646 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1647
1648 #undef TARGET_DELEGITIMIZE_ADDRESS
1649 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1650
1651 #undef TARGET_MS_BITFIELD_LAYOUT_P
1652 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1653
1654 #if TARGET_MACHO
1655 #undef TARGET_BINDS_LOCAL_P
1656 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1657 #endif
1658
1659 #undef TARGET_ASM_OUTPUT_MI_THUNK
1660 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1661 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1662 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1663
1664 #undef TARGET_ASM_FILE_START
1665 #define TARGET_ASM_FILE_START x86_file_start
1666
1667 #undef TARGET_DEFAULT_TARGET_FLAGS
1668 #define TARGET_DEFAULT_TARGET_FLAGS \
1669 (TARGET_DEFAULT \
1670 | TARGET_64BIT_DEFAULT \
1671 | TARGET_SUBTARGET_DEFAULT \
1672 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1673
1674 #undef TARGET_HANDLE_OPTION
1675 #define TARGET_HANDLE_OPTION ix86_handle_option
1676
1677 #undef TARGET_RTX_COSTS
1678 #define TARGET_RTX_COSTS ix86_rtx_costs
1679 #undef TARGET_ADDRESS_COST
1680 #define TARGET_ADDRESS_COST ix86_address_cost
1681
1682 #undef TARGET_FIXED_CONDITION_CODE_REGS
1683 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1684 #undef TARGET_CC_MODES_COMPATIBLE
1685 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1686
1687 #undef TARGET_MACHINE_DEPENDENT_REORG
1688 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1689
1690 #undef TARGET_BUILD_BUILTIN_VA_LIST
1691 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1692
1693 #undef TARGET_MD_ASM_CLOBBERS
1694 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1695
1696 #undef TARGET_PROMOTE_PROTOTYPES
1697 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1698 #undef TARGET_STRUCT_VALUE_RTX
1699 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1700 #undef TARGET_SETUP_INCOMING_VARARGS
1701 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1702 #undef TARGET_MUST_PASS_IN_STACK
1703 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1704 #undef TARGET_PASS_BY_REFERENCE
1705 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1706 #undef TARGET_INTERNAL_ARG_POINTER
1707 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1708 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1709 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1710
1711 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1712 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1713
1714 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1715 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1716
1717 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1718 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1719
1720 #ifdef HAVE_AS_TLS
1721 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1722 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1723 #endif
1724
1725 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1726 #undef TARGET_INSERT_ATTRIBUTES
1727 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1728 #endif
1729
1730 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1731 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1732
1733 #undef TARGET_STACK_PROTECT_FAIL
1734 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1735
1736 #undef TARGET_FUNCTION_VALUE
1737 #define TARGET_FUNCTION_VALUE ix86_function_value
1738
1739 struct gcc_target targetm = TARGET_INITIALIZER;
1740
1741 \f
1742 /* The svr4 ABI for the i386 says that records and unions are returned
1743 in memory. */
1744 #ifndef DEFAULT_PCC_STRUCT_RETURN
1745 #define DEFAULT_PCC_STRUCT_RETURN 1
1746 #endif
1747
1748 /* Implement TARGET_HANDLE_OPTION. */
1749
1750 static bool
1751 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1752 {
1753 switch (code)
1754 {
1755 case OPT_m3dnow:
1756 if (!value)
1757 {
1758 target_flags &= ~MASK_3DNOW_A;
1759 target_flags_explicit |= MASK_3DNOW_A;
1760 }
1761 return true;
1762
1763 case OPT_mmmx:
1764 if (!value)
1765 {
1766 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1767 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1768 }
1769 return true;
1770
1771 case OPT_msse:
1772 if (!value)
1773 {
1774 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1775 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1776 }
1777 return true;
1778
1779 case OPT_msse2:
1780 if (!value)
1781 {
1782 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1783 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1784 }
1785 return true;
1786
1787 case OPT_msse3:
1788 if (!value)
1789 {
1790 target_flags &= ~MASK_SSE4A;
1791 target_flags_explicit |= MASK_SSE4A;
1792 }
1793 return true;
1794
1795 default:
1796 return true;
1797 }
1798 }
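/* The net effect of the cascades above: disabling a lower ISA level also
   disables the levels built on top of it and records them as explicitly set,
   so a later -march default cannot silently re-enable them.  For example,
   -mno-sse also clears MASK_SSE2, MASK_SSE3 and MASK_SSE4A.  */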
1799
1800 /* Sometimes certain combinations of command options do not make
1801 sense on a particular target machine. You can define a macro
1802 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1803 defined, is executed once just after all the command options have
1804 been parsed.
1805
1806 Don't use this macro to turn on various extra optimizations for
1807 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1808
1809 void
1810 override_options (void)
1811 {
1812 int i;
1813 int ix86_tune_defaulted = 0;
1814
1815 /* Comes from final.c -- no real reason to change it. */
1816 #define MAX_CODE_ALIGN 16
1817
1818 static struct ptt
1819 {
1820 const struct processor_costs *cost; /* Processor costs */
1821 const int target_enable; /* Target flags to enable. */
1822 const int target_disable; /* Target flags to disable. */
1823 const int align_loop; /* Default alignments. */
1824 const int align_loop_max_skip;
1825 const int align_jump;
1826 const int align_jump_max_skip;
1827 const int align_func;
1828 }
1829 const processor_target_table[PROCESSOR_max] =
1830 {
1831 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1832 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1833 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1834 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1835 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1836 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1837 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1838 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1839 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1840 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1841 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1842 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1843 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1844 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1845 };
1846
1847 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1848 static struct pta
1849 {
1850 const char *const name; /* processor name or nickname. */
1851 const enum processor_type processor;
1852 const enum pta_flags
1853 {
1854 PTA_SSE = 1,
1855 PTA_SSE2 = 2,
1856 PTA_SSE3 = 4,
1857 PTA_MMX = 8,
1858 PTA_PREFETCH_SSE = 16,
1859 PTA_3DNOW = 32,
1860 PTA_3DNOW_A = 64,
1861 PTA_64BIT = 128,
1862 PTA_SSSE3 = 256,
1863 PTA_CX16 = 512,
1864 PTA_POPCNT = 1024,
1865 PTA_ABM = 2048,
1866 PTA_SSE4A = 4096
1867 } flags;
1868 }
1869 const processor_alias_table[] =
1870 {
1871 {"i386", PROCESSOR_I386, 0},
1872 {"i486", PROCESSOR_I486, 0},
1873 {"i586", PROCESSOR_PENTIUM, 0},
1874 {"pentium", PROCESSOR_PENTIUM, 0},
1875 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1876 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1877 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1878 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1879 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1880 {"i686", PROCESSOR_PENTIUMPRO, 0},
1881 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1882 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1883 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1884 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1885 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1886 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1887 | PTA_MMX | PTA_PREFETCH_SSE},
1888 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1889 | PTA_MMX | PTA_PREFETCH_SSE},
1890 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1891 | PTA_MMX | PTA_PREFETCH_SSE},
1892 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1893 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1894 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1895 | PTA_64BIT | PTA_MMX
1896 | PTA_PREFETCH_SSE | PTA_CX16},
1897 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1898 | PTA_3DNOW_A},
1899 {"k6", PROCESSOR_K6, PTA_MMX},
1900 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1901 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1902 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1903 | PTA_3DNOW_A},
1904 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1905 | PTA_3DNOW | PTA_3DNOW_A},
1906 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1907 | PTA_3DNOW_A | PTA_SSE},
1908 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1909 | PTA_3DNOW_A | PTA_SSE},
1910 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1911 | PTA_3DNOW_A | PTA_SSE},
1912 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1913 | PTA_SSE | PTA_SSE2 },
1914 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1915 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1916 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1917 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1918 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1919 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1920 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1921 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1922 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1923 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1924 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1925 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1926 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1927 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1928 };
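/* Reading the table: each entry lists everything that CPU implies, so e.g.
   -march=k8 enables MMX, 3DNow!, 3DNow!A, SSE, SSE2 and SSE prefetch (unless
   any of them was explicitly disabled) and is accepted for 64-bit
   compilation, while the core2 entry additionally lists SSE3, SSSE3 and CX16
   but no 3DNow!.  */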
1929
1930 int const pta_size = ARRAY_SIZE (processor_alias_table);
1931
1932 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1933 SUBTARGET_OVERRIDE_OPTIONS;
1934 #endif
1935
1936 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1937 SUBSUBTARGET_OVERRIDE_OPTIONS;
1938 #endif
1939
1940 /* -fPIC is the default for x86_64. */
1941 if (TARGET_MACHO && TARGET_64BIT)
1942 flag_pic = 2;
1943
1944 /* Set the default values for switches whose default depends on TARGET_64BIT
1945 in case they weren't overwritten by command line options. */
1946 if (TARGET_64BIT)
1947 {
1948 /* Mach-O doesn't support omitting the frame pointer for now. */
1949 if (flag_omit_frame_pointer == 2)
1950 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1951 if (flag_asynchronous_unwind_tables == 2)
1952 flag_asynchronous_unwind_tables = 1;
1953 if (flag_pcc_struct_return == 2)
1954 flag_pcc_struct_return = 0;
1955 }
1956 else
1957 {
1958 if (flag_omit_frame_pointer == 2)
1959 flag_omit_frame_pointer = 0;
1960 if (flag_asynchronous_unwind_tables == 2)
1961 flag_asynchronous_unwind_tables = 0;
1962 if (flag_pcc_struct_return == 2)
1963 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1964 }
1965
1966 /* Need to check -mtune=generic first. */
1967 if (ix86_tune_string)
1968 {
1969 if (!strcmp (ix86_tune_string, "generic")
1970 || !strcmp (ix86_tune_string, "i686")
1971 /* As special support for cross compilers we read -mtune=native
1972 as -mtune=generic. With native compilers we won't see the
1973 -mtune=native, as it was changed by the driver. */
1974 || !strcmp (ix86_tune_string, "native"))
1975 {
1976 if (TARGET_64BIT)
1977 ix86_tune_string = "generic64";
1978 else
1979 ix86_tune_string = "generic32";
1980 }
1981 else if (!strncmp (ix86_tune_string, "generic", 7))
1982 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1983 }
1984 else
1985 {
1986 if (ix86_arch_string)
1987 ix86_tune_string = ix86_arch_string;
1988 if (!ix86_tune_string)
1989 {
1990 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1991 ix86_tune_defaulted = 1;
1992 }
1993
1994 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1995 need to use a sensible tune option. */
1996 if (!strcmp (ix86_tune_string, "generic")
1997 || !strcmp (ix86_tune_string, "x86-64")
1998 || !strcmp (ix86_tune_string, "i686"))
1999 {
2000 if (TARGET_64BIT)
2001 ix86_tune_string = "generic64";
2002 else
2003 ix86_tune_string = "generic32";
2004 }
2005 }
2006 if (ix86_stringop_string)
2007 {
2008 if (!strcmp (ix86_stringop_string, "rep_byte"))
2009 stringop_alg = rep_prefix_1_byte;
2010 else if (!strcmp (ix86_stringop_string, "libcall"))
2011 stringop_alg = libcall;
2012 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2013 stringop_alg = rep_prefix_4_byte;
2014 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2015 stringop_alg = rep_prefix_8_byte;
2016 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2017 stringop_alg = loop_1_byte;
2018 else if (!strcmp (ix86_stringop_string, "loop"))
2019 stringop_alg = loop;
2020 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2021 stringop_alg = unrolled_loop;
2022 else
2023 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2024 }
2025 if (!strcmp (ix86_tune_string, "x86-64"))
2026 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2027 "-mtune=generic instead as appropriate.");
2028
2029 if (!ix86_arch_string)
2030 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2031 if (!strcmp (ix86_arch_string, "generic"))
2032 error ("generic CPU can be used only for -mtune= switch");
2033 if (!strncmp (ix86_arch_string, "generic", 7))
2034 error ("bad value (%s) for -march= switch", ix86_arch_string);
2035
2036 if (ix86_cmodel_string != 0)
2037 {
2038 if (!strcmp (ix86_cmodel_string, "small"))
2039 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2040 else if (!strcmp (ix86_cmodel_string, "medium"))
2041 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2042 else if (flag_pic)
2043 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2044 else if (!strcmp (ix86_cmodel_string, "32"))
2045 ix86_cmodel = CM_32;
2046 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2047 ix86_cmodel = CM_KERNEL;
2048 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2049 ix86_cmodel = CM_LARGE;
2050 else
2051 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2052 }
2053 else
2054 {
2055 ix86_cmodel = CM_32;
2056 if (TARGET_64BIT)
2057 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2058 }
2059 if (ix86_asm_string != 0)
2060 {
2061 if (! TARGET_MACHO
2062 && !strcmp (ix86_asm_string, "intel"))
2063 ix86_asm_dialect = ASM_INTEL;
2064 else if (!strcmp (ix86_asm_string, "att"))
2065 ix86_asm_dialect = ASM_ATT;
2066 else
2067 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2068 }
2069 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2070 error ("code model %qs not supported in the %s bit mode",
2071 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2072 if (ix86_cmodel == CM_LARGE)
2073 sorry ("code model %<large%> not supported yet");
2074 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2075 sorry ("%i-bit mode not compiled in",
2076 (target_flags & MASK_64BIT) ? 64 : 32);
2077
2078 for (i = 0; i < pta_size; i++)
2079 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2080 {
2081 ix86_arch = processor_alias_table[i].processor;
2082 /* Default cpu tuning to the architecture. */
2083 ix86_tune = ix86_arch;
2084 if (processor_alias_table[i].flags & PTA_MMX
2085 && !(target_flags_explicit & MASK_MMX))
2086 target_flags |= MASK_MMX;
2087 if (processor_alias_table[i].flags & PTA_3DNOW
2088 && !(target_flags_explicit & MASK_3DNOW))
2089 target_flags |= MASK_3DNOW;
2090 if (processor_alias_table[i].flags & PTA_3DNOW_A
2091 && !(target_flags_explicit & MASK_3DNOW_A))
2092 target_flags |= MASK_3DNOW_A;
2093 if (processor_alias_table[i].flags & PTA_SSE
2094 && !(target_flags_explicit & MASK_SSE))
2095 target_flags |= MASK_SSE;
2096 if (processor_alias_table[i].flags & PTA_SSE2
2097 && !(target_flags_explicit & MASK_SSE2))
2098 target_flags |= MASK_SSE2;
2099 if (processor_alias_table[i].flags & PTA_SSE3
2100 && !(target_flags_explicit & MASK_SSE3))
2101 target_flags |= MASK_SSE3;
2102 if (processor_alias_table[i].flags & PTA_SSSE3
2103 && !(target_flags_explicit & MASK_SSSE3))
2104 target_flags |= MASK_SSSE3;
2105 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2106 x86_prefetch_sse = true;
2107 if (processor_alias_table[i].flags & PTA_CX16)
2108 x86_cmpxchg16b = true;
2109 if (processor_alias_table[i].flags & PTA_POPCNT
2110 && !(target_flags_explicit & MASK_POPCNT))
2111 target_flags |= MASK_POPCNT;
2112 if (processor_alias_table[i].flags & PTA_ABM
2113 && !(target_flags_explicit & MASK_ABM))
2114 target_flags |= MASK_ABM;
2115 if (processor_alias_table[i].flags & PTA_SSE4A
2116 && !(target_flags_explicit & MASK_SSE4A))
2117 target_flags |= MASK_SSE4A;
2118 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2119 error ("CPU you selected does not support x86-64 "
2120 "instruction set");
2121 break;
2122 }
2123
2124 if (i == pta_size)
2125 error ("bad value (%s) for -march= switch", ix86_arch_string);
2126
2127 for (i = 0; i < pta_size; i++)
2128 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2129 {
2130 ix86_tune = processor_alias_table[i].processor;
2131 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2132 {
2133 if (ix86_tune_defaulted)
2134 {
2135 ix86_tune_string = "x86-64";
2136 for (i = 0; i < pta_size; i++)
2137 if (! strcmp (ix86_tune_string,
2138 processor_alias_table[i].name))
2139 break;
2140 ix86_tune = processor_alias_table[i].processor;
2141 }
2142 else
2143 error ("CPU you selected does not support x86-64 "
2144 "instruction set");
2145 }
2146 /* Intel CPUs have always interpreted SSE prefetch instructions as
2147 NOPs; so, we can enable SSE prefetch instructions even when
2148 -mtune (rather than -march) points us to a processor that has them.
2149 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2150 higher processors. */
2151 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2152 x86_prefetch_sse = true;
2153 break;
2154 }
2155 if (i == pta_size)
2156 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2157
2158 ix86_arch_mask = 1 << ix86_arch;
2159 ix86_tune_mask = 1 << ix86_tune;
2160
2161 if (optimize_size)
2162 ix86_cost = &size_cost;
2163 else
2164 ix86_cost = processor_target_table[ix86_tune].cost;
2165 target_flags |= processor_target_table[ix86_tune].target_enable;
2166 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2167
2168 /* Arrange to set up i386_stack_locals for all functions. */
2169 init_machine_status = ix86_init_machine_status;
2170
2171 /* Validate -mregparm= value. */
2172 if (ix86_regparm_string)
2173 {
2174 i = atoi (ix86_regparm_string);
2175 if (i < 0 || i > REGPARM_MAX)
2176 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2177 else
2178 ix86_regparm = i;
2179 }
2180 else
2181 if (TARGET_64BIT)
2182 ix86_regparm = REGPARM_MAX;
2183
2184 /* If the user has provided any of the -malign-* options,
2185 warn and use that value only if -falign-* is not set.
2186 Remove this code in GCC 3.2 or later. */
2187 if (ix86_align_loops_string)
2188 {
2189 warning (0, "-malign-loops is obsolete, use -falign-loops");
2190 if (align_loops == 0)
2191 {
2192 i = atoi (ix86_align_loops_string);
2193 if (i < 0 || i > MAX_CODE_ALIGN)
2194 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2195 else
2196 align_loops = 1 << i;
2197 }
2198 }
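/* For example, -malign-loops=4 ends up as align_loops = 1 << 4 = 16, i.e. a
   16-byte loop alignment, roughly what -falign-loops=16 would express
   directly.  */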
2199
2200 if (ix86_align_jumps_string)
2201 {
2202 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2203 if (align_jumps == 0)
2204 {
2205 i = atoi (ix86_align_jumps_string);
2206 if (i < 0 || i > MAX_CODE_ALIGN)
2207 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2208 else
2209 align_jumps = 1 << i;
2210 }
2211 }
2212
2213 if (ix86_align_funcs_string)
2214 {
2215 warning (0, "-malign-functions is obsolete, use -falign-functions");
2216 if (align_functions == 0)
2217 {
2218 i = atoi (ix86_align_funcs_string);
2219 if (i < 0 || i > MAX_CODE_ALIGN)
2220 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2221 else
2222 align_functions = 1 << i;
2223 }
2224 }
2225
2226 /* Default align_* from the processor table. */
2227 if (align_loops == 0)
2228 {
2229 align_loops = processor_target_table[ix86_tune].align_loop;
2230 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2231 }
2232 if (align_jumps == 0)
2233 {
2234 align_jumps = processor_target_table[ix86_tune].align_jump;
2235 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2236 }
2237 if (align_functions == 0)
2238 {
2239 align_functions = processor_target_table[ix86_tune].align_func;
2240 }
2241
2242 /* Validate -mbranch-cost= value, or provide default. */
2243 ix86_branch_cost = ix86_cost->branch_cost;
2244 if (ix86_branch_cost_string)
2245 {
2246 i = atoi (ix86_branch_cost_string);
2247 if (i < 0 || i > 5)
2248 error ("-mbranch-cost=%d is not between 0 and 5", i);
2249 else
2250 ix86_branch_cost = i;
2251 }
2252 if (ix86_section_threshold_string)
2253 {
2254 i = atoi (ix86_section_threshold_string);
2255 if (i < 0)
2256 error ("-mlarge-data-threshold=%d is negative", i);
2257 else
2258 ix86_section_threshold = i;
2259 }
2260
2261 if (ix86_tls_dialect_string)
2262 {
2263 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2264 ix86_tls_dialect = TLS_DIALECT_GNU;
2265 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2266 ix86_tls_dialect = TLS_DIALECT_GNU2;
2267 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2268 ix86_tls_dialect = TLS_DIALECT_SUN;
2269 else
2270 error ("bad value (%s) for -mtls-dialect= switch",
2271 ix86_tls_dialect_string);
2272 }
2273
2274 /* Keep nonleaf frame pointers. */
2275 if (flag_omit_frame_pointer)
2276 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2277 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2278 flag_omit_frame_pointer = 1;
2279
2280 /* If we're doing fast math, we don't care about comparison order
2281 wrt NaNs. This lets us use a shorter comparison sequence. */
2282 if (flag_finite_math_only)
2283 target_flags &= ~MASK_IEEE_FP;
2284
2285 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2286 since the insns won't need emulation. */
2287 if (x86_arch_always_fancy_math_387 & ARCHMASK)
2288 target_flags &= ~MASK_NO_FANCY_MATH_387;
2289
2290 /* Likewise, if the target doesn't have a 387, or we've specified
2291 software floating point, don't use 387 inline intrinsics. */
2292 if (!TARGET_80387)
2293 target_flags |= MASK_NO_FANCY_MATH_387;
2294
2295 /* Turn on SSE3 builtins for -mssse3. */
2296 if (TARGET_SSSE3)
2297 target_flags |= MASK_SSE3;
2298
2299 /* Turn on SSE3 builtins for -msse4a. */
2300 if (TARGET_SSE4A)
2301 target_flags |= MASK_SSE3;
2302
2303 /* Turn on SSE2 builtins for -msse3. */
2304 if (TARGET_SSE3)
2305 target_flags |= MASK_SSE2;
2306
2307 /* Turn on SSE builtins for -msse2. */
2308 if (TARGET_SSE2)
2309 target_flags |= MASK_SSE;
2310
2311 /* Turn on MMX builtins for -msse. */
2312 if (TARGET_SSE)
2313 {
2314 target_flags |= MASK_MMX & ~target_flags_explicit;
2315 x86_prefetch_sse = true;
2316 }
2317
2318 /* Turn on MMX builtins for 3Dnow. */
2319 if (TARGET_3DNOW)
2320 target_flags |= MASK_MMX;
2321
2322 /* Turn on POPCNT builtins for -mabm. */
2323 if (TARGET_ABM)
2324 target_flags |= MASK_POPCNT;
2325
2326 if (TARGET_64BIT)
2327 {
2328 if (TARGET_ALIGN_DOUBLE)
2329 error ("-malign-double makes no sense in the 64bit mode");
2330 if (TARGET_RTD)
2331 error ("-mrtd calling convention not supported in the 64bit mode");
2332
2333 /* Enable by default the SSE and MMX builtins. Do allow the user to
2334 explicitly disable any of these. In particular, disabling SSE and
2335 MMX for kernel code is extremely useful. */
2336 target_flags
2337 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2338 & ~target_flags_explicit);
2339 }
2340 else
2341 {
2342 /* The i386 ABI does not specify a red zone.  It still makes sense to use it
2343 when the programmer takes care to keep the stack from being destroyed. */
2344 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2345 target_flags |= MASK_NO_RED_ZONE;
2346 }
2347
2348 /* Validate -mpreferred-stack-boundary= value, or provide default.
2349 The default of 128 bits is for Pentium III's SSE __m128.  We do not
2350 lower it for optimize_size, because then object files compiled with
2351 -Os and -On could not be mixed. */
2352 ix86_preferred_stack_boundary = 128;
2353 if (ix86_preferred_stack_boundary_string)
2354 {
2355 i = atoi (ix86_preferred_stack_boundary_string);
2356 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2357 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2358 TARGET_64BIT ? 4 : 2);
2359 else
2360 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2361 }
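/* Worked example: -mpreferred-stack-boundary=4 yields
   (1 << 4) * BITS_PER_UNIT = 128 bits, i.e. the default 16-byte alignment
   (assuming the usual BITS_PER_UNIT of 8).  */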
2362
2363 /* Accept -msseregparm only if at least SSE support is enabled. */
2364 if (TARGET_SSEREGPARM
2365 && ! TARGET_SSE)
2366 error ("-msseregparm used without SSE enabled");
2367
2368 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2369
2370 if (ix86_fpmath_string != 0)
2371 {
2372 if (! strcmp (ix86_fpmath_string, "387"))
2373 ix86_fpmath = FPMATH_387;
2374 else if (! strcmp (ix86_fpmath_string, "sse"))
2375 {
2376 if (!TARGET_SSE)
2377 {
2378 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2379 ix86_fpmath = FPMATH_387;
2380 }
2381 else
2382 ix86_fpmath = FPMATH_SSE;
2383 }
2384 else if (! strcmp (ix86_fpmath_string, "387,sse")
2385 || ! strcmp (ix86_fpmath_string, "sse,387"))
2386 {
2387 if (!TARGET_SSE)
2388 {
2389 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2390 ix86_fpmath = FPMATH_387;
2391 }
2392 else if (!TARGET_80387)
2393 {
2394 warning (0, "387 instruction set disabled, using SSE arithmetics");
2395 ix86_fpmath = FPMATH_SSE;
2396 }
2397 else
2398 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2399 }
2400 else
2401 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2402 }
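/* So -mfpmath=sse selects FPMATH_SSE only when SSE is actually enabled, and
   -mfpmath=sse,387 (or 387,sse) selects FPMATH_SSE | FPMATH_387 only when
   both the SSE and 387 units are available.  */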
2403
2404 /* If the i387 is disabled, then do not return values in it. */
2405 if (!TARGET_80387)
2406 target_flags &= ~MASK_FLOAT_RETURNS;
2407
2408 if ((x86_accumulate_outgoing_args & TUNEMASK)
2409 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2410 && !optimize_size)
2411 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2412
2413 /* ??? Unwind info is not correct around the CFG unless either a frame
2414 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2415 unwind info generation to be aware of the CFG and propagating states
2416 around edges. */
2417 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2418 || flag_exceptions || flag_non_call_exceptions)
2419 && flag_omit_frame_pointer
2420 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2421 {
2422 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2423 warning (0, "unwind tables currently require either a frame pointer "
2424 "or -maccumulate-outgoing-args for correctness");
2425 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2426 }
2427
2428 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2429 {
2430 char *p;
2431 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2432 p = strchr (internal_label_prefix, 'X');
2433 internal_label_prefix_len = p - internal_label_prefix;
2434 *p = '\0';
2435 }
2436
2437 /* When no scheduling description is available, disable the scheduler passes
2438 so they do not slow down compilation or make x87 code slower. */
2439 if (!TARGET_SCHEDULE)
2440 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2441
2442 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2443 set_param_value ("simultaneous-prefetches",
2444 ix86_cost->simultaneous_prefetches);
2445 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2446 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2447 }
2448 \f
2449 /* Switch to the appropriate section for output of DECL.
2450 DECL is either a `VAR_DECL' node or a constant of some sort.
2451 RELOC indicates whether forming the initial value of DECL requires
2452 link-time relocations. */
2453
2454 static section *
2455 x86_64_elf_select_section (tree decl, int reloc,
2456 unsigned HOST_WIDE_INT align)
2457 {
2458 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2459 && ix86_in_large_data_p (decl))
2460 {
2461 const char *sname = NULL;
2462 unsigned int flags = SECTION_WRITE;
2463 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2464 {
2465 case SECCAT_DATA:
2466 sname = ".ldata";
2467 break;
2468 case SECCAT_DATA_REL:
2469 sname = ".ldata.rel";
2470 break;
2471 case SECCAT_DATA_REL_LOCAL:
2472 sname = ".ldata.rel.local";
2473 break;
2474 case SECCAT_DATA_REL_RO:
2475 sname = ".ldata.rel.ro";
2476 break;
2477 case SECCAT_DATA_REL_RO_LOCAL:
2478 sname = ".ldata.rel.ro.local";
2479 break;
2480 case SECCAT_BSS:
2481 sname = ".lbss";
2482 flags |= SECTION_BSS;
2483 break;
2484 case SECCAT_RODATA:
2485 case SECCAT_RODATA_MERGE_STR:
2486 case SECCAT_RODATA_MERGE_STR_INIT:
2487 case SECCAT_RODATA_MERGE_CONST:
2488 sname = ".lrodata";
2489 flags = 0;
2490 break;
2491 case SECCAT_SRODATA:
2492 case SECCAT_SDATA:
2493 case SECCAT_SBSS:
2494 gcc_unreachable ();
2495 case SECCAT_TEXT:
2496 case SECCAT_TDATA:
2497 case SECCAT_TBSS:
2498 /* We don't split these for the medium model.  Place them into
2499 the default sections and hope for the best. */
2500 break;
2501 }
2502 if (sname)
2503 {
2504 /* We might get called with string constants, but get_named_section
2505 doesn't like them as they are not DECLs. Also, we need to set
2506 flags in that case. */
2507 if (!DECL_P (decl))
2508 return get_section (sname, flags, NULL);
2509 return get_named_section (decl, sname, reloc);
2510 }
2511 }
2512 return default_elf_select_section (decl, reloc, align);
2513 }
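/* Example of the effect: with -mcmodel=medium, a writable initialized global
   that ix86_in_large_data_p accepts is placed in .ldata (or one of the
   .ldata.rel* variants when it needs relocations) rather than .data, and a
   large zero-initialized one goes to .lbss instead of .bss.  */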
2514
2515 /* Build up a unique section name, expressed as a
2516 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2517 RELOC indicates whether the initial value of EXP requires
2518 link-time relocations. */
2519
2520 static void
2521 x86_64_elf_unique_section (tree decl, int reloc)
2522 {
2523 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2524 && ix86_in_large_data_p (decl))
2525 {
2526 const char *prefix = NULL;
2527 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2528 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2529
2530 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2531 {
2532 case SECCAT_DATA:
2533 case SECCAT_DATA_REL:
2534 case SECCAT_DATA_REL_LOCAL:
2535 case SECCAT_DATA_REL_RO:
2536 case SECCAT_DATA_REL_RO_LOCAL:
2537 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2538 break;
2539 case SECCAT_BSS:
2540 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2541 break;
2542 case SECCAT_RODATA:
2543 case SECCAT_RODATA_MERGE_STR:
2544 case SECCAT_RODATA_MERGE_STR_INIT:
2545 case SECCAT_RODATA_MERGE_CONST:
2546 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2547 break;
2548 case SECCAT_SRODATA:
2549 case SECCAT_SDATA:
2550 case SECCAT_SBSS:
2551 gcc_unreachable ();
2552 case SECCAT_TEXT:
2553 case SECCAT_TDATA:
2554 case SECCAT_TBSS:
2555 /* We don't split these for the medium model.  Place them into
2556 the default sections and hope for the best. */
2557 break;
2558 }
2559 if (prefix)
2560 {
2561 const char *name;
2562 size_t nlen, plen;
2563 char *string;
2564 plen = strlen (prefix);
2565
2566 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2567 name = targetm.strip_name_encoding (name);
2568 nlen = strlen (name);
2569
2570 string = alloca (nlen + plen + 1);
2571 memcpy (string, prefix, plen);
2572 memcpy (string + plen, name, nlen + 1);
2573
2574 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2575 return;
2576 }
2577 }
2578 default_unique_section (decl, reloc);
2579 }
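/* Example of the effect: a large one-only BSS object gets a section named
   .gnu.linkonce.lb.<decl name> when COMDAT groups are unavailable, and
   .lbss.<decl name> otherwise, paralleling what default_unique_section does
   for ordinary data.  */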
2580
2581 #ifdef COMMON_ASM_OP
2582 /* This says how to output assembler code to declare an
2583 uninitialized external linkage data object.
2584
2585 For medium model x86-64 we need to use the .largecomm directive for
2586 large objects. */
2587 void
2588 x86_elf_aligned_common (FILE *file,
2589 const char *name, unsigned HOST_WIDE_INT size,
2590 int align)
2591 {
2592 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2593 && size > (unsigned int)ix86_section_threshold)
2594 fprintf (file, ".largecomm\t");
2595 else
2596 fprintf (file, "%s", COMMON_ASM_OP);
2597 assemble_name (file, name);
2598 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2599 size, align / BITS_PER_UNIT);
2600 }
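/* Illustrative output (a hypothetical object "buf" of 4096 bytes, aligned to
   256 bits, in the medium model):
       .largecomm	buf,4096,32
   Small objects fall back to the ordinary COMMON_ASM_OP form.  */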
2601 #endif
2602 /* Utility function for targets to use in implementing
2603 ASM_OUTPUT_ALIGNED_BSS. */
2604
2605 void
2606 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2607 const char *name, unsigned HOST_WIDE_INT size,
2608 int align)
2609 {
2610 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2611 && size > (unsigned int)ix86_section_threshold)
2612 switch_to_section (get_named_section (decl, ".lbss", 0));
2613 else
2614 switch_to_section (bss_section);
2615 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2616 #ifdef ASM_DECLARE_OBJECT_NAME
2617 last_assemble_variable_decl = decl;
2618 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2619 #else
2620 /* Standard thing is just output label for the object. */
2621 ASM_OUTPUT_LABEL (file, name);
2622 #endif /* ASM_DECLARE_OBJECT_NAME */
2623 ASM_OUTPUT_SKIP (file, size ? size : 1);
2624 }
2625 \f
2626 void
2627 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2628 {
2629 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2630 make the problem with not enough registers even worse. */
2631 #ifdef INSN_SCHEDULING
2632 if (level > 1)
2633 flag_schedule_insns = 0;
2634 #endif
2635
2636 if (TARGET_MACHO)
2637 /* The Darwin libraries never set errno, so we might as well
2638 avoid calling them when that's the only reason we would. */
2639 flag_errno_math = 0;
2640
2641 /* The default values of these switches depend on TARGET_64BIT,
2642 which is not known at this moment.  Mark these values with 2 and
2643 let the user override them.  If no command line option
2644 specifies them, we will set the defaults in override_options. */
2645 if (optimize >= 1)
2646 flag_omit_frame_pointer = 2;
2647 flag_pcc_struct_return = 2;
2648 flag_asynchronous_unwind_tables = 2;
2649 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2650 SUBTARGET_OPTIMIZATION_OPTIONS;
2651 #endif
2652 }
2653 \f
2654 /* Table of valid machine attributes. */
2655 const struct attribute_spec ix86_attribute_table[] =
2656 {
2657 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2658 /* Stdcall attribute says callee is responsible for popping arguments
2659 if they are not variable. */
2660 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2661 /* Fastcall attribute says callee is responsible for popping arguments
2662 if they are not variable. */
2663 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2664 /* Cdecl attribute says the callee is a normal C declaration */
2665 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2666 /* Regparm attribute specifies how many integer arguments are to be
2667 passed in registers. */
2668 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2669 /* Sseregparm attribute says we are using x86_64 calling conventions
2670 for FP arguments. */
2671 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2672 /* force_align_arg_pointer says this function realigns the stack at entry. */
2673 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2674 false, true, true, ix86_handle_cconv_attribute },
2675 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2676 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2677 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2678 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2679 #endif
2680 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2681 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2682 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2683 SUBTARGET_ATTRIBUTE_TABLE,
2684 #endif
2685 { NULL, 0, 0, false, false, false, NULL }
2686 };
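/* Typical uses of these attributes in user code (illustrative only):
     int __attribute__((regparm(3))) f (int a, int b, int c);
	- a, b and c are passed in %eax, %edx and %ecx.
     int __attribute__((fastcall)) g (int a, int b);
	- the first two integer arguments go in %ecx and %edx.  */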
2687
2688 /* Decide whether we can make a sibling call to a function. DECL is the
2689 declaration of the function being targeted by the call and EXP is the
2690 CALL_EXPR representing the call. */
2691
2692 static bool
2693 ix86_function_ok_for_sibcall (tree decl, tree exp)
2694 {
2695 tree func;
2696 rtx a, b;
2697
2698 /* If we are generating position-independent code, we cannot sibcall
2699 optimize any indirect call, or a direct call to a global function,
2700 as the PLT requires %ebx be live. */
2701 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2702 return false;
2703
2704 if (decl)
2705 func = decl;
2706 else
2707 {
2708 func = TREE_TYPE (CALL_EXPR_FN (exp));
2709 if (POINTER_TYPE_P (func))
2710 func = TREE_TYPE (func);
2711 }
2712
2713 /* Check that the return value locations are the same.  For example,
2714 if we are returning floats on the 80387 register stack, we cannot
2715 make a sibcall from a function that doesn't return a float to a
2716 function that does or, conversely, from a function that does return
2717 a float to a function that doesn't; the necessary stack adjustment
2718 would not be executed. This is also the place we notice
2719 differences in the return value ABI. Note that it is ok for one
2720 of the functions to have void return type as long as the return
2721 value of the other is passed in a register. */
2722 a = ix86_function_value (TREE_TYPE (exp), func, false);
2723 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2724 cfun->decl, false);
2725 if (STACK_REG_P (a) || STACK_REG_P (b))
2726 {
2727 if (!rtx_equal_p (a, b))
2728 return false;
2729 }
2730 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2731 ;
2732 else if (!rtx_equal_p (a, b))
2733 return false;
2734
2735 /* If this call is indirect, we'll need to be able to use a call-clobbered
2736 register for the address of the target function. Make sure that all
2737 such registers are not used for passing parameters. */
2738 if (!decl && !TARGET_64BIT)
2739 {
2740 tree type;
2741
2742 /* We're looking at the CALL_EXPR, we need the type of the function. */
2743 type = CALL_EXPR_FN (exp); /* pointer expression */
2744 type = TREE_TYPE (type); /* pointer type */
2745 type = TREE_TYPE (type); /* function type */
2746
2747 if (ix86_function_regparm (type, NULL) >= 3)
2748 {
2749 /* ??? Need to count the actual number of registers to be used,
2750 not the possible number of registers. Fix later. */
2751 return false;
2752 }
2753 }
2754
2755 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2756 /* Dllimport'd functions are also called indirectly. */
2757 if (decl && DECL_DLLIMPORT_P (decl)
2758 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2759 return false;
2760 #endif
2761
2762 /* If we force-aligned the stack, then sibcalling would unalign the
2763 stack, which may break the called function. */
2764 if (cfun->machine->force_align_arg_pointer)
2765 return false;
2766
2767 /* Otherwise okay. That also includes certain types of indirect calls. */
2768 return true;
2769 }
2770
2771 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2772 calling convention attributes;
2773 arguments as in struct attribute_spec.handler. */
2774
2775 static tree
2776 ix86_handle_cconv_attribute (tree *node, tree name,
2777 tree args,
2778 int flags ATTRIBUTE_UNUSED,
2779 bool *no_add_attrs)
2780 {
2781 if (TREE_CODE (*node) != FUNCTION_TYPE
2782 && TREE_CODE (*node) != METHOD_TYPE
2783 && TREE_CODE (*node) != FIELD_DECL
2784 && TREE_CODE (*node) != TYPE_DECL)
2785 {
2786 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2787 IDENTIFIER_POINTER (name));
2788 *no_add_attrs = true;
2789 return NULL_TREE;
2790 }
2791
2792 /* Can combine regparm with all attributes but fastcall. */
2793 if (is_attribute_p ("regparm", name))
2794 {
2795 tree cst;
2796
2797 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2798 {
2799 error ("fastcall and regparm attributes are not compatible");
2800 }
2801
2802 cst = TREE_VALUE (args);
2803 if (TREE_CODE (cst) != INTEGER_CST)
2804 {
2805 warning (OPT_Wattributes,
2806 "%qs attribute requires an integer constant argument",
2807 IDENTIFIER_POINTER (name));
2808 *no_add_attrs = true;
2809 }
2810 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2811 {
2812 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2813 IDENTIFIER_POINTER (name), REGPARM_MAX);
2814 *no_add_attrs = true;
2815 }
2816
2817 if (!TARGET_64BIT
2818 && lookup_attribute (ix86_force_align_arg_pointer_string,
2819 TYPE_ATTRIBUTES (*node))
2820 && compare_tree_int (cst, REGPARM_MAX-1))
2821 {
2822 error ("%s functions limited to %d register parameters",
2823 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2824 }
2825
2826 return NULL_TREE;
2827 }
2828
2829 if (TARGET_64BIT)
2830 {
2831 warning (OPT_Wattributes, "%qs attribute ignored",
2832 IDENTIFIER_POINTER (name));
2833 *no_add_attrs = true;
2834 return NULL_TREE;
2835 }
2836
2837 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2838 if (is_attribute_p ("fastcall", name))
2839 {
2840 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2841 {
2842 error ("fastcall and cdecl attributes are not compatible");
2843 }
2844 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2845 {
2846 error ("fastcall and stdcall attributes are not compatible");
2847 }
2848 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2849 {
2850 error ("fastcall and regparm attributes are not compatible");
2851 }
2852 }
2853
2854 /* Can combine stdcall with fastcall (redundant), regparm and
2855 sseregparm. */
2856 else if (is_attribute_p ("stdcall", name))
2857 {
2858 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2859 {
2860 error ("stdcall and cdecl attributes are not compatible");
2861 }
2862 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2863 {
2864 error ("stdcall and fastcall attributes are not compatible");
2865 }
2866 }
2867
2868 /* Can combine cdecl with regparm and sseregparm. */
2869 else if (is_attribute_p ("cdecl", name))
2870 {
2871 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2872 {
2873 error ("stdcall and cdecl attributes are not compatible");
2874 }
2875 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2876 {
2877 error ("fastcall and cdecl attributes are not compatible");
2878 }
2879 }
2880
2881 /* Can combine sseregparm with all attributes. */
2882
2883 return NULL_TREE;
2884 }
2885
2886 /* Return 0 if the attributes for two types are incompatible, 1 if they
2887 are compatible, and 2 if they are nearly compatible (which causes a
2888 warning to be generated). */
2889
2890 static int
2891 ix86_comp_type_attributes (tree type1, tree type2)
2892 {
2893 /* Check for mismatch of non-default calling convention. */
2894 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2895
2896 if (TREE_CODE (type1) != FUNCTION_TYPE)
2897 return 1;
2898
2899 /* Check for mismatched fastcall/regparm types. */
2900 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2901 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2902 || (ix86_function_regparm (type1, NULL)
2903 != ix86_function_regparm (type2, NULL)))
2904 return 0;
2905
2906 /* Check for mismatched sseregparm types. */
2907 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2908 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2909 return 0;
2910
2911 /* Check for mismatched return types (cdecl vs stdcall). */
2912 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2913 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2914 return 0;
2915
2916 return 1;
2917 }
2918 \f
2919 /* Return the regparm value for a function with the indicated TYPE and DECL.
2920 DECL may be NULL when calling function indirectly
2921 or considering a libcall. */
2922
2923 static int
2924 ix86_function_regparm (tree type, tree decl)
2925 {
2926 tree attr;
2927 int regparm = ix86_regparm;
2928 bool user_convention = false;
2929
2930 if (!TARGET_64BIT)
2931 {
2932 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2933 if (attr)
2934 {
2935 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2936 user_convention = true;
2937 }
2938
2939 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2940 {
2941 regparm = 2;
2942 user_convention = true;
2943 }
2944
2945 /* Use register calling convention for local functions when possible. */
2946 if (!TARGET_64BIT && !user_convention && decl
2947 && flag_unit_at_a_time && !profile_flag)
2948 {
2949 struct cgraph_local_info *i = cgraph_local_info (decl);
2950 if (i && i->local)
2951 {
2952 int local_regparm, globals = 0, regno;
2953
2954 /* Make sure no regparm register is taken by a global register
2955 variable. */
2956 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2957 if (global_regs[local_regparm])
2958 break;
2959 /* We can't use regparm(3) for nested functions as these use the
2960 static chain pointer in the third argument. */
2961 if (local_regparm == 3
2962 && decl_function_context (decl)
2963 && !DECL_NO_STATIC_CHAIN (decl))
2964 local_regparm = 2;
2965 /* If the function realigns its stack pointer, the
2966 prologue will clobber %ecx. If we've already
2967 generated code for the callee, the callee
2968 DECL_STRUCT_FUNCTION is gone, so we fall back to
2969 scanning the attributes for the self-realigning
2970 property. */
2971 if ((DECL_STRUCT_FUNCTION (decl)
2972 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2973 || (!DECL_STRUCT_FUNCTION (decl)
2974 && lookup_attribute (ix86_force_align_arg_pointer_string,
2975 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2976 local_regparm = 2;
2977 /* Each global register variable increases register pressure,
2978 so the more global register variables there are, the less the
2979 regparm optimization helps, unless the user requested it explicitly. */
2980 for (regno = 0; regno < 6; regno++)
2981 if (global_regs[regno])
2982 globals++;
2983 local_regparm
2984 = globals < local_regparm ? local_regparm - globals : 0;
2985
2986 if (local_regparm > regparm)
2987 regparm = local_regparm;
2988 }
2989 }
2990 }
2991 return regparm;
2992 }
2993
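/* A rough sketch (assumed example) of what ix86_function_regparm decides
   on a 32-bit target:

     int __attribute__ ((regparm (3))) f (int a, int b, int c);

   yields regparm == 3, so a, b and c travel in %eax, %edx and %ecx.  A
   static function whose address never escapes can be promoted to the same
   convention automatically under -O2 -funit-at-a-time, subject to the
   global-register-variable and %ecx-clobber restrictions checked above.  */
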
2994 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2995 DFmode (2) arguments in SSE registers for a function with the
2996 indicated TYPE and DECL. DECL may be NULL when calling function
2997 indirectly or considering a libcall. Otherwise return 0. */
2998
2999 static int
3000 ix86_function_sseregparm (tree type, tree decl)
3001 {
3002 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3003 by the sseregparm attribute. */
3004 if (TARGET_SSEREGPARM
3005 || (type
3006 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3007 {
3008 if (!TARGET_SSE)
3009 {
3010 if (decl)
3011 error ("Calling %qD with attribute sseregparm without "
3012 "SSE/SSE2 enabled", decl);
3013 else
3014 error ("Calling %qT with attribute sseregparm without "
3015 "SSE/SSE2 enabled", type);
3016 return 0;
3017 }
3018
3019 return 2;
3020 }
3021
3022 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3023 (and DFmode for SSE2) arguments in SSE registers,
3024 even for 32-bit targets. */
3025 if (!TARGET_64BIT && decl
3026 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3027 {
3028 struct cgraph_local_info *i = cgraph_local_info (decl);
3029 if (i && i->local)
3030 return TARGET_SSE2 ? 2 : 1;
3031 }
3032
3033 return 0;
3034 }
3035
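/* Illustrative use of the sseregparm attribute handled above (assumed
   example):

     double __attribute__ ((sseregparm)) dot (double x, double y);

   With SSE2 enabled the two doubles arrive in %xmm0 and %xmm1 (return
   value 2 above); without SSE the call is diagnosed with the
   "attribute sseregparm without SSE/SSE2 enabled" error.  */
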
3036 /* Return true if EAX is live at the start of the function. Used by
3037 ix86_expand_prologue to determine if we need special help before
3038 calling allocate_stack_worker. */
3039
3040 static bool
3041 ix86_eax_live_at_start_p (void)
3042 {
3043 /* Cheat. Don't bother working forward from ix86_function_regparm
3044 to the function type to whether an actual argument is located in
3045 eax. Instead just look at cfg info, which is still close enough
3046 to correct at this point. This gives false positives for broken
3047 functions that might use uninitialized data that happens to be
3048 allocated in eax, but who cares? */
3049 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3050 }
3051
3052 /* Value is the number of bytes of arguments automatically
3053 popped when returning from a subroutine call.
3054 FUNDECL is the declaration node of the function (as a tree),
3055 FUNTYPE is the data type of the function (as a tree),
3056 or for a library call it is an identifier node for the subroutine name.
3057 SIZE is the number of bytes of arguments passed on the stack.
3058
3059 On the 80386, the RTD insn may be used to pop them if the number
3060 of args is fixed, but if the number is variable then the caller
3061 must pop them all. RTD can't be used for library calls now
3062 because the library is compiled with the Unix compiler.
3063 Use of RTD is a selectable option, since it is incompatible with
3064 standard Unix calling sequences. If the option is not selected,
3065 the caller must always pop the args.
3066
3067 The attribute stdcall is equivalent to RTD on a per module basis. */
3068
3069 int
3070 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3071 {
3072 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3073
3074 /* Cdecl functions override -mrtd, and never pop the stack. */
3075 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3076
3077 /* Stdcall and fastcall functions will pop the stack if not
3078 variable args. */
3079 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3080 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3081 rtd = 1;
3082
3083 if (rtd
3084 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3085 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3086 == void_type_node)))
3087 return size;
3088 }
3089
3090 /* Lose any fake structure return argument if it is passed on the stack. */
3091 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3092 && !TARGET_64BIT
3093 && !KEEP_AGGREGATE_RETURN_POINTER)
3094 {
3095 int nregs = ix86_function_regparm (funtype, fundecl);
3096
3097 if (!nregs)
3098 return GET_MODE_SIZE (Pmode);
3099 }
3100
3101 return 0;
3102 }
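
/* Illustrative effect of ix86_return_pops_args (assumed example): for

     int __attribute__ ((stdcall)) f (int a, int b);

   the callee removes its 8 bytes of stack arguments itself, so its
   epilogue ends in "ret $8".  For a cdecl or variadic function the
   function above returns 0 and the caller does the cleanup.  */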
3103 \f
3104 /* Argument support functions. */
3105
3106 /* Return true when register may be used to pass function parameters. */
3107 bool
3108 ix86_function_arg_regno_p (int regno)
3109 {
3110 int i;
3111 if (!TARGET_64BIT)
3112 {
3113 if (TARGET_MACHO)
3114 return (regno < REGPARM_MAX
3115 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3116 else
3117 return (regno < REGPARM_MAX
3118 || (TARGET_MMX && MMX_REGNO_P (regno)
3119 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3120 || (TARGET_SSE && SSE_REGNO_P (regno)
3121 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3122 }
3123
3124 if (TARGET_MACHO)
3125 {
3126 if (SSE_REGNO_P (regno) && TARGET_SSE)
3127 return true;
3128 }
3129 else
3130 {
3131 if (TARGET_SSE && SSE_REGNO_P (regno)
3132 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3133 return true;
3134 }
3135 /* RAX is used as hidden argument to va_arg functions. */
3136 if (!regno)
3137 return true;
3138 for (i = 0; i < REGPARM_MAX; i++)
3139 if (regno == x86_64_int_parameter_registers[i])
3140 return true;
3141 return false;
3142 }
3143
3144 /* Return true if we do not know how to pass TYPE solely in registers. */
3145
3146 static bool
3147 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3148 {
3149 if (must_pass_in_stack_var_size_or_pad (mode, type))
3150 return true;
3151
3152 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3153 The layout_type routine is crafty and tries to trick us into passing
3154 currently unsupported vector types on the stack by using TImode. */
3155 return (!TARGET_64BIT && mode == TImode
3156 && type && TREE_CODE (type) != VECTOR_TYPE);
3157 }
3158
3159 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3160 for a call to a function whose data type is FNTYPE.
3161 For a library call, FNTYPE is 0. */
3162
3163 void
3164 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3165 tree fntype, /* tree ptr for function decl */
3166 rtx libname, /* SYMBOL_REF of library name or 0 */
3167 tree fndecl)
3168 {
3169 static CUMULATIVE_ARGS zero_cum;
3170 tree param, next_param;
3171
3172 if (TARGET_DEBUG_ARG)
3173 {
3174 fprintf (stderr, "\ninit_cumulative_args (");
3175 if (fntype)
3176 fprintf (stderr, "fntype code = %s, ret code = %s",
3177 tree_code_name[(int) TREE_CODE (fntype)],
3178 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3179 else
3180 fprintf (stderr, "no fntype");
3181
3182 if (libname)
3183 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3184 }
3185
3186 *cum = zero_cum;
3187
3188 /* Set up the number of registers to use for passing arguments. */
3189 cum->nregs = ix86_regparm;
3190 if (TARGET_SSE)
3191 cum->sse_nregs = SSE_REGPARM_MAX;
3192 if (TARGET_MMX)
3193 cum->mmx_nregs = MMX_REGPARM_MAX;
3194 cum->warn_sse = true;
3195 cum->warn_mmx = true;
3196 cum->maybe_vaarg = false;
3197
3198 /* Use ecx and edx registers if function has fastcall attribute,
3199 else look for regparm information. */
3200 if (fntype && !TARGET_64BIT)
3201 {
3202 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3203 {
3204 cum->nregs = 2;
3205 cum->fastcall = 1;
3206 }
3207 else
3208 cum->nregs = ix86_function_regparm (fntype, fndecl);
3209 }
3210
3211 /* Set up the number of SSE registers used for passing SFmode
3212 and DFmode arguments. Warn for mismatching ABI. */
3213 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3214
3215 /* Determine if this function has variable arguments. This is
3216 indicated by the last argument being 'void_type_node' if there
3217 are no variable arguments. If there are variable arguments, then
3218 we won't pass anything in registers in 32-bit mode. */
3219
3220 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3221 {
3222 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3223 param != 0; param = next_param)
3224 {
3225 next_param = TREE_CHAIN (param);
3226 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3227 {
3228 if (!TARGET_64BIT)
3229 {
3230 cum->nregs = 0;
3231 cum->sse_nregs = 0;
3232 cum->mmx_nregs = 0;
3233 cum->warn_sse = 0;
3234 cum->warn_mmx = 0;
3235 cum->fastcall = 0;
3236 cum->float_in_sse = 0;
3237 }
3238 cum->maybe_vaarg = true;
3239 }
3240 }
3241 }
3242 if ((!fntype && !libname)
3243 || (fntype && !TYPE_ARG_TYPES (fntype)))
3244 cum->maybe_vaarg = true;
3245
3246 if (TARGET_DEBUG_ARG)
3247 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3248
3249 return;
3250 }
3251
3252 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3253 But in the case of vector types, it is some vector mode.
3254
3255 When we have only some of our vector isa extensions enabled, then there
3256 are some modes for which vector_mode_supported_p is false. For these
3257 modes, the generic vector support in gcc will choose some non-vector mode
3258 in order to implement the type. By computing the natural mode, we'll
3259 select the proper ABI location for the operand and not depend on whatever
3260 the middle-end decides to do with these vector types. */
3261
3262 static enum machine_mode
3263 type_natural_mode (tree type)
3264 {
3265 enum machine_mode mode = TYPE_MODE (type);
3266
3267 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3268 {
3269 HOST_WIDE_INT size = int_size_in_bytes (type);
3270 if ((size == 8 || size == 16)
3271 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3272 && TYPE_VECTOR_SUBPARTS (type) > 1)
3273 {
3274 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3275
3276 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3277 mode = MIN_MODE_VECTOR_FLOAT;
3278 else
3279 mode = MIN_MODE_VECTOR_INT;
3280
3281 /* Get the mode which has this inner mode and number of units. */
3282 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3283 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3284 && GET_MODE_INNER (mode) == innermode)
3285 return mode;
3286
3287 gcc_unreachable ();
3288 }
3289 }
3290
3291 return mode;
3292 }
3293
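/* Illustrative case for type_natural_mode (assumed example):

     typedef int v2si __attribute__ ((vector_size (8)));

   Without MMX enabled the middle-end may lay v2si out in an integer mode,
   but the function above still reports V2SImode, so the ABI location of
   such an argument does not depend on which ISA extensions happen to be
   enabled.  */
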
3294 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3295 this may not agree with the mode that the type system has chosen for the
3296 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3297 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3298
3299 static rtx
3300 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3301 unsigned int regno)
3302 {
3303 rtx tmp;
3304
3305 if (orig_mode != BLKmode)
3306 tmp = gen_rtx_REG (orig_mode, regno);
3307 else
3308 {
3309 tmp = gen_rtx_REG (mode, regno);
3310 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3311 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3312 }
3313
3314 return tmp;
3315 }
3316
3317 /* x86-64 register passing implementation. See x86-64 ABI for details. Goal
3318 of this code is to classify each 8 bytes of the incoming argument by the register
3319 class and assign registers accordingly. */
3320
3321 /* Return the union class of CLASS1 and CLASS2.
3322 See the x86-64 PS ABI for details. */
3323
3324 static enum x86_64_reg_class
3325 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3326 {
3327 /* Rule #1: If both classes are equal, this is the resulting class. */
3328 if (class1 == class2)
3329 return class1;
3330
3331 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3332 the other class. */
3333 if (class1 == X86_64_NO_CLASS)
3334 return class2;
3335 if (class2 == X86_64_NO_CLASS)
3336 return class1;
3337
3338 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3339 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3340 return X86_64_MEMORY_CLASS;
3341
3342 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3343 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3344 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3345 return X86_64_INTEGERSI_CLASS;
3346 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3347 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3348 return X86_64_INTEGER_CLASS;
3349
3350 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3351 MEMORY is used. */
3352 if (class1 == X86_64_X87_CLASS
3353 || class1 == X86_64_X87UP_CLASS
3354 || class1 == X86_64_COMPLEX_X87_CLASS
3355 || class2 == X86_64_X87_CLASS
3356 || class2 == X86_64_X87UP_CLASS
3357 || class2 == X86_64_COMPLEX_X87_CLASS)
3358 return X86_64_MEMORY_CLASS;
3359
3360 /* Rule #6: Otherwise class SSE is used. */
3361 return X86_64_SSE_CLASS;
3362 }
3363
3364 /* Classify the argument of type TYPE and mode MODE.
3365 CLASSES will be filled by the register class used to pass each word
3366 of the operand. The number of words is returned. In case the parameter
3367 should be passed in memory, 0 is returned. As a special case for zero
3368 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3369
3370 BIT_OFFSET is used internally for handling records and specifies the
3371 offset in bits modulo 256 to avoid overflow cases.
3372
3373 See the x86-64 PS ABI for details.
3374 */
3375
3376 static int
3377 classify_argument (enum machine_mode mode, tree type,
3378 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3379 {
3380 HOST_WIDE_INT bytes =
3381 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3382 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3383
3384 /* Variable sized entities are always passed/returned in memory. */
3385 if (bytes < 0)
3386 return 0;
3387
3388 if (mode != VOIDmode
3389 && targetm.calls.must_pass_in_stack (mode, type))
3390 return 0;
3391
3392 if (type && AGGREGATE_TYPE_P (type))
3393 {
3394 int i;
3395 tree field;
3396 enum x86_64_reg_class subclasses[MAX_CLASSES];
3397
3398 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3399 if (bytes > 16)
3400 return 0;
3401
3402 for (i = 0; i < words; i++)
3403 classes[i] = X86_64_NO_CLASS;
3404
3405 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3406 signal the memory class, so handle it as a special case. */
3407 if (!words)
3408 {
3409 classes[0] = X86_64_NO_CLASS;
3410 return 1;
3411 }
3412
3413 /* Classify each field of record and merge classes. */
3414 switch (TREE_CODE (type))
3415 {
3416 case RECORD_TYPE:
3417 /* And now merge the fields of structure. */
3418 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3419 {
3420 if (TREE_CODE (field) == FIELD_DECL)
3421 {
3422 int num;
3423
3424 if (TREE_TYPE (field) == error_mark_node)
3425 continue;
3426
3427 /* Bitfields are always classified as integer. Handle them
3428 early, since later code would consider them to be
3429 misaligned integers. */
3430 if (DECL_BIT_FIELD (field))
3431 {
3432 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3433 i < ((int_bit_position (field) + (bit_offset % 64))
3434 + tree_low_cst (DECL_SIZE (field), 0)
3435 + 63) / 8 / 8; i++)
3436 classes[i] =
3437 merge_classes (X86_64_INTEGER_CLASS,
3438 classes[i]);
3439 }
3440 else
3441 {
3442 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3443 TREE_TYPE (field), subclasses,
3444 (int_bit_position (field)
3445 + bit_offset) % 256);
3446 if (!num)
3447 return 0;
3448 for (i = 0; i < num; i++)
3449 {
3450 int pos =
3451 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3452 classes[i + pos] =
3453 merge_classes (subclasses[i], classes[i + pos]);
3454 }
3455 }
3456 }
3457 }
3458 break;
3459
3460 case ARRAY_TYPE:
3461 /* Arrays are handled as small records. */
3462 {
3463 int num;
3464 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3465 TREE_TYPE (type), subclasses, bit_offset);
3466 if (!num)
3467 return 0;
3468
3469 /* The partial classes are now full classes. */
3470 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3471 subclasses[0] = X86_64_SSE_CLASS;
3472 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3473 subclasses[0] = X86_64_INTEGER_CLASS;
3474
3475 for (i = 0; i < words; i++)
3476 classes[i] = subclasses[i % num];
3477
3478 break;
3479 }
3480 case UNION_TYPE:
3481 case QUAL_UNION_TYPE:
3482 /* Unions are similar to RECORD_TYPE but offset is always 0.
3483 */
3484 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3485 {
3486 if (TREE_CODE (field) == FIELD_DECL)
3487 {
3488 int num;
3489
3490 if (TREE_TYPE (field) == error_mark_node)
3491 continue;
3492
3493 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3494 TREE_TYPE (field), subclasses,
3495 bit_offset);
3496 if (!num)
3497 return 0;
3498 for (i = 0; i < num; i++)
3499 classes[i] = merge_classes (subclasses[i], classes[i]);
3500 }
3501 }
3502 break;
3503
3504 default:
3505 gcc_unreachable ();
3506 }
3507
3508 /* Final merger cleanup. */
3509 for (i = 0; i < words; i++)
3510 {
3511 /* If one class is MEMORY, everything should be passed in
3512 memory. */
3513 if (classes[i] == X86_64_MEMORY_CLASS)
3514 return 0;
3515
3516 /* The X86_64_SSEUP_CLASS should be always preceded by
3517 X86_64_SSE_CLASS. */
3518 if (classes[i] == X86_64_SSEUP_CLASS
3519 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3520 classes[i] = X86_64_SSE_CLASS;
3521
3522 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3523 if (classes[i] == X86_64_X87UP_CLASS
3524 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3525 classes[i] = X86_64_SSE_CLASS;
3526 }
3527 return words;
3528 }
3529
3530 /* Compute alignment needed. We align all types to natural boundaries with
3531 the exception of XFmode, which is aligned to 64 bits. */
3532 if (mode != VOIDmode && mode != BLKmode)
3533 {
3534 int mode_alignment = GET_MODE_BITSIZE (mode);
3535
3536 if (mode == XFmode)
3537 mode_alignment = 128;
3538 else if (mode == XCmode)
3539 mode_alignment = 256;
3540 if (COMPLEX_MODE_P (mode))
3541 mode_alignment /= 2;
3542 /* Misaligned fields are always returned in memory. */
3543 if (bit_offset % mode_alignment)
3544 return 0;
3545 }
3546
3547 /* For V1xx modes, just use the base mode. */
3548 if (VECTOR_MODE_P (mode)
3549 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3550 mode = GET_MODE_INNER (mode);
3551
3552 /* Classification of atomic types. */
3553 switch (mode)
3554 {
3555 case SDmode:
3556 case DDmode:
3557 classes[0] = X86_64_SSE_CLASS;
3558 return 1;
3559 case TDmode:
3560 classes[0] = X86_64_SSE_CLASS;
3561 classes[1] = X86_64_SSEUP_CLASS;
3562 return 2;
3563 case DImode:
3564 case SImode:
3565 case HImode:
3566 case QImode:
3567 case CSImode:
3568 case CHImode:
3569 case CQImode:
3570 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3571 classes[0] = X86_64_INTEGERSI_CLASS;
3572 else
3573 classes[0] = X86_64_INTEGER_CLASS;
3574 return 1;
3575 case CDImode:
3576 case TImode:
3577 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3578 return 2;
3579 case CTImode:
3580 return 0;
3581 case SFmode:
3582 if (!(bit_offset % 64))
3583 classes[0] = X86_64_SSESF_CLASS;
3584 else
3585 classes[0] = X86_64_SSE_CLASS;
3586 return 1;
3587 case DFmode:
3588 classes[0] = X86_64_SSEDF_CLASS;
3589 return 1;
3590 case XFmode:
3591 classes[0] = X86_64_X87_CLASS;
3592 classes[1] = X86_64_X87UP_CLASS;
3593 return 2;
3594 case TFmode:
3595 classes[0] = X86_64_SSE_CLASS;
3596 classes[1] = X86_64_SSEUP_CLASS;
3597 return 2;
3598 case SCmode:
3599 classes[0] = X86_64_SSE_CLASS;
3600 return 1;
3601 case DCmode:
3602 classes[0] = X86_64_SSEDF_CLASS;
3603 classes[1] = X86_64_SSEDF_CLASS;
3604 return 2;
3605 case XCmode:
3606 classes[0] = X86_64_COMPLEX_X87_CLASS;
3607 return 1;
3608 case TCmode:
3609 /* This mode is larger than 16 bytes. */
3610 return 0;
3611 case V4SFmode:
3612 case V4SImode:
3613 case V16QImode:
3614 case V8HImode:
3615 case V2DFmode:
3616 case V2DImode:
3617 classes[0] = X86_64_SSE_CLASS;
3618 classes[1] = X86_64_SSEUP_CLASS;
3619 return 2;
3620 case V2SFmode:
3621 case V2SImode:
3622 case V4HImode:
3623 case V8QImode:
3624 classes[0] = X86_64_SSE_CLASS;
3625 return 1;
3626 case BLKmode:
3627 case VOIDmode:
3628 return 0;
3629 default:
3630 gcc_assert (VECTOR_MODE_P (mode));
3631
3632 if (bytes > 16)
3633 return 0;
3634
3635 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3636
3637 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3638 classes[0] = X86_64_INTEGERSI_CLASS;
3639 else
3640 classes[0] = X86_64_INTEGER_CLASS;
3641 classes[1] = X86_64_INTEGER_CLASS;
3642 return 1 + (bytes > 8);
3643 }
3644 }
3645
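/* A worked example of the classification above (assumed, for
   illustration):

     struct s { double d; long l; };   (16 bytes on x86-64)

   Eightbyte 0 (d) classifies as SSEDF and eightbyte 1 (l) as INTEGER, so
   the struct is passed in one SSE register and one integer register.
   Appending another long pushes the size past 16 bytes and the function
   returns 0: the argument then goes to memory.  */
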
3646 /* Examine the argument and set the number of registers required in each
3647 class. Return 0 iff the parameter should be passed in memory. */
3648 static int
3649 examine_argument (enum machine_mode mode, tree type, int in_return,
3650 int *int_nregs, int *sse_nregs)
3651 {
3652 enum x86_64_reg_class class[MAX_CLASSES];
3653 int n = classify_argument (mode, type, class, 0);
3654
3655 *int_nregs = 0;
3656 *sse_nregs = 0;
3657 if (!n)
3658 return 0;
3659 for (n--; n >= 0; n--)
3660 switch (class[n])
3661 {
3662 case X86_64_INTEGER_CLASS:
3663 case X86_64_INTEGERSI_CLASS:
3664 (*int_nregs)++;
3665 break;
3666 case X86_64_SSE_CLASS:
3667 case X86_64_SSESF_CLASS:
3668 case X86_64_SSEDF_CLASS:
3669 (*sse_nregs)++;
3670 break;
3671 case X86_64_NO_CLASS:
3672 case X86_64_SSEUP_CLASS:
3673 break;
3674 case X86_64_X87_CLASS:
3675 case X86_64_X87UP_CLASS:
3676 if (!in_return)
3677 return 0;
3678 break;
3679 case X86_64_COMPLEX_X87_CLASS:
3680 return in_return ? 2 : 0;
3681 case X86_64_MEMORY_CLASS:
3682 gcc_unreachable ();
3683 }
3684 return 1;
3685 }
3686
3687 /* Construct container for the argument used by GCC interface. See
3688 FUNCTION_ARG for the detailed description. */
3689
3690 static rtx
3691 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3692 tree type, int in_return, int nintregs, int nsseregs,
3693 const int *intreg, int sse_regno)
3694 {
3695 /* The following variables hold the static issued_error state. */
3696 static bool issued_sse_arg_error;
3697 static bool issued_sse_ret_error;
3698 static bool issued_x87_ret_error;
3699
3700 enum machine_mode tmpmode;
3701 int bytes =
3702 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3703 enum x86_64_reg_class class[MAX_CLASSES];
3704 int n;
3705 int i;
3706 int nexps = 0;
3707 int needed_sseregs, needed_intregs;
3708 rtx exp[MAX_CLASSES];
3709 rtx ret;
3710
3711 n = classify_argument (mode, type, class, 0);
3712 if (TARGET_DEBUG_ARG)
3713 {
3714 if (!n)
3715 fprintf (stderr, "Memory class\n");
3716 else
3717 {
3718 fprintf (stderr, "Classes:");
3719 for (i = 0; i < n; i++)
3720 {
3721 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3722 }
3723 fprintf (stderr, "\n");
3724 }
3725 }
3726 if (!n)
3727 return NULL;
3728 if (!examine_argument (mode, type, in_return, &needed_intregs,
3729 &needed_sseregs))
3730 return NULL;
3731 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3732 return NULL;
3733
3734 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3735 some less clueful developer tries to use floating-point anyway. */
3736 if (needed_sseregs && !TARGET_SSE)
3737 {
3738 if (in_return)
3739 {
3740 if (!issued_sse_ret_error)
3741 {
3742 error ("SSE register return with SSE disabled");
3743 issued_sse_ret_error = true;
3744 }
3745 }
3746 else if (!issued_sse_arg_error)
3747 {
3748 error ("SSE register argument with SSE disabled");
3749 issued_sse_arg_error = true;
3750 }
3751 return NULL;
3752 }
3753
3754 /* Likewise, error if the ABI requires us to return values in the
3755 x87 registers and the user specified -mno-80387. */
3756 if (!TARGET_80387 && in_return)
3757 for (i = 0; i < n; i++)
3758 if (class[i] == X86_64_X87_CLASS
3759 || class[i] == X86_64_X87UP_CLASS
3760 || class[i] == X86_64_COMPLEX_X87_CLASS)
3761 {
3762 if (!issued_x87_ret_error)
3763 {
3764 error ("x87 register return with x87 disabled");
3765 issued_x87_ret_error = true;
3766 }
3767 return NULL;
3768 }
3769
3770 /* First construct simple cases. Avoid SCmode, since we want to use
3771 a single register to pass this type. */
3772 if (n == 1 && mode != SCmode)
3773 switch (class[0])
3774 {
3775 case X86_64_INTEGER_CLASS:
3776 case X86_64_INTEGERSI_CLASS:
3777 return gen_rtx_REG (mode, intreg[0]);
3778 case X86_64_SSE_CLASS:
3779 case X86_64_SSESF_CLASS:
3780 case X86_64_SSEDF_CLASS:
3781 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3782 case X86_64_X87_CLASS:
3783 case X86_64_COMPLEX_X87_CLASS:
3784 return gen_rtx_REG (mode, FIRST_STACK_REG);
3785 case X86_64_NO_CLASS:
3786 /* Zero sized array, struct or class. */
3787 return NULL;
3788 default:
3789 gcc_unreachable ();
3790 }
3791 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3792 && mode != BLKmode)
3793 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3794 if (n == 2
3795 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3796 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3797 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3798 && class[1] == X86_64_INTEGER_CLASS
3799 && (mode == CDImode || mode == TImode || mode == TFmode)
3800 && intreg[0] + 1 == intreg[1])
3801 return gen_rtx_REG (mode, intreg[0]);
3802
3803 /* Otherwise figure out the entries of the PARALLEL. */
3804 for (i = 0; i < n; i++)
3805 {
3806 switch (class[i])
3807 {
3808 case X86_64_NO_CLASS:
3809 break;
3810 case X86_64_INTEGER_CLASS:
3811 case X86_64_INTEGERSI_CLASS:
3812 /* Merge TImodes on aligned occasions here too. */
3813 if (i * 8 + 8 > bytes)
3814 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3815 else if (class[i] == X86_64_INTEGERSI_CLASS)
3816 tmpmode = SImode;
3817 else
3818 tmpmode = DImode;
3819 /* We've requested a size we don't have an integer mode for. Use DImode. */
3820 if (tmpmode == BLKmode)
3821 tmpmode = DImode;
3822 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3823 gen_rtx_REG (tmpmode, *intreg),
3824 GEN_INT (i*8));
3825 intreg++;
3826 break;
3827 case X86_64_SSESF_CLASS:
3828 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3829 gen_rtx_REG (SFmode,
3830 SSE_REGNO (sse_regno)),
3831 GEN_INT (i*8));
3832 sse_regno++;
3833 break;
3834 case X86_64_SSEDF_CLASS:
3835 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3836 gen_rtx_REG (DFmode,
3837 SSE_REGNO (sse_regno)),
3838 GEN_INT (i*8));
3839 sse_regno++;
3840 break;
3841 case X86_64_SSE_CLASS:
3842 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3843 tmpmode = TImode;
3844 else
3845 tmpmode = DImode;
3846 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3847 gen_rtx_REG (tmpmode,
3848 SSE_REGNO (sse_regno)),
3849 GEN_INT (i*8));
3850 if (tmpmode == TImode)
3851 i++;
3852 sse_regno++;
3853 break;
3854 default:
3855 gcc_unreachable ();
3856 }
3857 }
3858
3859 /* Empty aligned struct, union or class. */
3860 if (nexps == 0)
3861 return NULL;
3862
3863 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3864 for (i = 0; i < nexps; i++)
3865 XVECEXP (ret, 0, i) = exp [i];
3866 return ret;
3867 }
3868
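/* For the struct s example above, construct_container builds roughly
   (a sketch of the resulting RTL, not a verbatim dump):

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the first eightbyte lives in an SSE register at offset 0 and the
   second in an integer register at offset 8.  */
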
3869 /* Update the data in CUM to advance over an argument
3870 of mode MODE and data type TYPE.
3871 (TYPE is null for libcalls where that information may not be available.) */
3872
3873 void
3874 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3875 tree type, int named)
3876 {
3877 int bytes =
3878 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3879 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3880
3881 if (type)
3882 mode = type_natural_mode (type);
3883
3884 if (TARGET_DEBUG_ARG)
3885 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3886 "mode=%s, named=%d)\n\n",
3887 words, cum->words, cum->nregs, cum->sse_nregs,
3888 GET_MODE_NAME (mode), named);
3889
3890 if (TARGET_64BIT)
3891 {
3892 int int_nregs, sse_nregs;
3893 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3894 cum->words += words;
3895 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3896 {
3897 cum->nregs -= int_nregs;
3898 cum->sse_nregs -= sse_nregs;
3899 cum->regno += int_nregs;
3900 cum->sse_regno += sse_nregs;
3901 }
3902 else
3903 cum->words += words;
3904 }
3905 else
3906 {
3907 switch (mode)
3908 {
3909 default:
3910 break;
3911
3912 case BLKmode:
3913 if (bytes < 0)
3914 break;
3915 /* FALLTHRU */
3916
3917 case DImode:
3918 case SImode:
3919 case HImode:
3920 case QImode:
3921 cum->words += words;
3922 cum->nregs -= words;
3923 cum->regno += words;
3924
3925 if (cum->nregs <= 0)
3926 {
3927 cum->nregs = 0;
3928 cum->regno = 0;
3929 }
3930 break;
3931
3932 case DFmode:
3933 if (cum->float_in_sse < 2)
3934 break;
3935 case SFmode:
3936 if (cum->float_in_sse < 1)
3937 break;
3938 /* FALLTHRU */
3939
3940 case TImode:
3941 case V16QImode:
3942 case V8HImode:
3943 case V4SImode:
3944 case V2DImode:
3945 case V4SFmode:
3946 case V2DFmode:
3947 if (!type || !AGGREGATE_TYPE_P (type))
3948 {
3949 cum->sse_words += words;
3950 cum->sse_nregs -= 1;
3951 cum->sse_regno += 1;
3952 if (cum->sse_nregs <= 0)
3953 {
3954 cum->sse_nregs = 0;
3955 cum->sse_regno = 0;
3956 }
3957 }
3958 break;
3959
3960 case V8QImode:
3961 case V4HImode:
3962 case V2SImode:
3963 case V2SFmode:
3964 if (!type || !AGGREGATE_TYPE_P (type))
3965 {
3966 cum->mmx_words += words;
3967 cum->mmx_nregs -= 1;
3968 cum->mmx_regno += 1;
3969 if (cum->mmx_nregs <= 0)
3970 {
3971 cum->mmx_nregs = 0;
3972 cum->mmx_regno = 0;
3973 }
3974 }
3975 break;
3976 }
3977 }
3978 }
3979
3980 /* Define where to put the arguments to a function.
3981 Value is zero to push the argument on the stack,
3982 or a hard register in which to store the argument.
3983
3984 MODE is the argument's machine mode.
3985 TYPE is the data type of the argument (as a tree).
3986 This is null for libcalls where that information may
3987 not be available.
3988 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3989 the preceding args and about the function being called.
3990 NAMED is nonzero if this argument is a named parameter
3991 (otherwise it is an extra parameter matching an ellipsis). */
3992
3993 rtx
3994 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3995 tree type, int named)
3996 {
3997 enum machine_mode mode = orig_mode;
3998 rtx ret = NULL_RTX;
3999 int bytes =
4000 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4001 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4002 static bool warnedsse, warnedmmx;
4003
4004 /* To simplify the code below, represent vector types with a vector mode
4005 even if MMX/SSE are not active. */
4006 if (type && TREE_CODE (type) == VECTOR_TYPE)
4007 mode = type_natural_mode (type);
4008
4009 /* Handle a hidden AL argument containing number of registers for varargs
4010 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4011 any AL settings. */
4012 if (mode == VOIDmode)
4013 {
4014 if (TARGET_64BIT)
4015 return GEN_INT (cum->maybe_vaarg
4016 ? (cum->sse_nregs < 0
4017 ? SSE_REGPARM_MAX
4018 : cum->sse_regno)
4019 : -1);
4020 else
4021 return constm1_rtx;
4022 }
4023 if (TARGET_64BIT)
4024 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4025 cum->sse_nregs,
4026 &x86_64_int_parameter_registers [cum->regno],
4027 cum->sse_regno);
4028 else
4029 switch (mode)
4030 {
4031 /* For now, pass fp/complex values on the stack. */
4032 default:
4033 break;
4034
4035 case BLKmode:
4036 if (bytes < 0)
4037 break;
4038 /* FALLTHRU */
4039 case DImode:
4040 case SImode:
4041 case HImode:
4042 case QImode:
4043 if (words <= cum->nregs)
4044 {
4045 int regno = cum->regno;
4046
4047 /* Fastcall allocates the first two DWORD (SImode) or
4048 smaller arguments to ECX and EDX. */
4049 if (cum->fastcall)
4050 {
4051 if (mode == BLKmode || mode == DImode)
4052 break;
4053
4054 /* ECX, not EAX, is the first allocated register. */
4055 if (regno == 0)
4056 regno = 2;
4057 }
4058 ret = gen_rtx_REG (mode, regno);
4059 }
4060 break;
4061 case DFmode:
4062 if (cum->float_in_sse < 2)
4063 break;
4064 case SFmode:
4065 if (cum->float_in_sse < 1)
4066 break;
4067 /* FALLTHRU */
4068 case TImode:
4069 case V16QImode:
4070 case V8HImode:
4071 case V4SImode:
4072 case V2DImode:
4073 case V4SFmode:
4074 case V2DFmode:
4075 if (!type || !AGGREGATE_TYPE_P (type))
4076 {
4077 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4078 {
4079 warnedsse = true;
4080 warning (0, "SSE vector argument without SSE enabled "
4081 "changes the ABI");
4082 }
4083 if (cum->sse_nregs)
4084 ret = gen_reg_or_parallel (mode, orig_mode,
4085 cum->sse_regno + FIRST_SSE_REG);
4086 }
4087 break;
4088 case V8QImode:
4089 case V4HImode:
4090 case V2SImode:
4091 case V2SFmode:
4092 if (!type || !AGGREGATE_TYPE_P (type))
4093 {
4094 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4095 {
4096 warnedmmx = true;
4097 warning (0, "MMX vector argument without MMX enabled "
4098 "changes the ABI");
4099 }
4100 if (cum->mmx_nregs)
4101 ret = gen_reg_or_parallel (mode, orig_mode,
4102 cum->mmx_regno + FIRST_MMX_REG);
4103 }
4104 break;
4105 }
4106
4107 if (TARGET_DEBUG_ARG)
4108 {
4109 fprintf (stderr,
4110 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4111 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4112
4113 if (ret)
4114 print_simple_rtl (stderr, ret);
4115 else
4116 fprintf (stderr, ", stack");
4117
4118 fprintf (stderr, " )\n");
4119 }
4120
4121 return ret;
4122 }
4123
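/* Illustrative 32-bit fastcall dispatch performed by function_arg above
   (assumed example):

     void __attribute__ ((fastcall)) f (int a, int b, int c);

   a is passed in %ecx, b in %edx and c on the stack, matching the
   "first two DWORD or smaller arguments to ECX and EDX" rule in the
   integer-mode case of the switch.  */
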
4124 /* A C expression that indicates when an argument must be passed by
4125 reference. If nonzero for an argument, a copy of that argument is
4126 made in memory and a pointer to the argument is passed instead of
4127 the argument itself. The pointer is passed in whatever way is
4128 appropriate for passing a pointer to that type. */
4129
4130 static bool
4131 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4132 enum machine_mode mode ATTRIBUTE_UNUSED,
4133 tree type, bool named ATTRIBUTE_UNUSED)
4134 {
4135 if (!TARGET_64BIT)
4136 return 0;
4137
4138 if (type && int_size_in_bytes (type) == -1)
4139 {
4140 if (TARGET_DEBUG_ARG)
4141 fprintf (stderr, "function_arg_pass_by_reference\n");
4142 return 1;
4143 }
4144
4145 return 0;
4146 }
4147
4148 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4149 ABI. Only called if TARGET_SSE. */
4150 static bool
4151 contains_128bit_aligned_vector_p (tree type)
4152 {
4153 enum machine_mode mode = TYPE_MODE (type);
4154 if (SSE_REG_MODE_P (mode)
4155 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4156 return true;
4157 if (TYPE_ALIGN (type) < 128)
4158 return false;
4159
4160 if (AGGREGATE_TYPE_P (type))
4161 {
4162 /* Walk the aggregates recursively. */
4163 switch (TREE_CODE (type))
4164 {
4165 case RECORD_TYPE:
4166 case UNION_TYPE:
4167 case QUAL_UNION_TYPE:
4168 {
4169 tree field;
4170
4171 /* Walk all the structure fields. */
4172 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4173 {
4174 if (TREE_CODE (field) == FIELD_DECL
4175 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4176 return true;
4177 }
4178 break;
4179 }
4180
4181 case ARRAY_TYPE:
4182 /* Just in case some languages pass arrays by value. */
4183 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4184 return true;
4185 break;
4186
4187 default:
4188 gcc_unreachable ();
4189 }
4190 }
4191 return false;
4192 }
4193
4194 /* Gives the alignment boundary, in bits, of an argument with the
4195 specified mode and type. */
4196
4197 int
4198 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4199 {
4200 int align;
4201 if (type)
4202 align = TYPE_ALIGN (type);
4203 else
4204 align = GET_MODE_ALIGNMENT (mode);
4205 if (align < PARM_BOUNDARY)
4206 align = PARM_BOUNDARY;
4207 if (!TARGET_64BIT)
4208 {
4209 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4210 make an exception for SSE modes since these require 128bit
4211 alignment.
4212
4213 The handling here differs from field_alignment. ICC aligns MMX
4214 arguments to 4 byte boundaries, while structure fields are aligned
4215 to 8 byte boundaries. */
4216 if (!TARGET_SSE)
4217 align = PARM_BOUNDARY;
4218 else if (!type)
4219 {
4220 if (!SSE_REG_MODE_P (mode))
4221 align = PARM_BOUNDARY;
4222 }
4223 else
4224 {
4225 if (!contains_128bit_aligned_vector_p (type))
4226 align = PARM_BOUNDARY;
4227 }
4228 }
4229 if (align > 128)
4230 align = 128;
4231 return align;
4232 }
4233
4234 /* Return true if N is a possible register number of function value. */
4235 bool
4236 ix86_function_value_regno_p (int regno)
4237 {
4238 if (TARGET_MACHO)
4239 {
4240 if (!TARGET_64BIT)
4241 {
4242 return ((regno) == 0
4243 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4244 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4245 }
4246 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4247 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4248 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4249 }
4250 else
4251 {
4252 if (regno == 0
4253 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4254 || (regno == FIRST_SSE_REG && TARGET_SSE))
4255 return true;
4256
4257 if (!TARGET_64BIT
4258 && (regno == FIRST_MMX_REG && TARGET_MMX))
4259 return true;
4260
4261 return false;
4262 }
4263 }
4264
4265 /* Define how to find the value returned by a function.
4266 VALTYPE is the data type of the value (as a tree).
4267 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4268 otherwise, FUNC is 0. */
4269 rtx
4270 ix86_function_value (tree valtype, tree fntype_or_decl,
4271 bool outgoing ATTRIBUTE_UNUSED)
4272 {
4273 enum machine_mode natmode = type_natural_mode (valtype);
4274
4275 if (TARGET_64BIT)
4276 {
4277 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4278 1, REGPARM_MAX, SSE_REGPARM_MAX,
4279 x86_64_int_return_registers, 0);
4280 /* For zero sized structures, construct_container returns NULL, but we
4281 need to keep the rest of the compiler happy by returning a meaningful value. */
4282 if (!ret)
4283 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4284 return ret;
4285 }
4286 else
4287 {
4288 tree fn = NULL_TREE, fntype;
4289 if (fntype_or_decl
4290 && DECL_P (fntype_or_decl))
4291 fn = fntype_or_decl;
4292 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4293 return gen_rtx_REG (TYPE_MODE (valtype),
4294 ix86_value_regno (natmode, fn, fntype));
4295 }
4296 }
4297
4298 /* Return true iff type is returned in memory. */
4299 int
4300 ix86_return_in_memory (tree type)
4301 {
4302 int needed_intregs, needed_sseregs, size;
4303 enum machine_mode mode = type_natural_mode (type);
4304
4305 if (TARGET_64BIT)
4306 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4307
4308 if (mode == BLKmode)
4309 return 1;
4310
4311 size = int_size_in_bytes (type);
4312
4313 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4314 return 0;
4315
4316 if (VECTOR_MODE_P (mode) || mode == TImode)
4317 {
4318 /* User-created vectors small enough to fit in EAX. */
4319 if (size < 8)
4320 return 0;
4321
4322 /* MMX/3dNow values are returned in MM0,
4323 except when it doesn't exist. */
4324 if (size == 8)
4325 return (TARGET_MMX ? 0 : 1);
4326
4327 /* SSE values are returned in XMM0, except when it doesn't exist. */
4328 if (size == 16)
4329 return (TARGET_SSE ? 0 : 1);
4330 }
4331
4332 if (mode == XFmode)
4333 return 0;
4334
4335 if (mode == TDmode)
4336 return 1;
4337
4338 if (size > 12)
4339 return 1;
4340 return 0;
4341 }
4342
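/* Illustrative 32-bit decisions made by ix86_return_in_memory above
   (assumed examples):

     typedef float v4sf __attribute__ ((vector_size (16)));

   A v4sf return value stays in %xmm0 when SSE is enabled and is forced
   to memory otherwise; an 8-byte MMX vector behaves the same way with
   %mm0; a long double (XFmode) is never forced to memory here since it
   returns in %st(0) under the default x87 return convention.  */
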
4343 /* When returning SSE vector types, we have a choice of either
4344 (1) being abi incompatible with a -march switch, or
4345 (2) generating an error.
4346 Given no good solution, I think the safest thing is one warning.
4347 The user won't be able to use -Werror, but....
4348
4349 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4350 called in response to actually generating a caller or callee that
4351 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4352 via aggregate_value_p for general type probing from tree-ssa. */
4353
4354 static rtx
4355 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4356 {
4357 static bool warnedsse, warnedmmx;
4358
4359 if (type)
4360 {
4361 /* Look at the return type of the function, not the function type. */
4362 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4363
4364 if (!TARGET_SSE && !warnedsse)
4365 {
4366 if (mode == TImode
4367 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4368 {
4369 warnedsse = true;
4370 warning (0, "SSE vector return without SSE enabled "
4371 "changes the ABI");
4372 }
4373 }
4374
4375 if (!TARGET_MMX && !warnedmmx)
4376 {
4377 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4378 {
4379 warnedmmx = true;
4380 warning (0, "MMX vector return without MMX enabled "
4381 "changes the ABI");
4382 }
4383 }
4384 }
4385
4386 return NULL;
4387 }
4388
4389 /* Define how to find the value returned by a library function
4390 assuming the value has mode MODE. */
4391 rtx
4392 ix86_libcall_value (enum machine_mode mode)
4393 {
4394 if (TARGET_64BIT)
4395 {
4396 switch (mode)
4397 {
4398 case SFmode:
4399 case SCmode:
4400 case DFmode:
4401 case DCmode:
4402 case TFmode:
4403 case SDmode:
4404 case DDmode:
4405 case TDmode:
4406 return gen_rtx_REG (mode, FIRST_SSE_REG);
4407 case XFmode:
4408 case XCmode:
4409 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4410 case TCmode:
4411 return NULL;
4412 default:
4413 return gen_rtx_REG (mode, 0);
4414 }
4415 }
4416 else
4417 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4418 }
4419
4420 /* Given a mode, return the register to use for a return value. */
4421
4422 static int
4423 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4424 {
4425 gcc_assert (!TARGET_64BIT);
4426
4427 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4428 we normally prevent this case when mmx is not available. However
4429 some ABIs may require the result to be returned like DImode. */
4430 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4431 return TARGET_MMX ? FIRST_MMX_REG : 0;
4432
4433 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4434 we prevent this case when sse is not available. However some ABIs
4435 may require the result to be returned like integer TImode. */
4436 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4437 return TARGET_SSE ? FIRST_SSE_REG : 0;
4438
4439 /* Decimal floating point values can go in %eax, unlike other float modes. */
4440 if (DECIMAL_FLOAT_MODE_P (mode))
4441 return 0;
4442
4443 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4444 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4445 return 0;
4446
4447 /* Floating point return values in %st(0), except for local functions when
4448 SSE math is enabled or for functions with sseregparm attribute. */
4449 if ((func || fntype)
4450 && (mode == SFmode || mode == DFmode))
4451 {
4452 int sse_level = ix86_function_sseregparm (fntype, func);
4453 if ((sse_level >= 1 && mode == SFmode)
4454 || (sse_level == 2 && mode == DFmode))
4455 return FIRST_SSE_REG;
4456 }
4457
4458 return FIRST_FLOAT_REG;
4459 }
4460 \f
4461 /* Create the va_list data type. */
4462
4463 static tree
4464 ix86_build_builtin_va_list (void)
4465 {
4466 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4467
4468 /* For i386 we use a plain pointer to the argument area. */
4469 if (!TARGET_64BIT)
4470 return build_pointer_type (char_type_node);
4471
4472 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4473 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4474
4475 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4476 unsigned_type_node);
4477 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4478 unsigned_type_node);
4479 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4480 ptr_type_node);
4481 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4482 ptr_type_node);
4483
4484 va_list_gpr_counter_field = f_gpr;
4485 va_list_fpr_counter_field = f_fpr;
4486
4487 DECL_FIELD_CONTEXT (f_gpr) = record;
4488 DECL_FIELD_CONTEXT (f_fpr) = record;
4489 DECL_FIELD_CONTEXT (f_ovf) = record;
4490 DECL_FIELD_CONTEXT (f_sav) = record;
4491
4492 TREE_CHAIN (record) = type_decl;
4493 TYPE_NAME (record) = type_decl;
4494 TYPE_FIELDS (record) = f_gpr;
4495 TREE_CHAIN (f_gpr) = f_fpr;
4496 TREE_CHAIN (f_fpr) = f_ovf;
4497 TREE_CHAIN (f_ovf) = f_sav;
4498
4499 layout_type (record);
4500
4501 /* The correct type is an array type of one element. */
4502 return build_array_type (record, build_index_type (size_zero_node));
4503 }
4504
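/* The 64-bit record built above corresponds to the C-level layout below
   (an illustrative rendering, not a declaration the compiler reads):

     typedef struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];  */
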
4505 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4506
4507 static void
4508 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4509 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4510 int no_rtl)
4511 {
4512 CUMULATIVE_ARGS next_cum;
4513 rtx save_area = NULL_RTX, mem;
4514 rtx label;
4515 rtx label_ref;
4516 rtx tmp_reg;
4517 rtx nsse_reg;
4518 int set;
4519 tree fntype;
4520 int stdarg_p;
4521 int i;
4522
4523 if (!TARGET_64BIT)
4524 return;
4525
4526 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4527 return;
4528
4529 /* Indicate to allocate space on the stack for varargs save area. */
4530 ix86_save_varrargs_registers = 1;
4531
4532 cfun->stack_alignment_needed = 128;
4533
4534 fntype = TREE_TYPE (current_function_decl);
4535 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4536 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4537 != void_type_node));
4538
4539 /* For varargs, we do not want to skip the dummy va_dcl argument.
4540 For stdargs, we do want to skip the last named argument. */
4541 next_cum = *cum;
4542 if (stdarg_p)
4543 function_arg_advance (&next_cum, mode, type, 1);
4544
4545 if (!no_rtl)
4546 save_area = frame_pointer_rtx;
4547
4548 set = get_varargs_alias_set ();
4549
4550 for (i = next_cum.regno;
4551 i < ix86_regparm
4552 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4553 i++)
4554 {
4555 mem = gen_rtx_MEM (Pmode,
4556 plus_constant (save_area, i * UNITS_PER_WORD));
4557 MEM_NOTRAP_P (mem) = 1;
4558 set_mem_alias_set (mem, set);
4559 emit_move_insn (mem, gen_rtx_REG (Pmode,
4560 x86_64_int_parameter_registers[i]));
4561 }
4562
4563 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4564 {
4565 /* Now emit code to save SSE registers. The AX parameter contains the number
4566 of SSE parameter registers used to call this function. We use the
4567 sse_prologue_save insn template, which produces a computed jump across
4568 the SSE saves. We need some preparation work to get this working. */
4569
4570 label = gen_label_rtx ();
4571 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4572
4573 /* Compute address to jump to :
4574 label - 5*eax + nnamed_sse_arguments*5 */
4575 tmp_reg = gen_reg_rtx (Pmode);
4576 nsse_reg = gen_reg_rtx (Pmode);
4577 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4578 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4579 gen_rtx_MULT (Pmode, nsse_reg,
4580 GEN_INT (4))));
4581 if (next_cum.sse_regno)
4582 emit_move_insn
4583 (nsse_reg,
4584 gen_rtx_CONST (DImode,
4585 gen_rtx_PLUS (DImode,
4586 label_ref,
4587 GEN_INT (next_cum.sse_regno * 4))));
4588 else
4589 emit_move_insn (nsse_reg, label_ref);
4590 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4591
4592 /* Compute the address of the memory block we save into. We always use a
4593 pointer pointing 127 bytes after the first byte to store - this is needed
4594 to keep the instruction size limited to 4 bytes. */
4595 tmp_reg = gen_reg_rtx (Pmode);
4596 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4597 plus_constant (save_area,
4598 8 * REGPARM_MAX + 127)));
4599 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4600 MEM_NOTRAP_P (mem) = 1;
4601 set_mem_alias_set (mem, set);
4602 set_mem_align (mem, BITS_PER_WORD);
4603
4604 /* And finally do the dirty job! */
4605 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4606 GEN_INT (next_cum.sse_regno), label));
4607 }
4608
4609 }
4610
4611 /* Implement va_start. */
4612
4613 void
4614 ix86_va_start (tree valist, rtx nextarg)
4615 {
4616 HOST_WIDE_INT words, n_gpr, n_fpr;
4617 tree f_gpr, f_fpr, f_ovf, f_sav;
4618 tree gpr, fpr, ovf, sav, t;
4619 tree type;
4620
4621 /* Only 64bit target needs something special. */
4622 if (!TARGET_64BIT)
4623 {
4624 std_expand_builtin_va_start (valist, nextarg);
4625 return;
4626 }
4627
4628 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4629 f_fpr = TREE_CHAIN (f_gpr);
4630 f_ovf = TREE_CHAIN (f_fpr);
4631 f_sav = TREE_CHAIN (f_ovf);
4632
4633 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4634 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4635 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4636 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4637 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4638
4639 /* Count number of gp and fp argument registers used. */
4640 words = current_function_args_info.words;
4641 n_gpr = current_function_args_info.regno;
4642 n_fpr = current_function_args_info.sse_regno;
4643
4644 if (TARGET_DEBUG_ARG)
4645 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4646 (int) words, (int) n_gpr, (int) n_fpr);
4647
4648 if (cfun->va_list_gpr_size)
4649 {
4650 type = TREE_TYPE (gpr);
4651 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4652 build_int_cst (type, n_gpr * 8));
4653 TREE_SIDE_EFFECTS (t) = 1;
4654 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4655 }
4656
4657 if (cfun->va_list_fpr_size)
4658 {
4659 type = TREE_TYPE (fpr);
4660 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4661 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4662 TREE_SIDE_EFFECTS (t) = 1;
4663 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4664 }
4665
4666 /* Find the overflow area. */
4667 type = TREE_TYPE (ovf);
4668 t = make_tree (type, virtual_incoming_args_rtx);
4669 if (words != 0)
4670 t = build2 (PLUS_EXPR, type, t,
4671 build_int_cst (type, words * UNITS_PER_WORD));
4672 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4673 TREE_SIDE_EFFECTS (t) = 1;
4674 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4675
4676 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4677 {
4678 /* Find the register save area.
4679 The function prologue saves it right above the stack frame. */
4680 type = TREE_TYPE (sav);
4681 t = make_tree (type, frame_pointer_rtx);
4682 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4683 TREE_SIDE_EFFECTS (t) = 1;
4684 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4685 }
4686 }
4687
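/* What ix86_va_start expands to, written as a C-level sketch (assumed;
   field names as in the va_list record built earlier):

     ap->gp_offset = n_gpr * 8;
     ap->fp_offset = 8 * REGPARM_MAX + n_fpr * 16;
     ap->overflow_arg_area = incoming argument area + words * UNITS_PER_WORD;
     ap->reg_save_area = register save block set up by the prologue;

   i.e. the offsets index into reg_save_area, whose first 48 bytes hold
   the integer registers and the rest the SSE registers.  */
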
4688 /* Implement va_arg. */
4689
4690 tree
4691 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4692 {
4693 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4694 tree f_gpr, f_fpr, f_ovf, f_sav;
4695 tree gpr, fpr, ovf, sav, t;
4696 int size, rsize;
4697 tree lab_false, lab_over = NULL_TREE;
4698 tree addr, t2;
4699 rtx container;
4700 int indirect_p = 0;
4701 tree ptrtype;
4702 enum machine_mode nat_mode;
4703
4704 /* Only 64bit target needs something special. */
4705 if (!TARGET_64BIT)
4706 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4707
4708 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4709 f_fpr = TREE_CHAIN (f_gpr);
4710 f_ovf = TREE_CHAIN (f_fpr);
4711 f_sav = TREE_CHAIN (f_ovf);
4712
4713 valist = build_va_arg_indirect_ref (valist);
4714 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4715 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4716 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4717 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4718
4719 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4720 if (indirect_p)
4721 type = build_pointer_type (type);
4722 size = int_size_in_bytes (type);
4723 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4724
4725 nat_mode = type_natural_mode (type);
4726 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4727 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4728
4729 /* Pull the value out of the saved registers. */
4730
4731 addr = create_tmp_var (ptr_type_node, "addr");
4732 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4733
4734 if (container)
4735 {
4736 int needed_intregs, needed_sseregs;
4737 bool need_temp;
4738 tree int_addr, sse_addr;
4739
4740 lab_false = create_artificial_label ();
4741 lab_over = create_artificial_label ();
4742
4743 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4744
4745 need_temp = (!REG_P (container)
4746 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4747 || TYPE_ALIGN (type) > 128));
4748
4749 /* In case we are passing a structure, verify that it is a consecutive block
4750 on the register save area. If not, we need to do moves. */
4751 if (!need_temp && !REG_P (container))
4752 {
4753 /* Verify that all registers are strictly consecutive */
4754 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4755 {
4756 int i;
4757
4758 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4759 {
4760 rtx slot = XVECEXP (container, 0, i);
4761 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4762 || INTVAL (XEXP (slot, 1)) != i * 16)
4763 need_temp = 1;
4764 }
4765 }
4766 else
4767 {
4768 int i;
4769
4770 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4771 {
4772 rtx slot = XVECEXP (container, 0, i);
4773 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4774 || INTVAL (XEXP (slot, 1)) != i * 8)
4775 need_temp = 1;
4776 }
4777 }
4778 }
4779 if (!need_temp)
4780 {
4781 int_addr = addr;
4782 sse_addr = addr;
4783 }
4784 else
4785 {
4786 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4787 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4788 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4789 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4790 }
4791
4792 /* First ensure that we fit completely in registers. */
4793 if (needed_intregs)
4794 {
4795 t = build_int_cst (TREE_TYPE (gpr),
4796 (REGPARM_MAX - needed_intregs + 1) * 8);
4797 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4798 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4799 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4800 gimplify_and_add (t, pre_p);
4801 }
4802 if (needed_sseregs)
4803 {
4804 t = build_int_cst (TREE_TYPE (fpr),
4805 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4806 + REGPARM_MAX * 8);
4807 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4808 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4809 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4810 gimplify_and_add (t, pre_p);
4811 }
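/* To illustrate the checks above: in the 64-bit va_list, gpr (gp_offset)
   counts bytes already consumed in the REGPARM_MAX * 8 == 48 byte integer
   register save area, and fpr (fp_offset) starts at 48 and indexes the
   SSE_REGPARM_MAX * 16 byte SSE area behind it.  The GE tests branch to
   lab_false when too few slots remain; e.g. with needed_intregs == 2 the
   threshold is (6 - 2 + 1) * 8 == 40, so a gp_offset of 40 leaves only
   one free integer register and forces the overflow path. */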
4812
4813 /* Compute index to start of area used for integer regs. */
4814 if (needed_intregs)
4815 {
4816 /* int_addr = gpr + sav; */
4817 t = fold_convert (ptr_type_node, gpr);
4818 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4819 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4820 gimplify_and_add (t, pre_p);
4821 }
4822 if (needed_sseregs)
4823 {
4824 /* sse_addr = fpr + sav; */
4825 t = fold_convert (ptr_type_node, fpr);
4826 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4827 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4828 gimplify_and_add (t, pre_p);
4829 }
4830 if (need_temp)
4831 {
4832 int i;
4833 tree temp = create_tmp_var (type, "va_arg_tmp");
4834
4835 /* addr = &temp; */
4836 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4837 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4838 gimplify_and_add (t, pre_p);
4839
4840 for (i = 0; i < XVECLEN (container, 0); i++)
4841 {
4842 rtx slot = XVECEXP (container, 0, i);
4843 rtx reg = XEXP (slot, 0);
4844 enum machine_mode mode = GET_MODE (reg);
4845 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4846 tree addr_type = build_pointer_type (piece_type);
4847 tree src_addr, src;
4848 int src_offset;
4849 tree dest_addr, dest;
4850
4851 if (SSE_REGNO_P (REGNO (reg)))
4852 {
4853 src_addr = sse_addr;
4854 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4855 }
4856 else
4857 {
4858 src_addr = int_addr;
4859 src_offset = REGNO (reg) * 8;
4860 }
4861 src_addr = fold_convert (addr_type, src_addr);
4862 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4863 size_int (src_offset)));
4864 src = build_va_arg_indirect_ref (src_addr);
4865
4866 dest_addr = fold_convert (addr_type, addr);
4867 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4868 size_int (INTVAL (XEXP (slot, 1)))));
4869 dest = build_va_arg_indirect_ref (dest_addr);
4870
4871 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4872 gimplify_and_add (t, pre_p);
4873 }
4874 }
4875
4876 if (needed_intregs)
4877 {
4878 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4879 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4880 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4881 gimplify_and_add (t, pre_p);
4882 }
4883 if (needed_sseregs)
4884 {
4885 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4886 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4887 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4888 gimplify_and_add (t, pre_p);
4889 }
4890
4891 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4892 gimplify_and_add (t, pre_p);
4893
4894 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4895 append_to_statement_list (t, pre_p);
4896 }
4897
4898 /* ... otherwise out of the overflow area. */
4899
4900 /* Care for on-stack alignment if needed. */
4901 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4902 || integer_zerop (TYPE_SIZE (type)))
4903 t = ovf;
4904 else
4905 {
4906 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4907 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4908 build_int_cst (TREE_TYPE (ovf), align - 1));
4909 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4910 build_int_cst (TREE_TYPE (t), -align));
4911 }
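/* E.g. a type requiring 16-byte alignment gives align == 16, and the
   computation above rounds the overflow-area pointer up to the next
   multiple of 16 via (ovf + 15) & -16. */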
4912 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4913
4914 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4915 gimplify_and_add (t2, pre_p);
4916
4917 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4918 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4919 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4920 gimplify_and_add (t, pre_p);
4921
4922 if (container)
4923 {
4924 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4925 append_to_statement_list (t, pre_p);
4926 }
4927
4928 ptrtype = build_pointer_type (type);
4929 addr = fold_convert (ptrtype, addr);
4930
4931 if (indirect_p)
4932 addr = build_va_arg_indirect_ref (addr);
4933 return build_va_arg_indirect_ref (addr);
4934 }
4935 \f
4936 /* Return nonzero if OPNUM's MEM should be matched
4937 in movabs* patterns. */
4938
4939 int
4940 ix86_check_movabs (rtx insn, int opnum)
4941 {
4942 rtx set, mem;
4943
4944 set = PATTERN (insn);
4945 if (GET_CODE (set) == PARALLEL)
4946 set = XVECEXP (set, 0, 0);
4947 gcc_assert (GET_CODE (set) == SET);
4948 mem = XEXP (set, opnum);
4949 while (GET_CODE (mem) == SUBREG)
4950 mem = SUBREG_REG (mem);
4951 gcc_assert (MEM_P (mem));
4952 return (volatile_ok || !MEM_VOLATILE_P (mem));
4953 }
4954 \f
4955 /* Initialize the table of extra 80387 mathematical constants. */
4956
4957 static void
4958 init_ext_80387_constants (void)
4959 {
4960 static const char * cst[5] =
4961 {
4962 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4963 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4964 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4965 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4966 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4967 };
4968 int i;
4969
4970 for (i = 0; i < 5; i++)
4971 {
4972 real_from_string (&ext_80387_constants_table[i], cst[i]);
4973 /* Ensure each constant is rounded to XFmode precision. */
4974 real_convert (&ext_80387_constants_table[i],
4975 XFmode, &ext_80387_constants_table[i]);
4976 }
4977
4978 ext_80387_constants_init = 1;
4979 }
4980
4981 /* Return true if the constant is something that can be loaded with
4982 a special instruction. */
4983
4984 int
4985 standard_80387_constant_p (rtx x)
4986 {
4987 REAL_VALUE_TYPE r;
4988
4989 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4990 return -1;
4991
4992 if (x == CONST0_RTX (GET_MODE (x)))
4993 return 1;
4994 if (x == CONST1_RTX (GET_MODE (x)))
4995 return 2;
4996
4997 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4998
4999 /* For XFmode constants, try to find a special 80387 instruction when
5000 optimizing for size or on those CPUs that benefit from them. */
5001 if (GET_MODE (x) == XFmode
5002 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
5003 {
5004 int i;
5005
5006 if (! ext_80387_constants_init)
5007 init_ext_80387_constants ();
5008
5009 for (i = 0; i < 5; i++)
5010 if (real_identical (&r, &ext_80387_constants_table[i]))
5011 return i + 3;
5012 }
5013
5014 /* A load of the constant -0.0 or -1.0 will be split into an
5015 fldz;fchs or fld1;fchs sequence. */
5016 if (real_isnegzero (&r))
5017 return 8;
5018 if (real_identical (&r, &dconstm1))
5019 return 9;
5020
5021 return 0;
5022 }
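/* Summary of the return values of standard_80387_constant_p:
   -1  not a floating-point CONST_DOUBLE
    0  no special load instruction exists
    1  +0.0 (fldz)    2  +1.0 (fld1)
    3..7  entries of ext_80387_constants_table (fldlg2, fldln2, fldl2e,
          fldl2t, fldpi)
    8  -0.0 and 9  -1.0, later split into fldz;fchs / fld1;fchs. */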
5023
5024 /* Return the opcode of the special instruction to be used to load
5025 the constant X. */
5026
5027 const char *
5028 standard_80387_constant_opcode (rtx x)
5029 {
5030 switch (standard_80387_constant_p (x))
5031 {
5032 case 1:
5033 return "fldz";
5034 case 2:
5035 return "fld1";
5036 case 3:
5037 return "fldlg2";
5038 case 4:
5039 return "fldln2";
5040 case 5:
5041 return "fldl2e";
5042 case 6:
5043 return "fldl2t";
5044 case 7:
5045 return "fldpi";
5046 case 8:
5047 case 9:
5048 return "#";
5049 default:
5050 gcc_unreachable ();
5051 }
5052 }
5053
5054 /* Return the CONST_DOUBLE representing the 80387 constant that is
5055 loaded by the specified special instruction. The argument IDX
5056 matches the return value from standard_80387_constant_p. */
5057
5058 rtx
5059 standard_80387_constant_rtx (int idx)
5060 {
5061 int i;
5062
5063 if (! ext_80387_constants_init)
5064 init_ext_80387_constants ();
5065
5066 switch (idx)
5067 {
5068 case 3:
5069 case 4:
5070 case 5:
5071 case 6:
5072 case 7:
5073 i = idx - 3;
5074 break;
5075
5076 default:
5077 gcc_unreachable ();
5078 }
5079
5080 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5081 XFmode);
5082 }
5083
5084 /* Return 1 if mode is a valid mode for sse. */
5085 static int
5086 standard_sse_mode_p (enum machine_mode mode)
5087 {
5088 switch (mode)
5089 {
5090 case V16QImode:
5091 case V8HImode:
5092 case V4SImode:
5093 case V2DImode:
5094 case V4SFmode:
5095 case V2DFmode:
5096 return 1;
5097
5098 default:
5099 return 0;
5100 }
5101 }
5102
5103 /* Return 1 if X is an FP constant that we can load into an SSE register
5104 without using memory. */
5105 int
5106 standard_sse_constant_p (rtx x)
5107 {
5108 enum machine_mode mode = GET_MODE (x);
5109
5110 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5111 return 1;
5112 if (vector_all_ones_operand (x, mode)
5113 && standard_sse_mode_p (mode))
5114 return TARGET_SSE2 ? 2 : -1;
5115
5116 return 0;
5117 }
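/* Summary of the return values: 1 for an all-zero constant (cleared with
   xorps/xorpd/pxor), 2 for an all-ones vector when SSE2 is available
   (pcmpeqd), -1 for all-ones without SSE2, and 0 otherwise. */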
5118
5119 /* Return the opcode of the special instruction to be used to load
5120 the constant X. */
5121
5122 const char *
5123 standard_sse_constant_opcode (rtx insn, rtx x)
5124 {
5125 switch (standard_sse_constant_p (x))
5126 {
5127 case 1:
5128 if (get_attr_mode (insn) == MODE_V4SF)
5129 return "xorps\t%0, %0";
5130 else if (get_attr_mode (insn) == MODE_V2DF)
5131 return "xorpd\t%0, %0";
5132 else
5133 return "pxor\t%0, %0";
5134 case 2:
5135 return "pcmpeqd\t%0, %0";
5136 }
5137 gcc_unreachable ();
5138 }
5139
5140 /* Returns 1 if OP contains a symbol reference. */
5141
5142 int
5143 symbolic_reference_mentioned_p (rtx op)
5144 {
5145 const char *fmt;
5146 int i;
5147
5148 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5149 return 1;
5150
5151 fmt = GET_RTX_FORMAT (GET_CODE (op));
5152 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5153 {
5154 if (fmt[i] == 'E')
5155 {
5156 int j;
5157
5158 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5159 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5160 return 1;
5161 }
5162
5163 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5164 return 1;
5165 }
5166
5167 return 0;
5168 }
5169
5170 /* Return 1 if it is appropriate to emit `ret' instructions in the
5171 body of a function. Do this only if the epilogue is simple, needing a
5172 couple of insns. Prior to reloading, we can't tell how many registers
5173 must be saved, so return 0 then. Return 0 if there is no frame
5174 marker to de-allocate. */
5175
5176 int
5177 ix86_can_use_return_insn_p (void)
5178 {
5179 struct ix86_frame frame;
5180
5181 if (! reload_completed || frame_pointer_needed)
5182 return 0;
5183
5184 /* Don't allow more than 32768 bytes of popped arguments, since
5185 that's all we handle with one instruction. */
5186 if (current_function_pops_args
5187 && current_function_args_size >= 32768)
5188 return 0;
5189
5190 ix86_compute_frame_layout (&frame);
5191 return frame.to_allocate == 0 && frame.nregs == 0;
5192 }
5193 \f
5194 /* Value should be nonzero if functions must have frame pointers.
5195 Zero means the frame pointer need not be set up (and parms may
5196 be accessed via the stack pointer) in functions that seem suitable. */
5197
5198 int
5199 ix86_frame_pointer_required (void)
5200 {
5201 /* If we accessed previous frames, then the generated code expects
5202 to be able to access the saved ebp value in our frame. */
5203 if (cfun->machine->accesses_prev_frame)
5204 return 1;
5205
5206 /* Several x86 OSes need a frame pointer for other reasons,
5207 usually pertaining to setjmp. */
5208 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5209 return 1;
5210
5211 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5212 the frame pointer by default. Turn it back on now if we've not
5213 got a leaf function. */
5214 if (TARGET_OMIT_LEAF_FRAME_POINTER
5215 && (!current_function_is_leaf
5216 || ix86_current_function_calls_tls_descriptor))
5217 return 1;
5218
5219 if (current_function_profile)
5220 return 1;
5221
5222 return 0;
5223 }
5224
5225 /* Record that the current function accesses previous call frames. */
5226
5227 void
5228 ix86_setup_frame_addresses (void)
5229 {
5230 cfun->machine->accesses_prev_frame = 1;
5231 }
5232 \f
5233 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5234 # define USE_HIDDEN_LINKONCE 1
5235 #else
5236 # define USE_HIDDEN_LINKONCE 0
5237 #endif
5238
5239 static int pic_labels_used;
5240
5241 /* Fills in the label name that should be used for a pc thunk for
5242 the given register. */
5243
5244 static void
5245 get_pc_thunk_name (char name[32], unsigned int regno)
5246 {
5247 gcc_assert (!TARGET_64BIT);
5248
5249 if (USE_HIDDEN_LINKONCE)
5250 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5251 else
5252 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5253 }
5254
5255
5256 /* This function emits the pc thunks used by -fpic code: each thunk
5257 loads its own return address into the requested register and returns. */
5258
5259 void
5260 ix86_file_end (void)
5261 {
5262 rtx xops[2];
5263 int regno;
5264
5265 for (regno = 0; regno < 8; ++regno)
5266 {
5267 char name[32];
5268
5269 if (! ((pic_labels_used >> regno) & 1))
5270 continue;
5271
5272 get_pc_thunk_name (name, regno);
5273
5274 #if TARGET_MACHO
5275 if (TARGET_MACHO)
5276 {
5277 switch_to_section (darwin_sections[text_coal_section]);
5278 fputs ("\t.weak_definition\t", asm_out_file);
5279 assemble_name (asm_out_file, name);
5280 fputs ("\n\t.private_extern\t", asm_out_file);
5281 assemble_name (asm_out_file, name);
5282 fputs ("\n", asm_out_file);
5283 ASM_OUTPUT_LABEL (asm_out_file, name);
5284 }
5285 else
5286 #endif
5287 if (USE_HIDDEN_LINKONCE)
5288 {
5289 tree decl;
5290
5291 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5292 error_mark_node);
5293 TREE_PUBLIC (decl) = 1;
5294 TREE_STATIC (decl) = 1;
5295 DECL_ONE_ONLY (decl) = 1;
5296
5297 (*targetm.asm_out.unique_section) (decl, 0);
5298 switch_to_section (get_named_section (decl, NULL, 0));
5299
5300 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5301 fputs ("\t.hidden\t", asm_out_file);
5302 assemble_name (asm_out_file, name);
5303 fputc ('\n', asm_out_file);
5304 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5305 }
5306 else
5307 {
5308 switch_to_section (text_section);
5309 ASM_OUTPUT_LABEL (asm_out_file, name);
5310 }
5311
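/* The thunk body emitted below is simply
       movl (%esp), %reg
       ret
   i.e. it copies its own return address (the pc of the instruction
   following the caller's call) into REG for use as the PIC base. */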
5312 xops[0] = gen_rtx_REG (SImode, regno);
5313 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5314 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5315 output_asm_insn ("ret", xops);
5316 }
5317
5318 if (NEED_INDICATE_EXEC_STACK)
5319 file_end_indicate_exec_stack ();
5320 }
5321
5322 /* Emit code for the SET_GOT patterns. */
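/* Roughly speaking, and ignoring the Mach-O variants, one of two
   sequences is emitted to load the GOT address into DEST:

       call 1f                        call __i686.get_pc_thunk.reg
   1:  popl %reg                      addl $_GLOBAL_OFFSET_TABLE_, %reg
       addl $_GLOBAL_OFFSET_TABLE_+[.-1b], %reg

   The thunk form is used for TARGET_DEEP_BRANCH_PREDICTION, since a bare
   call/pop pair unbalances the processor's return-address predictor. */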
5323
5324 const char *
5325 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5326 {
5327 rtx xops[3];
5328
5329 xops[0] = dest;
5330 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5331
5332 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5333 {
5334 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5335
5336 if (!flag_pic)
5337 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5338 else
5339 output_asm_insn ("call\t%a2", xops);
5340
5341 #if TARGET_MACHO
5342 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5343 is what will be referenced by the Mach-O PIC subsystem. */
5344 if (!label)
5345 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5346 #endif
5347
5348 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5349 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5350
5351 if (flag_pic)
5352 output_asm_insn ("pop{l}\t%0", xops);
5353 }
5354 else
5355 {
5356 char name[32];
5357 get_pc_thunk_name (name, REGNO (dest));
5358 pic_labels_used |= 1 << REGNO (dest);
5359
5360 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5361 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5362 output_asm_insn ("call\t%X2", xops);
5363 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5364 is what will be referenced by the Mach-O PIC subsystem. */
5365 #if TARGET_MACHO
5366 if (!label)
5367 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5368 else
5369 targetm.asm_out.internal_label (asm_out_file, "L",
5370 CODE_LABEL_NUMBER (label));
5371 #endif
5372 }
5373
5374 if (TARGET_MACHO)
5375 return "";
5376
5377 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5378 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5379 else
5380 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5381
5382 return "";
5383 }
5384
5385 /* Generate a "push" pattern for input ARG. */
5386
5387 static rtx
5388 gen_push (rtx arg)
5389 {
5390 return gen_rtx_SET (VOIDmode,
5391 gen_rtx_MEM (Pmode,
5392 gen_rtx_PRE_DEC (Pmode,
5393 stack_pointer_rtx)),
5394 arg);
5395 }
5396
5397 /* Return the number of an unused call-clobbered register if one is
5398 available for the entire function, or INVALID_REGNUM otherwise. */
5399
5400 static unsigned int
5401 ix86_select_alt_pic_regnum (void)
5402 {
5403 if (current_function_is_leaf && !current_function_profile
5404 && !ix86_current_function_calls_tls_descriptor)
5405 {
5406 int i;
5407 for (i = 2; i >= 0; --i)
5408 if (!regs_ever_live[i])
5409 return i;
5410 }
5411
5412 return INVALID_REGNUM;
5413 }
5414
5415 /* Return 1 if we need to save REGNO. */
5416 static int
5417 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5418 {
5419 if (pic_offset_table_rtx
5420 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5421 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5422 || current_function_profile
5423 || current_function_calls_eh_return
5424 || current_function_uses_const_pool))
5425 {
5426 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5427 return 0;
5428 return 1;
5429 }
5430
5431 if (current_function_calls_eh_return && maybe_eh_return)
5432 {
5433 unsigned i;
5434 for (i = 0; ; i++)
5435 {
5436 unsigned test = EH_RETURN_DATA_REGNO (i);
5437 if (test == INVALID_REGNUM)
5438 break;
5439 if (test == regno)
5440 return 1;
5441 }
5442 }
5443
5444 if (cfun->machine->force_align_arg_pointer
5445 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5446 return 1;
5447
5448 return (regs_ever_live[regno]
5449 && !call_used_regs[regno]
5450 && !fixed_regs[regno]
5451 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5452 }
5453
5454 /* Return number of registers to be saved on the stack. */
5455
5456 static int
5457 ix86_nsaved_regs (void)
5458 {
5459 int nregs = 0;
5460 int regno;
5461
5462 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5463 if (ix86_save_reg (regno, true))
5464 nregs++;
5465 return nregs;
5466 }
5467
5468 /* Return the offset between two registers, one to be eliminated, and the other
5469 its replacement, at the start of a routine. */
5470
5471 HOST_WIDE_INT
5472 ix86_initial_elimination_offset (int from, int to)
5473 {
5474 struct ix86_frame frame;
5475 ix86_compute_frame_layout (&frame);
5476
5477 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5478 return frame.hard_frame_pointer_offset;
5479 else if (from == FRAME_POINTER_REGNUM
5480 && to == HARD_FRAME_POINTER_REGNUM)
5481 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5482 else
5483 {
5484 gcc_assert (to == STACK_POINTER_REGNUM);
5485
5486 if (from == ARG_POINTER_REGNUM)
5487 return frame.stack_pointer_offset;
5488
5489 gcc_assert (from == FRAME_POINTER_REGNUM);
5490 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5491 }
5492 }
5493
5494 /* Fill the structure ix86_frame describing the frame of the currently compiled function. */
5495
5496 static void
5497 ix86_compute_frame_layout (struct ix86_frame *frame)
5498 {
5499 HOST_WIDE_INT total_size;
5500 unsigned int stack_alignment_needed;
5501 HOST_WIDE_INT offset;
5502 unsigned int preferred_alignment;
5503 HOST_WIDE_INT size = get_frame_size ();
5504
5505 frame->nregs = ix86_nsaved_regs ();
5506 total_size = size;
5507
5508 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5509 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5510
5511 /* During reload iteration the number of registers saved can change.
5512 Recompute the value as needed. Do not recompute when the number of
5513 registers didn't change, as reload makes multiple calls to the function
5514 and does not expect the decision to change within a single iteration. */
5515 if (!optimize_size
5516 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5517 {
5518 int count = frame->nregs;
5519
5520 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5521 /* The fast prologue uses move instead of push to save registers. This
5522 is significantly longer, but also executes faster as modern hardware
5523 can execute the moves in parallel, but can't do that for push/pop.
5524
5525 Be careful about choosing which prologue to emit: when the function
5526 takes many instructions to execute we may use the slow version, as well
5527 as when the function is known to be outside a hot spot (this is known
5528 only with feedback). Weight the size of the function by the number of
5529 registers to save, since it is cheap to use one or two push instructions
5530 but very slow to use many of them. */
5531 if (count)
5532 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5533 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5534 || (flag_branch_probabilities
5535 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5536 cfun->machine->use_fast_prologue_epilogue = false;
5537 else
5538 cfun->machine->use_fast_prologue_epilogue
5539 = !expensive_function_p (count);
5540 }
5541 if (TARGET_PROLOGUE_USING_MOVE
5542 && cfun->machine->use_fast_prologue_epilogue)
5543 frame->save_regs_using_mov = true;
5544 else
5545 frame->save_regs_using_mov = false;
5546
5547
5548 /* Skip return address and saved base pointer. */
5549 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5550
5551 frame->hard_frame_pointer_offset = offset;
5552
5553 /* Do some sanity checking of stack_alignment_needed and
5554 preferred_alignment, since the i386 port is the only one using these
5555 features, and they may break easily. */
5556
5557 gcc_assert (!size || stack_alignment_needed);
5558 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5559 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5560 gcc_assert (stack_alignment_needed
5561 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5562
5563 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5564 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5565
5566 /* Register save area */
5567 offset += frame->nregs * UNITS_PER_WORD;
5568
5569 /* Va-arg area */
5570 if (ix86_save_varrargs_registers)
5571 {
5572 offset += X86_64_VARARGS_SIZE;
5573 frame->va_arg_size = X86_64_VARARGS_SIZE;
5574 }
5575 else
5576 frame->va_arg_size = 0;
5577
5578 /* Align start of frame for local function. */
5579 frame->padding1 = ((offset + stack_alignment_needed - 1)
5580 & -stack_alignment_needed) - offset;
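/* The expression above rounds OFFSET up to the next multiple of
   stack_alignment_needed; e.g. offset == 20 with 16-byte alignment gives
   padding1 == 12, so the frame proper starts at offset 32. */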
5581
5582 offset += frame->padding1;
5583
5584 /* Frame pointer points here. */
5585 frame->frame_pointer_offset = offset;
5586
5587 offset += size;
5588
5589 /* Add the outgoing arguments area. It can be skipped if we eliminated
5590 all the function calls as dead code.
5591 Skipping is however impossible when the function calls alloca, since the
5592 alloca expander assumes that the last current_function_outgoing_args_size
5593 bytes of the stack frame are unused. */
5594 if (ACCUMULATE_OUTGOING_ARGS
5595 && (!current_function_is_leaf || current_function_calls_alloca
5596 || ix86_current_function_calls_tls_descriptor))
5597 {
5598 offset += current_function_outgoing_args_size;
5599 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5600 }
5601 else
5602 frame->outgoing_arguments_size = 0;
5603
5604 /* Align stack boundary. Only needed if we're calling another function
5605 or using alloca. */
5606 if (!current_function_is_leaf || current_function_calls_alloca
5607 || ix86_current_function_calls_tls_descriptor)
5608 frame->padding2 = ((offset + preferred_alignment - 1)
5609 & -preferred_alignment) - offset;
5610 else
5611 frame->padding2 = 0;
5612
5613 offset += frame->padding2;
5614
5615 /* We've reached end of stack frame. */
5616 frame->stack_pointer_offset = offset;
5617
5618 /* Size the prologue needs to allocate. */
5619 frame->to_allocate =
5620 (size + frame->padding1 + frame->padding2
5621 + frame->outgoing_arguments_size + frame->va_arg_size);
5622
5623 if ((!frame->to_allocate && frame->nregs <= 1)
5624 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5625 frame->save_regs_using_mov = false;
5626
5627 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5628 && current_function_is_leaf
5629 && !ix86_current_function_calls_tls_descriptor)
5630 {
5631 frame->red_zone_size = frame->to_allocate;
5632 if (frame->save_regs_using_mov)
5633 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5634 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5635 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5636 }
5637 else
5638 frame->red_zone_size = 0;
5639 frame->to_allocate -= frame->red_zone_size;
5640 frame->stack_pointer_offset -= frame->red_zone_size;
5641 #if 0
5642 fprintf (stderr, "\n");
5643 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5644 fprintf (stderr, "size: %ld\n", (long)size);
5645 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5646 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5647 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5648 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5649 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5650 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5651 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5652 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5653 (long)frame->hard_frame_pointer_offset);
5654 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5655 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5656 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5657 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5658 #endif
5659 }
5660
5661 /* Emit code to save registers in the prologue. */
5662
5663 static void
5664 ix86_emit_save_regs (void)
5665 {
5666 unsigned int regno;
5667 rtx insn;
5668
5669 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5670 if (ix86_save_reg (regno, true))
5671 {
5672 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5673 RTX_FRAME_RELATED_P (insn) = 1;
5674 }
5675 }
5676
5677 /* Emit code to save registers using MOV insns. The first register
5678 is stored at POINTER + OFFSET. */
5679 static void
5680 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5681 {
5682 unsigned int regno;
5683 rtx insn;
5684
5685 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5686 if (ix86_save_reg (regno, true))
5687 {
5688 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5689 Pmode, offset),
5690 gen_rtx_REG (Pmode, regno));
5691 RTX_FRAME_RELATED_P (insn) = 1;
5692 offset += UNITS_PER_WORD;
5693 }
5694 }
5695
5696 /* Expand a prologue or epilogue stack adjustment.
5697 The pattern exists to put a dependency on all ebp-based memory accesses.
5698 STYLE should be negative if instructions should be marked as frame related,
5699 zero if the %r11 register is live and cannot be freely used, and positive
5700 otherwise. */
5701
5702 static void
5703 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5704 {
5705 rtx insn;
5706
5707 if (! TARGET_64BIT)
5708 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5709 else if (x86_64_immediate_operand (offset, DImode))
5710 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5711 else
5712 {
5713 rtx r11;
5714 /* r11 is used by indirect sibcall return as well, set before the
5715 epilogue and used after the epilogue. ATM indirect sibcall
5716 shouldn't be used together with huge frame sizes in one
5717 function because of the frame_size check in sibcall.c. */
5718 gcc_assert (style);
5719 r11 = gen_rtx_REG (DImode, R11_REG);
5720 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5721 if (style < 0)
5722 RTX_FRAME_RELATED_P (insn) = 1;
5723 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5724 offset));
5725 }
5726 if (style < 0)
5727 RTX_FRAME_RELATED_P (insn) = 1;
5728 }
5729
5730 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5731
5732 static rtx
5733 ix86_internal_arg_pointer (void)
5734 {
5735 bool has_force_align_arg_pointer =
5736 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5737 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5738 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5739 && DECL_NAME (current_function_decl)
5740 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5741 && DECL_FILE_SCOPE_P (current_function_decl))
5742 || ix86_force_align_arg_pointer
5743 || has_force_align_arg_pointer)
5744 {
5745 /* Nested functions can't realign the stack due to a register
5746 conflict. */
5747 if (DECL_CONTEXT (current_function_decl)
5748 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5749 {
5750 if (ix86_force_align_arg_pointer)
5751 warning (0, "-mstackrealign ignored for nested functions");
5752 if (has_force_align_arg_pointer)
5753 error ("%s not supported for nested functions",
5754 ix86_force_align_arg_pointer_string);
5755 return virtual_incoming_args_rtx;
5756 }
5757 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5758 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5759 }
5760 else
5761 return virtual_incoming_args_rtx;
5762 }
5763
5764 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5765 This is called from dwarf2out.c to emit call frame instructions
5766 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5767 static void
5768 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5769 {
5770 rtx unspec = SET_SRC (pattern);
5771 gcc_assert (GET_CODE (unspec) == UNSPEC);
5772
5773 switch (index)
5774 {
5775 case UNSPEC_REG_SAVE:
5776 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5777 SET_DEST (pattern));
5778 break;
5779 case UNSPEC_DEF_CFA:
5780 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5781 INTVAL (XVECEXP (unspec, 0, 0)));
5782 break;
5783 default:
5784 gcc_unreachable ();
5785 }
5786 }
5787
5788 /* Expand the prologue into a bunch of separate insns. */
5789
5790 void
5791 ix86_expand_prologue (void)
5792 {
5793 rtx insn;
5794 bool pic_reg_used;
5795 struct ix86_frame frame;
5796 HOST_WIDE_INT allocate;
5797
5798 ix86_compute_frame_layout (&frame);
5799
5800 if (cfun->machine->force_align_arg_pointer)
5801 {
5802 rtx x, y;
5803
5804 /* Grab the argument pointer. */
5805 x = plus_constant (stack_pointer_rtx, 4);
5806 y = cfun->machine->force_align_arg_pointer;
5807 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5808 RTX_FRAME_RELATED_P (insn) = 1;
5809
5810 /* The unwind info consists of two parts: install the fafp as the cfa,
5811 and record the fafp as the "save register" of the stack pointer.
5812 The latter is there so that the unwinder can see where it should
5813 restore the stack pointer across the "and" insn. */
5814 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5815 x = gen_rtx_SET (VOIDmode, y, x);
5816 RTX_FRAME_RELATED_P (x) = 1;
5817 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5818 UNSPEC_REG_SAVE);
5819 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5820 RTX_FRAME_RELATED_P (y) = 1;
5821 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5822 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5823 REG_NOTES (insn) = x;
5824
5825 /* Align the stack. */
5826 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5827 GEN_INT (-16)));
5828
5829 /* And here we cheat like madmen with the unwind info. We force the
5830 cfa register back to sp+4, which is exactly what it was at the
5831 start of the function. Re-pushing the return address results in
5832 the return at the same spot relative to the cfa, and thus is
5833 correct wrt the unwind info. */
5834 x = cfun->machine->force_align_arg_pointer;
5835 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5836 insn = emit_insn (gen_push (x));
5837 RTX_FRAME_RELATED_P (insn) = 1;
5838
5839 x = GEN_INT (4);
5840 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5841 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5842 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5843 REG_NOTES (insn) = x;
5844 }
5845
5846 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5847 slower on all targets. Also sdb doesn't like it. */
5848
5849 if (frame_pointer_needed)
5850 {
5851 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5852 RTX_FRAME_RELATED_P (insn) = 1;
5853
5854 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5855 RTX_FRAME_RELATED_P (insn) = 1;
5856 }
5857
5858 allocate = frame.to_allocate;
5859
5860 if (!frame.save_regs_using_mov)
5861 ix86_emit_save_regs ();
5862 else
5863 allocate += frame.nregs * UNITS_PER_WORD;
5864
5865 /* When using the red zone we may start saving registers before
5866 allocating the stack frame, saving one cycle of the prologue. */
5867 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5868 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5869 : stack_pointer_rtx,
5870 -frame.nregs * UNITS_PER_WORD);
5871
5872 if (allocate == 0)
5873 ;
5874 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5875 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5876 GEN_INT (-allocate), -1);
5877 else
5878 {
5879 /* Only valid for Win32. */
5880 rtx eax = gen_rtx_REG (SImode, 0);
5881 bool eax_live = ix86_eax_live_at_start_p ();
5882 rtx t;
5883
5884 gcc_assert (!TARGET_64BIT);
5885
5886 if (eax_live)
5887 {
5888 emit_insn (gen_push (eax));
5889 allocate -= 4;
5890 }
5891
5892 emit_move_insn (eax, GEN_INT (allocate));
5893
5894 insn = emit_insn (gen_allocate_stack_worker (eax));
5895 RTX_FRAME_RELATED_P (insn) = 1;
5896 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5897 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5898 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5899 t, REG_NOTES (insn));
5900
5901 if (eax_live)
5902 {
5903 if (frame_pointer_needed)
5904 t = plus_constant (hard_frame_pointer_rtx,
5905 allocate
5906 - frame.to_allocate
5907 - frame.nregs * UNITS_PER_WORD);
5908 else
5909 t = plus_constant (stack_pointer_rtx, allocate);
5910 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5911 }
5912 }
5913
5914 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5915 {
5916 if (!frame_pointer_needed || !frame.to_allocate)
5917 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5918 else
5919 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5920 -frame.nregs * UNITS_PER_WORD);
5921 }
5922
5923 pic_reg_used = false;
5924 if (pic_offset_table_rtx
5925 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5926 || current_function_profile))
5927 {
5928 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5929
5930 if (alt_pic_reg_used != INVALID_REGNUM)
5931 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5932
5933 pic_reg_used = true;
5934 }
5935
5936 if (pic_reg_used)
5937 {
5938 if (TARGET_64BIT)
5939 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5940 else
5941 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5942
5943 /* Even with accurate pre-reload life analysis, we can wind up
5944 deleting all references to the pic register after reload.
5945 Consider if cross-jumping unifies two sides of a branch
5946 controlled by a comparison vs the only read from a global.
5947 In that case, allow the set_got to be deleted, though we're
5948 too late to do anything about the ebx save in the prologue. */
5949 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5950 }
5951
5952 /* Prevent function calls from being scheduled before the call to mcount.
5953 In the pic_reg_used case, make sure that the got load isn't deleted. */
5954 if (current_function_profile)
5955 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5956 }
5957
5958 /* Emit code to restore saved registers using MOV insns. First register
5959 is restored from POINTER + OFFSET. */
5960 static void
5961 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5962 int maybe_eh_return)
5963 {
5964 int regno;
5965 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5966
5967 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5968 if (ix86_save_reg (regno, maybe_eh_return))
5969 {
5970 /* Ensure that adjust_address won't be forced to produce a pointer
5971 out of the range allowed by the x86-64 instruction set. */
5972 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5973 {
5974 rtx r11;
5975
5976 r11 = gen_rtx_REG (DImode, R11_REG);
5977 emit_move_insn (r11, GEN_INT (offset));
5978 emit_insn (gen_adddi3 (r11, r11, pointer));
5979 base_address = gen_rtx_MEM (Pmode, r11);
5980 offset = 0;
5981 }
5982 emit_move_insn (gen_rtx_REG (Pmode, regno),
5983 adjust_address (base_address, Pmode, offset));
5984 offset += UNITS_PER_WORD;
5985 }
5986 }
5987
5988 /* Restore function stack, frame, and registers. */
5989
5990 void
5991 ix86_expand_epilogue (int style)
5992 {
5993 int regno;
5994 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5995 struct ix86_frame frame;
5996 HOST_WIDE_INT offset;
5997
5998 ix86_compute_frame_layout (&frame);
5999
6000 /* Calculate start of saved registers relative to ebp. Special care
6001 must be taken for the normal return case of a function using
6002 eh_return: the eax and edx registers are marked as saved, but not
6003 restored along this path. */
6004 offset = frame.nregs;
6005 if (current_function_calls_eh_return && style != 2)
6006 offset -= 2;
6007 offset *= -UNITS_PER_WORD;
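/* For example, on a 32-bit target with three saved registers and no
   eh_return, offset becomes -12: the saved registers occupy the 12 bytes
   just below where %ebp points. */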
6008
6009 /* If we're only restoring one register and sp is not valid then
6010 use a move instruction to restore the register, since it's
6011 less work than reloading sp and popping the register.
6012
6013 The default code results in a stack adjustment using an add/lea instruction,
6014 while this code results in a LEAVE instruction (or discrete equivalent),
6015 so it is profitable in some other cases as well, especially when there
6016 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6017 and there is exactly one register to pop. This heuristic may need some
6018 tuning in the future. */
6019 if ((!sp_valid && frame.nregs <= 1)
6020 || (TARGET_EPILOGUE_USING_MOVE
6021 && cfun->machine->use_fast_prologue_epilogue
6022 && (frame.nregs > 1 || frame.to_allocate))
6023 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6024 || (frame_pointer_needed && TARGET_USE_LEAVE
6025 && cfun->machine->use_fast_prologue_epilogue
6026 && frame.nregs == 1)
6027 || current_function_calls_eh_return)
6028 {
6029 /* Restore registers. We can use ebp or esp to address the memory
6030 locations. If both are available, default to ebp, since offsets
6031 are known to be small. The only exception is esp pointing directly to
6032 the end of the block of saved registers, where we may simplify the
6033 addressing mode. */
6034
6035 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6036 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6037 frame.to_allocate, style == 2);
6038 else
6039 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6040 offset, style == 2);
6041
6042 /* eh_return epilogues need %ecx added to the stack pointer. */
6043 if (style == 2)
6044 {
6045 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6046
6047 if (frame_pointer_needed)
6048 {
6049 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6050 tmp = plus_constant (tmp, UNITS_PER_WORD);
6051 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6052
6053 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6054 emit_move_insn (hard_frame_pointer_rtx, tmp);
6055
6056 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6057 const0_rtx, style);
6058 }
6059 else
6060 {
6061 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6062 tmp = plus_constant (tmp, (frame.to_allocate
6063 + frame.nregs * UNITS_PER_WORD));
6064 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6065 }
6066 }
6067 else if (!frame_pointer_needed)
6068 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6069 GEN_INT (frame.to_allocate
6070 + frame.nregs * UNITS_PER_WORD),
6071 style);
6072 /* If not an i386, mov & pop is faster than "leave". */
6073 else if (TARGET_USE_LEAVE || optimize_size
6074 || !cfun->machine->use_fast_prologue_epilogue)
6075 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6076 else
6077 {
6078 pro_epilogue_adjust_stack (stack_pointer_rtx,
6079 hard_frame_pointer_rtx,
6080 const0_rtx, style);
6081 if (TARGET_64BIT)
6082 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6083 else
6084 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6085 }
6086 }
6087 else
6088 {
6089 /* First step is to deallocate the stack frame so that we can
6090 pop the registers. */
6091 if (!sp_valid)
6092 {
6093 gcc_assert (frame_pointer_needed);
6094 pro_epilogue_adjust_stack (stack_pointer_rtx,
6095 hard_frame_pointer_rtx,
6096 GEN_INT (offset), style);
6097 }
6098 else if (frame.to_allocate)
6099 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6100 GEN_INT (frame.to_allocate), style);
6101
6102 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6103 if (ix86_save_reg (regno, false))
6104 {
6105 if (TARGET_64BIT)
6106 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6107 else
6108 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6109 }
6110 if (frame_pointer_needed)
6111 {
6112 /* Leave results in shorter dependency chains on CPUs that are
6113 able to grok it fast. */
6114 if (TARGET_USE_LEAVE)
6115 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6116 else if (TARGET_64BIT)
6117 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6118 else
6119 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6120 }
6121 }
6122
6123 if (cfun->machine->force_align_arg_pointer)
6124 {
6125 emit_insn (gen_addsi3 (stack_pointer_rtx,
6126 cfun->machine->force_align_arg_pointer,
6127 GEN_INT (-4)));
6128 }
6129
6130 /* Sibcall epilogues don't want a return instruction. */
6131 if (style == 0)
6132 return;
6133
6134 if (current_function_pops_args && current_function_args_size)
6135 {
6136 rtx popc = GEN_INT (current_function_pops_args);
6137
6138 /* i386 can only pop 64K bytes. If asked to pop more, pop the
6139 return address, do an explicit add, and jump indirectly to the
6140 caller. */
6141
6142 if (current_function_pops_args >= 65536)
6143 {
6144 rtx ecx = gen_rtx_REG (SImode, 2);
6145
6146 /* There is no "pascal" calling convention in 64bit ABI. */
6147 gcc_assert (!TARGET_64BIT);
6148
6149 emit_insn (gen_popsi1 (ecx));
6150 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6151 emit_jump_insn (gen_return_indirect_internal (ecx));
6152 }
6153 else
6154 emit_jump_insn (gen_return_pop_internal (popc));
6155 }
6156 else
6157 emit_jump_insn (gen_return_internal ());
6158 }
6159
6160 /* Reset from the function's potential modifications. */
6161
6162 static void
6163 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6164 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6165 {
6166 if (pic_offset_table_rtx)
6167 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6168 #if TARGET_MACHO
6169 /* Mach-O doesn't support labels at the end of objects, so if
6170 it looks like we might want one, insert a NOP. */
6171 {
6172 rtx insn = get_last_insn ();
6173 while (insn
6174 && NOTE_P (insn)
6175 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6176 insn = PREV_INSN (insn);
6177 if (insn
6178 && (LABEL_P (insn)
6179 || (NOTE_P (insn)
6180 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6181 fputs ("\tnop\n", file);
6182 }
6183 #endif
6184
6185 }
6186 \f
6187 /* Extract the parts of an RTL expression that is a valid memory address
6188 for an instruction. Return 0 if the structure of the address is
6189 grossly off. Return -1 if the address contains ASHIFT, so it is not
6190 strictly valid, but is still used for computing the length of the lea instruction. */
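/* For example, (plus (plus (mult (reg %esi) (const_int 4)) (reg %ebx))
   (const_int 12)) decomposes into base == %ebx, index == %esi,
   scale == 4 and disp == 12, i.e. the operand of 12(%ebx,%esi,4). */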
6191
6192 int
6193 ix86_decompose_address (rtx addr, struct ix86_address *out)
6194 {
6195 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6196 rtx base_reg, index_reg;
6197 HOST_WIDE_INT scale = 1;
6198 rtx scale_rtx = NULL_RTX;
6199 int retval = 1;
6200 enum ix86_address_seg seg = SEG_DEFAULT;
6201
6202 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6203 base = addr;
6204 else if (GET_CODE (addr) == PLUS)
6205 {
6206 rtx addends[4], op;
6207 int n = 0, i;
6208
6209 op = addr;
6210 do
6211 {
6212 if (n >= 4)
6213 return 0;
6214 addends[n++] = XEXP (op, 1);
6215 op = XEXP (op, 0);
6216 }
6217 while (GET_CODE (op) == PLUS);
6218 if (n >= 4)
6219 return 0;
6220 addends[n] = op;
6221
6222 for (i = n; i >= 0; --i)
6223 {
6224 op = addends[i];
6225 switch (GET_CODE (op))
6226 {
6227 case MULT:
6228 if (index)
6229 return 0;
6230 index = XEXP (op, 0);
6231 scale_rtx = XEXP (op, 1);
6232 break;
6233
6234 case UNSPEC:
6235 if (XINT (op, 1) == UNSPEC_TP
6236 && TARGET_TLS_DIRECT_SEG_REFS
6237 && seg == SEG_DEFAULT)
6238 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6239 else
6240 return 0;
6241 break;
6242
6243 case REG:
6244 case SUBREG:
6245 if (!base)
6246 base = op;
6247 else if (!index)
6248 index = op;
6249 else
6250 return 0;
6251 break;
6252
6253 case CONST:
6254 case CONST_INT:
6255 case SYMBOL_REF:
6256 case LABEL_REF:
6257 if (disp)
6258 return 0;
6259 disp = op;
6260 break;
6261
6262 default:
6263 return 0;
6264 }
6265 }
6266 }
6267 else if (GET_CODE (addr) == MULT)
6268 {
6269 index = XEXP (addr, 0); /* index*scale */
6270 scale_rtx = XEXP (addr, 1);
6271 }
6272 else if (GET_CODE (addr) == ASHIFT)
6273 {
6274 rtx tmp;
6275
6276 /* We're called for lea too, which implements ashift on occasion. */
6277 index = XEXP (addr, 0);
6278 tmp = XEXP (addr, 1);
6279 if (!CONST_INT_P (tmp))
6280 return 0;
6281 scale = INTVAL (tmp);
6282 if ((unsigned HOST_WIDE_INT) scale > 3)
6283 return 0;
6284 scale = 1 << scale;
6285 retval = -1;
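/* E.g. (ashift (reg %eax) (const_int 2)) is treated as index %eax with
   scale 4; the -1 return value marks the address as useful only for
   computing the length of an lea. */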
6286 }
6287 else
6288 disp = addr; /* displacement */
6289
6290 /* Extract the integral value of scale. */
6291 if (scale_rtx)
6292 {
6293 if (!CONST_INT_P (scale_rtx))
6294 return 0;
6295 scale = INTVAL (scale_rtx);
6296 }
6297
6298 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6299 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6300
6301 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6302 if (base_reg && index_reg && scale == 1
6303 && (index_reg == arg_pointer_rtx
6304 || index_reg == frame_pointer_rtx
6305 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6306 {
6307 rtx tmp;
6308 tmp = base, base = index, index = tmp;
6309 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6310 }
6311
6312 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6313 if ((base_reg == hard_frame_pointer_rtx
6314 || base_reg == frame_pointer_rtx
6315 || base_reg == arg_pointer_rtx) && !disp)
6316 disp = const0_rtx;
6317
6318 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6319 Avoid this by transforming to [%esi+0]. */
6320 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6321 && base_reg && !index_reg && !disp
6322 && REG_P (base_reg)
6323 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6324 disp = const0_rtx;
6325
6326 /* Special case: encode reg+reg instead of reg*2. */
6327 if (!base && index && scale && scale == 2)
6328 base = index, base_reg = index_reg, scale = 1;
6329
6330 /* Special case: scaling cannot be encoded without base or displacement. */
6331 if (!base && !disp && index && scale != 1)
6332 disp = const0_rtx;
6333
6334 out->base = base;
6335 out->index = index;
6336 out->disp = disp;
6337 out->scale = scale;
6338 out->seg = seg;
6339
6340 return retval;
6341 }
6342 \f
6343 /* Return the cost of the memory address x.
6344 For i386, it is better to use a complex address than let gcc copy
6345 the address into a reg and make a new pseudo. But not if the address
6346 requires two regs - that would mean more pseudos with longer
6347 lifetimes. */
6348 static int
6349 ix86_address_cost (rtx x)
6350 {
6351 struct ix86_address parts;
6352 int cost = 1;
6353 int ok = ix86_decompose_address (x, &parts);
6354
6355 gcc_assert (ok);
6356
6357 if (parts.base && GET_CODE (parts.base) == SUBREG)
6358 parts.base = SUBREG_REG (parts.base);
6359 if (parts.index && GET_CODE (parts.index) == SUBREG)
6360 parts.index = SUBREG_REG (parts.index);
6361
6362 /* More complex memory references are better. */
6363 if (parts.disp && parts.disp != const0_rtx)
6364 cost--;
6365 if (parts.seg != SEG_DEFAULT)
6366 cost--;
6367
6368 /* Attempt to minimize number of registers in the address. */
6369 if ((parts.base
6370 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6371 || (parts.index
6372 && (!REG_P (parts.index)
6373 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6374 cost++;
6375
6376 if (parts.base
6377 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6378 && parts.index
6379 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6380 && parts.base != parts.index)
6381 cost++;
6382
6383 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6384 since its predecode logic can't detect the length of instructions
6385 and decoding degenerates to vector decode. Increase the cost of such
6386 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6387 to split such addresses or even refuse them altogether.
6388
6389 The following addressing modes are affected:
6390 [base+scale*index]
6391 [scale*index+disp]
6392 [base+index]
6393
6394 The first and last cases may be avoidable by explicitly coding the zero
6395 into the memory address, but I don't have an AMD-K6 machine handy to
6396 check this theory. */
6397
6398 if (TARGET_K6
6399 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6400 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6401 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6402 cost += 10;
6403
6404 return cost;
6405 }
6406 \f
6407 /* If X is a machine specific address (i.e. a symbol or label being
6408 referenced as a displacement from the GOT implemented using an
6409 UNSPEC), then return the base term. Otherwise return X. */
6410
6411 rtx
6412 ix86_find_base_term (rtx x)
6413 {
6414 rtx term;
6415
6416 if (TARGET_64BIT)
6417 {
6418 if (GET_CODE (x) != CONST)
6419 return x;
6420 term = XEXP (x, 0);
6421 if (GET_CODE (term) == PLUS
6422 && (CONST_INT_P (XEXP (term, 1))
6423 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6424 term = XEXP (term, 0);
6425 if (GET_CODE (term) != UNSPEC
6426 || XINT (term, 1) != UNSPEC_GOTPCREL)
6427 return x;
6428
6429 term = XVECEXP (term, 0, 0);
6430
6431 if (GET_CODE (term) != SYMBOL_REF
6432 && GET_CODE (term) != LABEL_REF)
6433 return x;
6434
6435 return term;
6436 }
6437
6438 term = ix86_delegitimize_address (x);
6439
6440 if (GET_CODE (term) != SYMBOL_REF
6441 && GET_CODE (term) != LABEL_REF)
6442 return x;
6443
6444 return term;
6445 }
6446
6447 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6448 this is used to form addresses to local data when -fPIC is in
6449 use. */
6450
6451 static bool
6452 darwin_local_data_pic (rtx disp)
6453 {
6454 if (GET_CODE (disp) == MINUS)
6455 {
6456 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6457 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6458 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6459 {
6460 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6461 if (! strcmp (sym_name, "<pic base>"))
6462 return true;
6463 }
6464 }
6465
6466 return false;
6467 }
6468 \f
6469 /* Determine if a given RTX is a valid constant. We already know this
6470 satisfies CONSTANT_P. */
6471
6472 bool
6473 legitimate_constant_p (rtx x)
6474 {
6475 switch (GET_CODE (x))
6476 {
6477 case CONST:
6478 x = XEXP (x, 0);
6479
6480 if (GET_CODE (x) == PLUS)
6481 {
6482 if (!CONST_INT_P (XEXP (x, 1)))
6483 return false;
6484 x = XEXP (x, 0);
6485 }
6486
6487 if (TARGET_MACHO && darwin_local_data_pic (x))
6488 return true;
6489
6490 /* Only some unspecs are valid as "constants". */
6491 if (GET_CODE (x) == UNSPEC)
6492 switch (XINT (x, 1))
6493 {
6494 case UNSPEC_GOTOFF:
6495 return TARGET_64BIT;
6496 case UNSPEC_TPOFF:
6497 case UNSPEC_NTPOFF:
6498 x = XVECEXP (x, 0, 0);
6499 return (GET_CODE (x) == SYMBOL_REF
6500 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6501 case UNSPEC_DTPOFF:
6502 x = XVECEXP (x, 0, 0);
6503 return (GET_CODE (x) == SYMBOL_REF
6504 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6505 default:
6506 return false;
6507 }
6508
6509 /* We must have drilled down to a symbol. */
6510 if (GET_CODE (x) == LABEL_REF)
6511 return true;
6512 if (GET_CODE (x) != SYMBOL_REF)
6513 return false;
6514 /* FALLTHRU */
6515
6516 case SYMBOL_REF:
6517 /* TLS symbols are never valid. */
6518 if (SYMBOL_REF_TLS_MODEL (x))
6519 return false;
6520 break;
6521
6522 case CONST_DOUBLE:
6523 if (GET_MODE (x) == TImode
6524 && x != CONST0_RTX (TImode)
6525 && !TARGET_64BIT)
6526 return false;
6527 break;
6528
6529 case CONST_VECTOR:
6530 if (x == CONST0_RTX (GET_MODE (x)))
6531 return true;
6532 return false;
6533
6534 default:
6535 break;
6536 }
6537
6538 /* Otherwise we handle everything else in the move patterns. */
6539 return true;
6540 }
6541
6542 /* Determine if it's legal to put X into the constant pool. This
6543 is not possible for the address of thread-local symbols, which
6544 is checked above. */
6545
6546 static bool
6547 ix86_cannot_force_const_mem (rtx x)
6548 {
6549 /* We can always put integral constants and vectors in memory. */
6550 switch (GET_CODE (x))
6551 {
6552 case CONST_INT:
6553 case CONST_DOUBLE:
6554 case CONST_VECTOR:
6555 return false;
6556
6557 default:
6558 break;
6559 }
6560 return !legitimate_constant_p (x);
6561 }
6562
6563 /* Determine if a given RTX is a valid constant address. */
6564
6565 bool
6566 constant_address_p (rtx x)
6567 {
6568 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6569 }
6570
6571 /* Nonzero if the constant value X is a legitimate general operand
6572 when generating PIC code. It is given that flag_pic is on and
6573 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6574
6575 bool
6576 legitimate_pic_operand_p (rtx x)
6577 {
6578 rtx inner;
6579
6580 switch (GET_CODE (x))
6581 {
6582 case CONST:
6583 inner = XEXP (x, 0);
6584 if (GET_CODE (inner) == PLUS
6585 && CONST_INT_P (XEXP (inner, 1)))
6586 inner = XEXP (inner, 0);
6587
6588 /* Only some unspecs are valid as "constants". */
6589 if (GET_CODE (inner) == UNSPEC)
6590 switch (XINT (inner, 1))
6591 {
6592 case UNSPEC_GOTOFF:
6593 return TARGET_64BIT;
6594 case UNSPEC_TPOFF:
6595 x = XVECEXP (inner, 0, 0);
6596 return (GET_CODE (x) == SYMBOL_REF
6597 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6598 default:
6599 return false;
6600 }
6601 /* FALLTHRU */
6602
6603 case SYMBOL_REF:
6604 case LABEL_REF:
6605 return legitimate_pic_address_disp_p (x);
6606
6607 default:
6608 return true;
6609 }
6610 }
6611
6612 /* Determine if a given CONST RTX is a valid memory displacement
6613 in PIC mode. */
6614
6615 int
6616 legitimate_pic_address_disp_p (rtx disp)
6617 {
6618 bool saw_plus;
6619
6620 /* In 64bit mode we can allow direct addresses of symbols and labels
6621 when they are not dynamic symbols. */
6622 if (TARGET_64BIT)
6623 {
6624 rtx op0 = disp, op1;
6625
6626 switch (GET_CODE (disp))
6627 {
6628 case LABEL_REF:
6629 return true;
6630
6631 case CONST:
6632 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6633 break;
6634 op0 = XEXP (XEXP (disp, 0), 0);
6635 op1 = XEXP (XEXP (disp, 0), 1);
6636 if (!CONST_INT_P (op1)
6637 || INTVAL (op1) >= 16*1024*1024
6638 || INTVAL (op1) < -16*1024*1024)
6639 break;
6640 if (GET_CODE (op0) == LABEL_REF)
6641 return true;
6642 if (GET_CODE (op0) != SYMBOL_REF)
6643 break;
6644 /* FALLTHRU */
6645
6646 case SYMBOL_REF:
6647 /* TLS references should always be enclosed in UNSPEC. */
6648 if (SYMBOL_REF_TLS_MODEL (op0))
6649 return false;
6650 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6651 return true;
6652 break;
6653
6654 default:
6655 break;
6656 }
6657 }
6658 if (GET_CODE (disp) != CONST)
6659 return 0;
6660 disp = XEXP (disp, 0);
6661
6662 if (TARGET_64BIT)
6663 {
6664 /* It is unsafe to allow PLUS expressions here; they would exceed the
6665 limited distance covered by GOT entries. We should not need them anyway. */
6666 if (GET_CODE (disp) != UNSPEC
6667 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6668 && XINT (disp, 1) != UNSPEC_GOTOFF))
6669 return 0;
6670
6671 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6672 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6673 return 0;
6674 return 1;
6675 }
6676
6677 saw_plus = false;
6678 if (GET_CODE (disp) == PLUS)
6679 {
6680 if (!CONST_INT_P (XEXP (disp, 1)))
6681 return 0;
6682 disp = XEXP (disp, 0);
6683 saw_plus = true;
6684 }
6685
6686 if (TARGET_MACHO && darwin_local_data_pic (disp))
6687 return 1;
6688
6689 if (GET_CODE (disp) != UNSPEC)
6690 return 0;
6691
6692 switch (XINT (disp, 1))
6693 {
6694 case UNSPEC_GOT:
6695 if (saw_plus)
6696 return false;
6697 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6698 case UNSPEC_GOTOFF:
6699 /* Refuse GOTOFF in 64bit mode since it is always 64bit wide when used.
6700 The ABI also specifies a 32bit relocation, but we never produce it
6701 in the small PIC model. */
6702 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6703 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6704 && !TARGET_64BIT)
6705 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6706 return false;
6707 case UNSPEC_GOTTPOFF:
6708 case UNSPEC_GOTNTPOFF:
6709 case UNSPEC_INDNTPOFF:
6710 if (saw_plus)
6711 return false;
6712 disp = XVECEXP (disp, 0, 0);
6713 return (GET_CODE (disp) == SYMBOL_REF
6714 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6715 case UNSPEC_NTPOFF:
6716 disp = XVECEXP (disp, 0, 0);
6717 return (GET_CODE (disp) == SYMBOL_REF
6718 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6719 case UNSPEC_DTPOFF:
6720 disp = XVECEXP (disp, 0, 0);
6721 return (GET_CODE (disp) == SYMBOL_REF
6722 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6723 }
6724
6725 return 0;
6726 }
6727
6728 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6729 memory address for an instruction. The MODE argument is the machine mode
6730 for the MEM expression that wants to use this address.
6731
6732 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6733 convert common non-canonical forms to canonical form so that they will
6734 be recognized. */
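/* As an illustration, an AT&T-style address such as 16(%eax,%ebx,4) is
   expected here roughly in the canonical shape

     (plus (plus (mult (reg ebx) (const_int 4)) (reg eax)) (const_int 16))

   which ix86_decompose_address splits into base, index, scale and
   displacement before the checks below are applied.  */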
6735
6736 int
6737 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6738 {
6739 struct ix86_address parts;
6740 rtx base, index, disp;
6741 HOST_WIDE_INT scale;
6742 const char *reason = NULL;
6743 rtx reason_rtx = NULL_RTX;
6744
6745 if (TARGET_DEBUG_ADDR)
6746 {
6747 fprintf (stderr,
6748 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6749 GET_MODE_NAME (mode), strict);
6750 debug_rtx (addr);
6751 }
6752
6753 if (ix86_decompose_address (addr, &parts) <= 0)
6754 {
6755 reason = "decomposition failed";
6756 goto report_error;
6757 }
6758
6759 base = parts.base;
6760 index = parts.index;
6761 disp = parts.disp;
6762 scale = parts.scale;
6763
6764 /* Validate base register.
6765
6766 Don't allow SUBREGs that span more than a word here. They can lead to spill
6767 failures when the base is one word out of a two-word structure, which is
6768 represented internally as a DImode int. */
6769
6770 if (base)
6771 {
6772 rtx reg;
6773 reason_rtx = base;
6774
6775 if (REG_P (base))
6776 reg = base;
6777 else if (GET_CODE (base) == SUBREG
6778 && REG_P (SUBREG_REG (base))
6779 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6780 <= UNITS_PER_WORD)
6781 reg = SUBREG_REG (base);
6782 else
6783 {
6784 reason = "base is not a register";
6785 goto report_error;
6786 }
6787
6788 if (GET_MODE (base) != Pmode)
6789 {
6790 reason = "base is not in Pmode";
6791 goto report_error;
6792 }
6793
6794 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6795 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6796 {
6797 reason = "base is not valid";
6798 goto report_error;
6799 }
6800 }
6801
6802 /* Validate index register.
6803
6804 Don't allow SUBREGs that span more than a word here -- same as above. */
6805
6806 if (index)
6807 {
6808 rtx reg;
6809 reason_rtx = index;
6810
6811 if (REG_P (index))
6812 reg = index;
6813 else if (GET_CODE (index) == SUBREG
6814 && REG_P (SUBREG_REG (index))
6815 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6816 <= UNITS_PER_WORD)
6817 reg = SUBREG_REG (index);
6818 else
6819 {
6820 reason = "index is not a register";
6821 goto report_error;
6822 }
6823
6824 if (GET_MODE (index) != Pmode)
6825 {
6826 reason = "index is not in Pmode";
6827 goto report_error;
6828 }
6829
6830 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6831 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6832 {
6833 reason = "index is not valid";
6834 goto report_error;
6835 }
6836 }
6837
6838 /* Validate scale factor. */
6839 if (scale != 1)
6840 {
6841 reason_rtx = GEN_INT (scale);
6842 if (!index)
6843 {
6844 reason = "scale without index";
6845 goto report_error;
6846 }
6847
6848 if (scale != 2 && scale != 4 && scale != 8)
6849 {
6850 reason = "scale is not a valid multiplier";
6851 goto report_error;
6852 }
6853 }
6854
6855 /* Validate displacement. */
6856 if (disp)
6857 {
6858 reason_rtx = disp;
6859
6860 if (GET_CODE (disp) == CONST
6861 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6862 switch (XINT (XEXP (disp, 0), 1))
6863 {
6864 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit wide
6865 when used. The ABI also specifies 32bit relocations, but we don't
6866 produce them at all and use IP relative addressing instead. */
6867 case UNSPEC_GOT:
6868 case UNSPEC_GOTOFF:
6869 gcc_assert (flag_pic);
6870 if (!TARGET_64BIT)
6871 goto is_legitimate_pic;
6872 reason = "64bit address unspec";
6873 goto report_error;
6874
6875 case UNSPEC_GOTPCREL:
6876 gcc_assert (flag_pic);
6877 goto is_legitimate_pic;
6878
6879 case UNSPEC_GOTTPOFF:
6880 case UNSPEC_GOTNTPOFF:
6881 case UNSPEC_INDNTPOFF:
6882 case UNSPEC_NTPOFF:
6883 case UNSPEC_DTPOFF:
6884 break;
6885
6886 default:
6887 reason = "invalid address unspec";
6888 goto report_error;
6889 }
6890
6891 else if (SYMBOLIC_CONST (disp)
6892 && (flag_pic
6893 || (TARGET_MACHO
6894 #if TARGET_MACHO
6895 && MACHOPIC_INDIRECT
6896 && !machopic_operand_p (disp)
6897 #endif
6898 )))
6899 {
6900
6901 is_legitimate_pic:
6902 if (TARGET_64BIT && (index || base))
6903 {
6904 /* foo@dtpoff(%rX) is ok. */
6905 if (GET_CODE (disp) != CONST
6906 || GET_CODE (XEXP (disp, 0)) != PLUS
6907 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6908 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6909 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6910 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6911 {
6912 reason = "non-constant pic memory reference";
6913 goto report_error;
6914 }
6915 }
6916 else if (! legitimate_pic_address_disp_p (disp))
6917 {
6918 reason = "displacement is an invalid pic construct";
6919 goto report_error;
6920 }
6921
6922 /* This code used to verify that a symbolic pic displacement
6923 includes the pic_offset_table_rtx register.
6924
6925 While this is a good idea, unfortunately these constructs may
6926 be created by the "adds using lea" optimization for incorrect
6927 code like:
6928
6929 int a;
6930 int foo(int i)
6931 {
6932 return *(&a+i);
6933 }
6934
6935 This code is nonsensical, but results in addressing the
6936 GOT table with pic_offset_table_rtx as the base. We can't
6937 just refuse it easily, since it gets matched by the
6938 "addsi3" pattern, which is later split to an lea when the
6939 output register differs from the input. While this could
6940 be handled by a separate addsi pattern for this case that
6941 never results in an lea, disabling this test seems to be
6942 the easier and still correct fix for the crash. */
6943 }
6944 else if (GET_CODE (disp) != LABEL_REF
6945 && !CONST_INT_P (disp)
6946 && (GET_CODE (disp) != CONST
6947 || !legitimate_constant_p (disp))
6948 && (GET_CODE (disp) != SYMBOL_REF
6949 || !legitimate_constant_p (disp)))
6950 {
6951 reason = "displacement is not constant";
6952 goto report_error;
6953 }
6954 else if (TARGET_64BIT
6955 && !x86_64_immediate_operand (disp, VOIDmode))
6956 {
6957 reason = "displacement is out of range";
6958 goto report_error;
6959 }
6960 }
6961
6962 /* Everything looks valid. */
6963 if (TARGET_DEBUG_ADDR)
6964 fprintf (stderr, "Success.\n");
6965 return TRUE;
6966
6967 report_error:
6968 if (TARGET_DEBUG_ADDR)
6969 {
6970 fprintf (stderr, "Error: %s\n", reason);
6971 debug_rtx (reason_rtx);
6972 }
6973 return FALSE;
6974 }
6975 \f
6976 /* Return a unique alias set for the GOT. */
6977
6978 static HOST_WIDE_INT
6979 ix86_GOT_alias_set (void)
6980 {
6981 static HOST_WIDE_INT set = -1;
6982 if (set == -1)
6983 set = new_alias_set ();
6984 return set;
6985 }
6986
6987 /* Return a legitimate reference for ORIG (an address) using the
6988 register REG. If REG is 0, a new pseudo is generated.
6989
6990 There are two types of references that must be handled:
6991
6992 1. Global data references must load the address from the GOT, via
6993 the PIC reg. An insn is emitted to do this load, and the reg is
6994 returned.
6995
6996 2. Static data references, constant pool addresses, and code labels
6997 compute the address as an offset from the GOT, whose base is in
6998 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6999 differentiate them from global data objects. The returned
7000 address is the PIC reg + an unspec constant.
7001
7002 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7003 reg also appears in the address. */
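/* As a rough ia32 illustration: a global symbol ends up as a load from
   the GOT,

     movl  sym@GOT(%ebx), %reg           (case 1)

   while a local symbol is addressed as the PIC register plus a @GOTOFF
   constant,

     leal  sym@GOTOFF(%ebx), %reg        (case 2)

   where %ebx holds pic_offset_table_rtx.  */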
7004
7005 static rtx
7006 legitimize_pic_address (rtx orig, rtx reg)
7007 {
7008 rtx addr = orig;
7009 rtx new = orig;
7010 rtx base;
7011
7012 #if TARGET_MACHO
7013 if (TARGET_MACHO && !TARGET_64BIT)
7014 {
7015 if (reg == 0)
7016 reg = gen_reg_rtx (Pmode);
7017 /* Use the generic Mach-O PIC machinery. */
7018 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7019 }
7020 #endif
7021
7022 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7023 new = addr;
7024 else if (TARGET_64BIT
7025 && ix86_cmodel != CM_SMALL_PIC
7026 && local_symbolic_operand (addr, Pmode))
7027 {
7028 rtx tmpreg;
7029 /* This symbol may be referenced via a displacement from the PIC
7030 base address (@GOTOFF). */
7031
7032 if (reload_in_progress)
7033 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7034 if (GET_CODE (addr) == CONST)
7035 addr = XEXP (addr, 0);
7036 if (GET_CODE (addr) == PLUS)
7037 {
7038 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7039 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7040 }
7041 else
7042 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7043 new = gen_rtx_CONST (Pmode, new);
7044 if (!reg)
7045 tmpreg = gen_reg_rtx (Pmode);
7046 else
7047 tmpreg = reg;
7048 emit_move_insn (tmpreg, new);
7049
7050 if (reg != 0)
7051 {
7052 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7053 tmpreg, 1, OPTAB_DIRECT);
7054 new = reg;
7055 }
7056 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7057 }
7058 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7059 {
7060 /* This symbol may be referenced via a displacement from the PIC
7061 base address (@GOTOFF). */
7062
7063 if (reload_in_progress)
7064 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7065 if (GET_CODE (addr) == CONST)
7066 addr = XEXP (addr, 0);
7067 if (GET_CODE (addr) == PLUS)
7068 {
7069 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7070 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7071 }
7072 else
7073 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7074 new = gen_rtx_CONST (Pmode, new);
7075 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7076
7077 if (reg != 0)
7078 {
7079 emit_move_insn (reg, new);
7080 new = reg;
7081 }
7082 }
7083 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7084 {
7085 if (TARGET_64BIT)
7086 {
7087 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7088 new = gen_rtx_CONST (Pmode, new);
7089 new = gen_const_mem (Pmode, new);
7090 set_mem_alias_set (new, ix86_GOT_alias_set ());
7091
7092 if (reg == 0)
7093 reg = gen_reg_rtx (Pmode);
7094 /* Use gen_movsi directly; otherwise the address is loaded
7095 into a register for CSE. We don't want to CSE these addresses;
7096 instead we CSE the addresses loaded from the GOT table, so skip this. */
7097 emit_insn (gen_movsi (reg, new));
7098 new = reg;
7099 }
7100 else
7101 {
7102 /* This symbol must be referenced via a load from the
7103 Global Offset Table (@GOT). */
7104
7105 if (reload_in_progress)
7106 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7107 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7108 new = gen_rtx_CONST (Pmode, new);
7109 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7110 new = gen_const_mem (Pmode, new);
7111 set_mem_alias_set (new, ix86_GOT_alias_set ());
7112
7113 if (reg == 0)
7114 reg = gen_reg_rtx (Pmode);
7115 emit_move_insn (reg, new);
7116 new = reg;
7117 }
7118 }
7119 else
7120 {
7121 if (CONST_INT_P (addr)
7122 && !x86_64_immediate_operand (addr, VOIDmode))
7123 {
7124 if (reg)
7125 {
7126 emit_move_insn (reg, addr);
7127 new = reg;
7128 }
7129 else
7130 new = force_reg (Pmode, addr);
7131 }
7132 else if (GET_CODE (addr) == CONST)
7133 {
7134 addr = XEXP (addr, 0);
7135
7136 /* We must match what we generated before. Assume the only
7137 unspecs that can get here are ours. Not that we could do
7138 anything with them anyway.... */
7139 if (GET_CODE (addr) == UNSPEC
7140 || (GET_CODE (addr) == PLUS
7141 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7142 return orig;
7143 gcc_assert (GET_CODE (addr) == PLUS);
7144 }
7145 if (GET_CODE (addr) == PLUS)
7146 {
7147 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7148
7149 /* Check first to see if this is a constant offset from a @GOTOFF
7150 symbol reference. */
7151 if (local_symbolic_operand (op0, Pmode)
7152 && CONST_INT_P (op1))
7153 {
7154 if (!TARGET_64BIT)
7155 {
7156 if (reload_in_progress)
7157 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7158 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7159 UNSPEC_GOTOFF);
7160 new = gen_rtx_PLUS (Pmode, new, op1);
7161 new = gen_rtx_CONST (Pmode, new);
7162 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7163
7164 if (reg != 0)
7165 {
7166 emit_move_insn (reg, new);
7167 new = reg;
7168 }
7169 }
7170 else
7171 {
7172 if (INTVAL (op1) < -16*1024*1024
7173 || INTVAL (op1) >= 16*1024*1024)
7174 {
7175 if (!x86_64_immediate_operand (op1, Pmode))
7176 op1 = force_reg (Pmode, op1);
7177 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7178 }
7179 }
7180 }
7181 else
7182 {
7183 base = legitimize_pic_address (XEXP (addr, 0), reg);
7184 new = legitimize_pic_address (XEXP (addr, 1),
7185 base == reg ? NULL_RTX : reg);
7186
7187 if (CONST_INT_P (new))
7188 new = plus_constant (base, INTVAL (new));
7189 else
7190 {
7191 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7192 {
7193 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7194 new = XEXP (new, 1);
7195 }
7196 new = gen_rtx_PLUS (Pmode, base, new);
7197 }
7198 }
7199 }
7200 }
7201 return new;
7202 }
7203 \f
7204 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7205
7206 static rtx
7207 get_thread_pointer (int to_reg)
7208 {
7209 rtx tp, reg, insn;
7210
7211 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7212 if (!to_reg)
7213 return tp;
7214
7215 reg = gen_reg_rtx (Pmode);
7216 insn = gen_rtx_SET (VOIDmode, reg, tp);
7217 insn = emit_insn (insn);
7218
7219 return reg;
7220 }
7221
7222 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7223 false if we expect this to be used for a memory address and true if
7224 we expect to load the address into a register. */
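/* As a rough ia32 illustration of the models handled below (the exact
   sequences depend on TARGET_GNU2_TLS, flag_pic and
   TARGET_TLS_DIRECT_SEG_REFS):

     global dynamic:  leal  x@tlsgd(,%ebx,1), %eax
                      call  ___tls_get_addr

     local exec:      movl  %gs:0, %eax
                      leal  x@ntpoff(%eax), %eax  */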
7225
7226 static rtx
7227 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7228 {
7229 rtx dest, base, off, pic, tp;
7230 int type;
7231
7232 switch (model)
7233 {
7234 case TLS_MODEL_GLOBAL_DYNAMIC:
7235 dest = gen_reg_rtx (Pmode);
7236 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7237
7238 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7239 {
7240 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7241
7242 start_sequence ();
7243 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7244 insns = get_insns ();
7245 end_sequence ();
7246
7247 emit_libcall_block (insns, dest, rax, x);
7248 }
7249 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7250 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7251 else
7252 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7253
7254 if (TARGET_GNU2_TLS)
7255 {
7256 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7257
7258 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7259 }
7260 break;
7261
7262 case TLS_MODEL_LOCAL_DYNAMIC:
7263 base = gen_reg_rtx (Pmode);
7264 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7265
7266 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7267 {
7268 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7269
7270 start_sequence ();
7271 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7272 insns = get_insns ();
7273 end_sequence ();
7274
7275 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7276 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7277 emit_libcall_block (insns, base, rax, note);
7278 }
7279 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7280 emit_insn (gen_tls_local_dynamic_base_64 (base));
7281 else
7282 emit_insn (gen_tls_local_dynamic_base_32 (base));
7283
7284 if (TARGET_GNU2_TLS)
7285 {
7286 rtx x = ix86_tls_module_base ();
7287
7288 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7289 gen_rtx_MINUS (Pmode, x, tp));
7290 }
7291
7292 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7293 off = gen_rtx_CONST (Pmode, off);
7294
7295 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7296
7297 if (TARGET_GNU2_TLS)
7298 {
7299 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7300
7301 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7302 }
7303
7304 break;
7305
7306 case TLS_MODEL_INITIAL_EXEC:
7307 if (TARGET_64BIT)
7308 {
7309 pic = NULL;
7310 type = UNSPEC_GOTNTPOFF;
7311 }
7312 else if (flag_pic)
7313 {
7314 if (reload_in_progress)
7315 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7316 pic = pic_offset_table_rtx;
7317 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7318 }
7319 else if (!TARGET_ANY_GNU_TLS)
7320 {
7321 pic = gen_reg_rtx (Pmode);
7322 emit_insn (gen_set_got (pic));
7323 type = UNSPEC_GOTTPOFF;
7324 }
7325 else
7326 {
7327 pic = NULL;
7328 type = UNSPEC_INDNTPOFF;
7329 }
7330
7331 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7332 off = gen_rtx_CONST (Pmode, off);
7333 if (pic)
7334 off = gen_rtx_PLUS (Pmode, pic, off);
7335 off = gen_const_mem (Pmode, off);
7336 set_mem_alias_set (off, ix86_GOT_alias_set ());
7337
7338 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7339 {
7340 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7341 off = force_reg (Pmode, off);
7342 return gen_rtx_PLUS (Pmode, base, off);
7343 }
7344 else
7345 {
7346 base = get_thread_pointer (true);
7347 dest = gen_reg_rtx (Pmode);
7348 emit_insn (gen_subsi3 (dest, base, off));
7349 }
7350 break;
7351
7352 case TLS_MODEL_LOCAL_EXEC:
7353 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7354 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7355 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7356 off = gen_rtx_CONST (Pmode, off);
7357
7358 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7359 {
7360 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7361 return gen_rtx_PLUS (Pmode, base, off);
7362 }
7363 else
7364 {
7365 base = get_thread_pointer (true);
7366 dest = gen_reg_rtx (Pmode);
7367 emit_insn (gen_subsi3 (dest, base, off));
7368 }
7369 break;
7370
7371 default:
7372 gcc_unreachable ();
7373 }
7374
7375 return dest;
7376 }
7377
7378 /* Try machine-dependent ways of modifying an illegitimate address
7379 to be legitimate. If we find one, return the new, valid address.
7380 This macro is used in only one place: `memory_address' in explow.c.
7381
7382 OLDX is the address as it was before break_out_memory_refs was called.
7383 In some cases it is useful to look at this to decide what needs to be done.
7384
7385 MODE and WIN are passed so that this macro can use
7386 GO_IF_LEGITIMATE_ADDRESS.
7387
7388 It is always safe for this macro to do nothing. It exists to recognize
7389 opportunities to optimize the output.
7390
7391 For the 80386, we handle X+REG by loading X into a register R and
7392 using R+REG. R will go in a general reg and indexing will be used.
7393 However, if REG is a broken-out memory address or multiplication,
7394 nothing needs to be done because REG can certainly go in a general reg.
7395
7396 When -fpic is used, special handling is needed for symbolic references.
7397 See comments by legitimize_pic_address in i386.c for details. */
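/* For instance, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   below into (plus (mult (reg) (const_int 4)) (reg)) so it matches the
   canonical base + index*scale form, and with -fpic a bare symbolic
   address is routed through legitimize_pic_address.  */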
7398
7399 rtx
7400 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7401 {
7402 int changed = 0;
7403 unsigned log;
7404
7405 if (TARGET_DEBUG_ADDR)
7406 {
7407 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7408 GET_MODE_NAME (mode));
7409 debug_rtx (x);
7410 }
7411
7412 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7413 if (log)
7414 return legitimize_tls_address (x, log, false);
7415 if (GET_CODE (x) == CONST
7416 && GET_CODE (XEXP (x, 0)) == PLUS
7417 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7418 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7419 {
7420 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7421 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7422 }
7423
7424 if (flag_pic && SYMBOLIC_CONST (x))
7425 return legitimize_pic_address (x, 0);
7426
7427 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7428 if (GET_CODE (x) == ASHIFT
7429 && CONST_INT_P (XEXP (x, 1))
7430 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7431 {
7432 changed = 1;
7433 log = INTVAL (XEXP (x, 1));
7434 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7435 GEN_INT (1 << log));
7436 }
7437
7438 if (GET_CODE (x) == PLUS)
7439 {
7440 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7441
7442 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7443 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7444 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7445 {
7446 changed = 1;
7447 log = INTVAL (XEXP (XEXP (x, 0), 1));
7448 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7449 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7450 GEN_INT (1 << log));
7451 }
7452
7453 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7454 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7455 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7456 {
7457 changed = 1;
7458 log = INTVAL (XEXP (XEXP (x, 1), 1));
7459 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7460 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7461 GEN_INT (1 << log));
7462 }
7463
7464 /* Put multiply first if it isn't already. */
7465 if (GET_CODE (XEXP (x, 1)) == MULT)
7466 {
7467 rtx tmp = XEXP (x, 0);
7468 XEXP (x, 0) = XEXP (x, 1);
7469 XEXP (x, 1) = tmp;
7470 changed = 1;
7471 }
7472
7473 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7474 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7475 created by virtual register instantiation, register elimination, and
7476 similar optimizations. */
7477 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7478 {
7479 changed = 1;
7480 x = gen_rtx_PLUS (Pmode,
7481 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7482 XEXP (XEXP (x, 1), 0)),
7483 XEXP (XEXP (x, 1), 1));
7484 }
7485
7486 /* Canonicalize
7487 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7488 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7489 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7490 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7491 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7492 && CONSTANT_P (XEXP (x, 1)))
7493 {
7494 rtx constant;
7495 rtx other = NULL_RTX;
7496
7497 if (CONST_INT_P (XEXP (x, 1)))
7498 {
7499 constant = XEXP (x, 1);
7500 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7501 }
7502 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7503 {
7504 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7505 other = XEXP (x, 1);
7506 }
7507 else
7508 constant = 0;
7509
7510 if (constant)
7511 {
7512 changed = 1;
7513 x = gen_rtx_PLUS (Pmode,
7514 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7515 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7516 plus_constant (other, INTVAL (constant)));
7517 }
7518 }
7519
7520 if (changed && legitimate_address_p (mode, x, FALSE))
7521 return x;
7522
7523 if (GET_CODE (XEXP (x, 0)) == MULT)
7524 {
7525 changed = 1;
7526 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7527 }
7528
7529 if (GET_CODE (XEXP (x, 1)) == MULT)
7530 {
7531 changed = 1;
7532 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7533 }
7534
7535 if (changed
7536 && REG_P (XEXP (x, 1))
7537 && REG_P (XEXP (x, 0)))
7538 return x;
7539
7540 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7541 {
7542 changed = 1;
7543 x = legitimize_pic_address (x, 0);
7544 }
7545
7546 if (changed && legitimate_address_p (mode, x, FALSE))
7547 return x;
7548
7549 if (REG_P (XEXP (x, 0)))
7550 {
7551 rtx temp = gen_reg_rtx (Pmode);
7552 rtx val = force_operand (XEXP (x, 1), temp);
7553 if (val != temp)
7554 emit_move_insn (temp, val);
7555
7556 XEXP (x, 1) = temp;
7557 return x;
7558 }
7559
7560 else if (REG_P (XEXP (x, 1)))
7561 {
7562 rtx temp = gen_reg_rtx (Pmode);
7563 rtx val = force_operand (XEXP (x, 0), temp);
7564 if (val != temp)
7565 emit_move_insn (temp, val);
7566
7567 XEXP (x, 0) = temp;
7568 return x;
7569 }
7570 }
7571
7572 return x;
7573 }
7574 \f
7575 /* Print an integer constant expression in assembler syntax. Addition
7576 and subtraction are the only arithmetic that may appear in these
7577 expressions. FILE is the stdio stream to write to, X is the rtx, and
7578 CODE is the operand print code from the output string. */
7579
7580 static void
7581 output_pic_addr_const (FILE *file, rtx x, int code)
7582 {
7583 char buf[256];
7584
7585 switch (GET_CODE (x))
7586 {
7587 case PC:
7588 gcc_assert (flag_pic);
7589 putc ('.', file);
7590 break;
7591
7592 case SYMBOL_REF:
7593 if (! TARGET_MACHO || TARGET_64BIT)
7594 output_addr_const (file, x);
7595 else
7596 {
7597 const char *name = XSTR (x, 0);
7598
7599 /* Mark the decl as referenced so that cgraph will output the function. */
7600 if (SYMBOL_REF_DECL (x))
7601 mark_decl_referenced (SYMBOL_REF_DECL (x));
7602
7603 #if TARGET_MACHO
7604 if (MACHOPIC_INDIRECT
7605 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7606 name = machopic_indirection_name (x, /*stub_p=*/true);
7607 #endif
7608 assemble_name (file, name);
7609 }
7610 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7611 fputs ("@PLT", file);
7612 break;
7613
7614 case LABEL_REF:
7615 x = XEXP (x, 0);
7616 /* FALLTHRU */
7617 case CODE_LABEL:
7618 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7619 assemble_name (asm_out_file, buf);
7620 break;
7621
7622 case CONST_INT:
7623 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7624 break;
7625
7626 case CONST:
7627 /* This used to output parentheses around the expression,
7628 but that does not work on the 386 (either ATT or BSD assembler). */
7629 output_pic_addr_const (file, XEXP (x, 0), code);
7630 break;
7631
7632 case CONST_DOUBLE:
7633 if (GET_MODE (x) == VOIDmode)
7634 {
7635 /* We can use %d if the number is <32 bits and positive. */
7636 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7637 fprintf (file, "0x%lx%08lx",
7638 (unsigned long) CONST_DOUBLE_HIGH (x),
7639 (unsigned long) CONST_DOUBLE_LOW (x));
7640 else
7641 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7642 }
7643 else
7644 /* We can't handle floating point constants;
7645 PRINT_OPERAND must handle them. */
7646 output_operand_lossage ("floating constant misused");
7647 break;
7648
7649 case PLUS:
7650 /* Some assemblers need integer constants to appear first. */
7651 if (CONST_INT_P (XEXP (x, 0)))
7652 {
7653 output_pic_addr_const (file, XEXP (x, 0), code);
7654 putc ('+', file);
7655 output_pic_addr_const (file, XEXP (x, 1), code);
7656 }
7657 else
7658 {
7659 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7660 output_pic_addr_const (file, XEXP (x, 1), code);
7661 putc ('+', file);
7662 output_pic_addr_const (file, XEXP (x, 0), code);
7663 }
7664 break;
7665
7666 case MINUS:
7667 if (!TARGET_MACHO)
7668 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7669 output_pic_addr_const (file, XEXP (x, 0), code);
7670 putc ('-', file);
7671 output_pic_addr_const (file, XEXP (x, 1), code);
7672 if (!TARGET_MACHO)
7673 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7674 break;
7675
7676 case UNSPEC:
7677 gcc_assert (XVECLEN (x, 0) == 1);
7678 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7679 switch (XINT (x, 1))
7680 {
7681 case UNSPEC_GOT:
7682 fputs ("@GOT", file);
7683 break;
7684 case UNSPEC_GOTOFF:
7685 fputs ("@GOTOFF", file);
7686 break;
7687 case UNSPEC_GOTPCREL:
7688 fputs ("@GOTPCREL(%rip)", file);
7689 break;
7690 case UNSPEC_GOTTPOFF:
7691 /* FIXME: This might be @TPOFF in Sun ld too. */
7692 fputs ("@GOTTPOFF", file);
7693 break;
7694 case UNSPEC_TPOFF:
7695 fputs ("@TPOFF", file);
7696 break;
7697 case UNSPEC_NTPOFF:
7698 if (TARGET_64BIT)
7699 fputs ("@TPOFF", file);
7700 else
7701 fputs ("@NTPOFF", file);
7702 break;
7703 case UNSPEC_DTPOFF:
7704 fputs ("@DTPOFF", file);
7705 break;
7706 case UNSPEC_GOTNTPOFF:
7707 if (TARGET_64BIT)
7708 fputs ("@GOTTPOFF(%rip)", file);
7709 else
7710 fputs ("@GOTNTPOFF", file);
7711 break;
7712 case UNSPEC_INDNTPOFF:
7713 fputs ("@INDNTPOFF", file);
7714 break;
7715 default:
7716 output_operand_lossage ("invalid UNSPEC as operand");
7717 break;
7718 }
7719 break;
7720
7721 default:
7722 output_operand_lossage ("invalid expression as operand");
7723 }
7724 }
7725
7726 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7727 We need to emit DTP-relative relocations. */
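/* E.g. for SIZE == 4 this emits roughly ".long foo@DTPOFF", and for
   SIZE == 8 it appends ", 0" for the upper half.  */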
7728
7729 static void
7730 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7731 {
7732 fputs (ASM_LONG, file);
7733 output_addr_const (file, x);
7734 fputs ("@DTPOFF", file);
7735 switch (size)
7736 {
7737 case 4:
7738 break;
7739 case 8:
7740 fputs (", 0", file);
7741 break;
7742 default:
7743 gcc_unreachable ();
7744 }
7745 }
7746
7747 /* In the name of slightly smaller debug output, and to cater to
7748 general assembler lossage, recognize PIC+GOTOFF and turn it back
7749 into a direct symbol reference.
7750
7751 On Darwin, this is necessary to avoid a crash, because Darwin
7752 has a different PIC label for each routine but the DWARF debugging
7753 information is not associated with any particular routine, so it's
7754 necessary to remove references to the PIC label from RTL stored by
7755 the DWARF output code. */
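/* For example, a 32bit reference of the rough form

     (plus (reg ebx) (const (unspec [(symbol_ref "sym")] UNSPEC_GOTOFF)))

   is turned back into plain (symbol_ref "sym"), re-adding any constant
   or register addend that was attached to it.  */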
7756
7757 static rtx
7758 ix86_delegitimize_address (rtx orig_x)
7759 {
7760 rtx x = orig_x;
7761 /* reg_addend is NULL or a multiple of some register. */
7762 rtx reg_addend = NULL_RTX;
7763 /* const_addend is NULL or a const_int. */
7764 rtx const_addend = NULL_RTX;
7765 /* This is the result, or NULL. */
7766 rtx result = NULL_RTX;
7767
7768 if (MEM_P (x))
7769 x = XEXP (x, 0);
7770
7771 if (TARGET_64BIT)
7772 {
7773 if (GET_CODE (x) != CONST
7774 || GET_CODE (XEXP (x, 0)) != UNSPEC
7775 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7776 || !MEM_P (orig_x))
7777 return orig_x;
7778 return XVECEXP (XEXP (x, 0), 0, 0);
7779 }
7780
7781 if (GET_CODE (x) != PLUS
7782 || GET_CODE (XEXP (x, 1)) != CONST)
7783 return orig_x;
7784
7785 if (REG_P (XEXP (x, 0))
7786 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7787 /* %ebx + GOT/GOTOFF */
7788 ;
7789 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7790 {
7791 /* %ebx + %reg * scale + GOT/GOTOFF */
7792 reg_addend = XEXP (x, 0);
7793 if (REG_P (XEXP (reg_addend, 0))
7794 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7795 reg_addend = XEXP (reg_addend, 1);
7796 else if (REG_P (XEXP (reg_addend, 1))
7797 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7798 reg_addend = XEXP (reg_addend, 0);
7799 else
7800 return orig_x;
7801 if (!REG_P (reg_addend)
7802 && GET_CODE (reg_addend) != MULT
7803 && GET_CODE (reg_addend) != ASHIFT)
7804 return orig_x;
7805 }
7806 else
7807 return orig_x;
7808
7809 x = XEXP (XEXP (x, 1), 0);
7810 if (GET_CODE (x) == PLUS
7811 && CONST_INT_P (XEXP (x, 1)))
7812 {
7813 const_addend = XEXP (x, 1);
7814 x = XEXP (x, 0);
7815 }
7816
7817 if (GET_CODE (x) == UNSPEC
7818 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7819 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7820 result = XVECEXP (x, 0, 0);
7821
7822 if (TARGET_MACHO && darwin_local_data_pic (x)
7823 && !MEM_P (orig_x))
7824 result = XEXP (x, 0);
7825
7826 if (! result)
7827 return orig_x;
7828
7829 if (const_addend)
7830 result = gen_rtx_PLUS (Pmode, result, const_addend);
7831 if (reg_addend)
7832 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7833 return result;
7834 }
7835 \f
7836 static void
7837 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7838 int fp, FILE *file)
7839 {
7840 const char *suffix;
7841
7842 if (mode == CCFPmode || mode == CCFPUmode)
7843 {
7844 enum rtx_code second_code, bypass_code;
7845 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7846 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7847 code = ix86_fp_compare_code_to_integer (code);
7848 mode = CCmode;
7849 }
7850 if (reverse)
7851 code = reverse_condition (code);
7852
7853 switch (code)
7854 {
7855 case EQ:
7856 suffix = "e";
7857 break;
7858 case NE:
7859 suffix = "ne";
7860 break;
7861 case GT:
7862 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7863 suffix = "g";
7864 break;
7865 case GTU:
7866 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7867 Those same assemblers have the same but opposite lossage on cmov. */
7868 gcc_assert (mode == CCmode);
7869 suffix = fp ? "nbe" : "a";
7870 break;
7871 case LT:
7872 switch (mode)
7873 {
7874 case CCNOmode:
7875 case CCGOCmode:
7876 suffix = "s";
7877 break;
7878
7879 case CCmode:
7880 case CCGCmode:
7881 suffix = "l";
7882 break;
7883
7884 default:
7885 gcc_unreachable ();
7886 }
7887 break;
7888 case LTU:
7889 gcc_assert (mode == CCmode);
7890 suffix = "b";
7891 break;
7892 case GE:
7893 switch (mode)
7894 {
7895 case CCNOmode:
7896 case CCGOCmode:
7897 suffix = "ns";
7898 break;
7899
7900 case CCmode:
7901 case CCGCmode:
7902 suffix = "ge";
7903 break;
7904
7905 default:
7906 gcc_unreachable ();
7907 }
7908 break;
7909 case GEU:
7910 /* ??? As above. */
7911 gcc_assert (mode == CCmode);
7912 suffix = fp ? "nb" : "ae";
7913 break;
7914 case LE:
7915 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7916 suffix = "le";
7917 break;
7918 case LEU:
7919 gcc_assert (mode == CCmode);
7920 suffix = "be";
7921 break;
7922 case UNORDERED:
7923 suffix = fp ? "u" : "p";
7924 break;
7925 case ORDERED:
7926 suffix = fp ? "nu" : "np";
7927 break;
7928 default:
7929 gcc_unreachable ();
7930 }
7931 fputs (suffix, file);
7932 }
7933
7934 /* Print the name of register X to FILE based on its machine mode and number.
7935 If CODE is 'w', pretend the mode is HImode.
7936 If CODE is 'b', pretend the mode is QImode.
7937 If CODE is 'k', pretend the mode is SImode.
7938 If CODE is 'q', pretend the mode is DImode.
7939 If CODE is 'h', pretend the reg is the 'high' byte register.
7940 If CODE is 'y', print "st(0)" instead of "st", if the reg is a stack op. */
7941
7942 void
7943 print_reg (rtx x, int code, FILE *file)
7944 {
7945 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7946 && REGNO (x) != FRAME_POINTER_REGNUM
7947 && REGNO (x) != FLAGS_REG
7948 && REGNO (x) != FPSR_REG
7949 && REGNO (x) != FPCR_REG);
7950
7951 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7952 putc ('%', file);
7953
7954 if (code == 'w' || MMX_REG_P (x))
7955 code = 2;
7956 else if (code == 'b')
7957 code = 1;
7958 else if (code == 'k')
7959 code = 4;
7960 else if (code == 'q')
7961 code = 8;
7962 else if (code == 'y')
7963 code = 3;
7964 else if (code == 'h')
7965 code = 0;
7966 else
7967 code = GET_MODE_SIZE (GET_MODE (x));
7968
7969 /* Irritatingly, the AMD extended registers use a different naming
7970 convention from the normal registers. */
7971 if (REX_INT_REG_P (x))
7972 {
7973 gcc_assert (TARGET_64BIT);
7974 switch (code)
7975 {
7976 case 0:
7977 error ("extended registers have no high halves");
7978 break;
7979 case 1:
7980 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7981 break;
7982 case 2:
7983 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7984 break;
7985 case 4:
7986 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7987 break;
7988 case 8:
7989 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7990 break;
7991 default:
7992 error ("unsupported operand size for extended register");
7993 break;
7994 }
7995 return;
7996 }
7997 switch (code)
7998 {
7999 case 3:
8000 if (STACK_TOP_P (x))
8001 {
8002 fputs ("st(0)", file);
8003 break;
8004 }
8005 /* FALLTHRU */
8006 case 8:
8007 case 4:
8008 case 12:
8009 if (! ANY_FP_REG_P (x))
8010 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8011 /* FALLTHRU */
8012 case 16:
8013 case 2:
8014 normal:
8015 fputs (hi_reg_name[REGNO (x)], file);
8016 break;
8017 case 1:
8018 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8019 goto normal;
8020 fputs (qi_reg_name[REGNO (x)], file);
8021 break;
8022 case 0:
8023 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8024 goto normal;
8025 fputs (qi_high_reg_name[REGNO (x)], file);
8026 break;
8027 default:
8028 gcc_unreachable ();
8029 }
8030 }
8031
8032 /* Locate some local-dynamic symbol still in use by this function
8033 so that we can print its name in some tls_local_dynamic_base
8034 pattern. */
8035
8036 static const char *
8037 get_some_local_dynamic_name (void)
8038 {
8039 rtx insn;
8040
8041 if (cfun->machine->some_ld_name)
8042 return cfun->machine->some_ld_name;
8043
8044 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8045 if (INSN_P (insn)
8046 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8047 return cfun->machine->some_ld_name;
8048
8049 gcc_unreachable ();
8050 }
8051
8052 static int
8053 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8054 {
8055 rtx x = *px;
8056
8057 if (GET_CODE (x) == SYMBOL_REF
8058 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8059 {
8060 cfun->machine->some_ld_name = XSTR (x, 0);
8061 return 1;
8062 }
8063
8064 return 0;
8065 }
8066
8067 /* Meaning of CODE:
8068 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8069 C -- print opcode suffix for set/cmov insn.
8070 c -- like C, but print reversed condition
8071 F,f -- likewise, but for floating-point.
8072 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8073 otherwise nothing
8074 R -- print the prefix for register names.
8075 z -- print the opcode suffix for the size of the current operand.
8076 * -- print a star (in certain assembler syntax)
8077 A -- print an absolute memory reference.
8078 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8079 s -- print a shift double count, followed by the assembler's argument
8080 delimiter.
8081 b -- print the QImode name of the register for the indicated operand.
8082 %b0 would print %al if operands[0] is reg 0.
8083 w -- likewise, print the HImode name of the register.
8084 k -- likewise, print the SImode name of the register.
8085 q -- likewise, print the DImode name of the register.
8086 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8087 y -- print "st(0)" instead of "st" as a register.
8088 D -- print condition for SSE cmp instruction.
8089 P -- if PIC, print an @PLT suffix.
8090 X -- don't print any sort of PIC '@' suffix for a symbol.
8091 & -- print some in-use local-dynamic symbol name.
8092 H -- print a memory address offset by 8; used for sse high-parts
8093 */
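/* As an illustration, if operands[0] is the register %eax then %b0 in a
   template prints "%al", %w0 prints "%ax", %k0 prints "%eax", %q0 prints
   "%rax" and %h0 prints "%ah" (Intel syntax drops the "%" only when a
   user label prefix is in use).  */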
8094
8095 void
8096 print_operand (FILE *file, rtx x, int code)
8097 {
8098 if (code)
8099 {
8100 switch (code)
8101 {
8102 case '*':
8103 if (ASSEMBLER_DIALECT == ASM_ATT)
8104 putc ('*', file);
8105 return;
8106
8107 case '&':
8108 assemble_name (file, get_some_local_dynamic_name ());
8109 return;
8110
8111 case 'A':
8112 switch (ASSEMBLER_DIALECT)
8113 {
8114 case ASM_ATT:
8115 putc ('*', file);
8116 break;
8117
8118 case ASM_INTEL:
8119 /* Intel syntax. For absolute addresses, registers should not
8120 be surrounded by brackets. */
8121 if (!REG_P (x))
8122 {
8123 putc ('[', file);
8124 PRINT_OPERAND (file, x, 0);
8125 putc (']', file);
8126 return;
8127 }
8128 break;
8129
8130 default:
8131 gcc_unreachable ();
8132 }
8133
8134 PRINT_OPERAND (file, x, 0);
8135 return;
8136
8137
8138 case 'L':
8139 if (ASSEMBLER_DIALECT == ASM_ATT)
8140 putc ('l', file);
8141 return;
8142
8143 case 'W':
8144 if (ASSEMBLER_DIALECT == ASM_ATT)
8145 putc ('w', file);
8146 return;
8147
8148 case 'B':
8149 if (ASSEMBLER_DIALECT == ASM_ATT)
8150 putc ('b', file);
8151 return;
8152
8153 case 'Q':
8154 if (ASSEMBLER_DIALECT == ASM_ATT)
8155 putc ('l', file);
8156 return;
8157
8158 case 'S':
8159 if (ASSEMBLER_DIALECT == ASM_ATT)
8160 putc ('s', file);
8161 return;
8162
8163 case 'T':
8164 if (ASSEMBLER_DIALECT == ASM_ATT)
8165 putc ('t', file);
8166 return;
8167
8168 case 'z':
8169 /* 387 opcodes don't get size suffixes if the operands are
8170 registers. */
8171 if (STACK_REG_P (x))
8172 return;
8173
8174 /* Likewise if using Intel opcodes. */
8175 if (ASSEMBLER_DIALECT == ASM_INTEL)
8176 return;
8177
8178 /* This is the size of op from size of operand. */
8179 switch (GET_MODE_SIZE (GET_MODE (x)))
8180 {
8181 case 1:
8182 putc ('b', file);
8183 return;
8184
8185 case 2:
8186 #ifdef HAVE_GAS_FILDS_FISTS
8187 putc ('s', file);
8188 #endif
8189 return;
8190
8191 case 4:
8192 if (GET_MODE (x) == SFmode)
8193 {
8194 putc ('s', file);
8195 return;
8196 }
8197 else
8198 putc ('l', file);
8199 return;
8200
8201 case 12:
8202 case 16:
8203 putc ('t', file);
8204 return;
8205
8206 case 8:
8207 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8208 {
8209 #ifdef GAS_MNEMONICS
8210 putc ('q', file);
8211 #else
8212 putc ('l', file);
8213 putc ('l', file);
8214 #endif
8215 }
8216 else
8217 putc ('l', file);
8218 return;
8219
8220 default:
8221 gcc_unreachable ();
8222 }
8223
8224 case 'b':
8225 case 'w':
8226 case 'k':
8227 case 'q':
8228 case 'h':
8229 case 'y':
8230 case 'X':
8231 case 'P':
8232 break;
8233
8234 case 's':
8235 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8236 {
8237 PRINT_OPERAND (file, x, 0);
8238 putc (',', file);
8239 }
8240 return;
8241
8242 case 'D':
8243 /* A little bit of braindamage here. The SSE compare instructions
8244 use completely different names for the comparisons than the
8245 fp conditional moves do. */
8246 switch (GET_CODE (x))
8247 {
8248 case EQ:
8249 case UNEQ:
8250 fputs ("eq", file);
8251 break;
8252 case LT:
8253 case UNLT:
8254 fputs ("lt", file);
8255 break;
8256 case LE:
8257 case UNLE:
8258 fputs ("le", file);
8259 break;
8260 case UNORDERED:
8261 fputs ("unord", file);
8262 break;
8263 case NE:
8264 case LTGT:
8265 fputs ("neq", file);
8266 break;
8267 case UNGE:
8268 case GE:
8269 fputs ("nlt", file);
8270 break;
8271 case UNGT:
8272 case GT:
8273 fputs ("nle", file);
8274 break;
8275 case ORDERED:
8276 fputs ("ord", file);
8277 break;
8278 default:
8279 gcc_unreachable ();
8280 }
8281 return;
8282 case 'O':
8283 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8284 if (ASSEMBLER_DIALECT == ASM_ATT)
8285 {
8286 switch (GET_MODE (x))
8287 {
8288 case HImode: putc ('w', file); break;
8289 case SImode:
8290 case SFmode: putc ('l', file); break;
8291 case DImode:
8292 case DFmode: putc ('q', file); break;
8293 default: gcc_unreachable ();
8294 }
8295 putc ('.', file);
8296 }
8297 #endif
8298 return;
8299 case 'C':
8300 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8301 return;
8302 case 'F':
8303 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8304 if (ASSEMBLER_DIALECT == ASM_ATT)
8305 putc ('.', file);
8306 #endif
8307 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8308 return;
8309
8310 /* Like above, but reverse condition */
8311 case 'c':
8312 /* Check to see if argument to %c is really a constant
8313 and not a condition code which needs to be reversed. */
8314 if (!COMPARISON_P (x))
8315 {
8316 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8317 return;
8318 }
8319 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8320 return;
8321 case 'f':
8322 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8323 if (ASSEMBLER_DIALECT == ASM_ATT)
8324 putc ('.', file);
8325 #endif
8326 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8327 return;
8328
8329 case 'H':
8330 /* It doesn't actually matter what mode we use here, as we're
8331 only going to use this for printing. */
8332 x = adjust_address_nv (x, DImode, 8);
8333 break;
8334
8335 case '+':
8336 {
8337 rtx x;
8338
8339 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8340 return;
8341
8342 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8343 if (x)
8344 {
8345 int pred_val = INTVAL (XEXP (x, 0));
8346
8347 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8348 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8349 {
8350 int taken = pred_val > REG_BR_PROB_BASE / 2;
8351 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8352
8353 /* Emit hints only when the default branch prediction
8354 heuristics would fail. */
8355 if (taken != cputaken)
8356 {
8357 /* We use 3e (DS) prefix for taken branches and
8358 2e (CS) prefix for not taken branches. */
8359 if (taken)
8360 fputs ("ds ; ", file);
8361 else
8362 fputs ("cs ; ", file);
8363 }
8364 }
8365 }
8366 return;
8367 }
8368 default:
8369 output_operand_lossage ("invalid operand code '%c'", code);
8370 }
8371 }
8372
8373 if (REG_P (x))
8374 print_reg (x, code, file);
8375
8376 else if (MEM_P (x))
8377 {
8378 /* No `byte ptr' prefix for call instructions. */
8379 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8380 {
8381 const char * size;
8382 switch (GET_MODE_SIZE (GET_MODE (x)))
8383 {
8384 case 1: size = "BYTE"; break;
8385 case 2: size = "WORD"; break;
8386 case 4: size = "DWORD"; break;
8387 case 8: size = "QWORD"; break;
8388 case 12: size = "XWORD"; break;
8389 case 16: size = "XMMWORD"; break;
8390 default:
8391 gcc_unreachable ();
8392 }
8393
8394 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8395 if (code == 'b')
8396 size = "BYTE";
8397 else if (code == 'w')
8398 size = "WORD";
8399 else if (code == 'k')
8400 size = "DWORD";
8401
8402 fputs (size, file);
8403 fputs (" PTR ", file);
8404 }
8405
8406 x = XEXP (x, 0);
8407 /* Avoid (%rip) for call operands. */
8408 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8409 && !CONST_INT_P (x))
8410 output_addr_const (file, x);
8411 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8412 output_operand_lossage ("invalid constraints for operand");
8413 else
8414 output_address (x);
8415 }
8416
8417 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8418 {
8419 REAL_VALUE_TYPE r;
8420 long l;
8421
8422 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8423 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8424
8425 if (ASSEMBLER_DIALECT == ASM_ATT)
8426 putc ('$', file);
8427 fprintf (file, "0x%08lx", l);
8428 }
8429
8430 /* These float cases don't actually occur as immediate operands. */
8431 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8432 {
8433 char dstr[30];
8434
8435 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8436 fprintf (file, "%s", dstr);
8437 }
8438
8439 else if (GET_CODE (x) == CONST_DOUBLE
8440 && GET_MODE (x) == XFmode)
8441 {
8442 char dstr[30];
8443
8444 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8445 fprintf (file, "%s", dstr);
8446 }
8447
8448 else
8449 {
8450 /* We have patterns that allow zero sets of memory, for instance.
8451 In 64-bit mode, we should probably support all 8-byte vectors,
8452 since we can in fact encode that into an immediate. */
8453 if (GET_CODE (x) == CONST_VECTOR)
8454 {
8455 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8456 x = const0_rtx;
8457 }
8458
8459 if (code != 'P')
8460 {
8461 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8462 {
8463 if (ASSEMBLER_DIALECT == ASM_ATT)
8464 putc ('$', file);
8465 }
8466 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8467 || GET_CODE (x) == LABEL_REF)
8468 {
8469 if (ASSEMBLER_DIALECT == ASM_ATT)
8470 putc ('$', file);
8471 else
8472 fputs ("OFFSET FLAT:", file);
8473 }
8474 }
8475 if (CONST_INT_P (x))
8476 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8477 else if (flag_pic)
8478 output_pic_addr_const (file, x, code);
8479 else
8480 output_addr_const (file, x);
8481 }
8482 }
8483 \f
8484 /* Print a memory operand whose address is ADDR. */
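/* For example, a base + index*scale + displacement address prints as
   "16(%eax,%ebx,4)" in AT&T syntax, and in Intel syntax the same address
   comes out in brackets, roughly as "[eax+16+ebx*4]".  */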
8485
8486 void
8487 print_operand_address (FILE *file, rtx addr)
8488 {
8489 struct ix86_address parts;
8490 rtx base, index, disp;
8491 int scale;
8492 int ok = ix86_decompose_address (addr, &parts);
8493
8494 gcc_assert (ok);
8495
8496 base = parts.base;
8497 index = parts.index;
8498 disp = parts.disp;
8499 scale = parts.scale;
8500
8501 switch (parts.seg)
8502 {
8503 case SEG_DEFAULT:
8504 break;
8505 case SEG_FS:
8506 case SEG_GS:
8507 if (USER_LABEL_PREFIX[0] == 0)
8508 putc ('%', file);
8509 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8510 break;
8511 default:
8512 gcc_unreachable ();
8513 }
8514
8515 if (!base && !index)
8516 {
8517 /* A displacement-only address requires special attention. */
8518
8519 if (CONST_INT_P (disp))
8520 {
8521 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8522 {
8523 if (USER_LABEL_PREFIX[0] == 0)
8524 putc ('%', file);
8525 fputs ("ds:", file);
8526 }
8527 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8528 }
8529 else if (flag_pic)
8530 output_pic_addr_const (file, disp, 0);
8531 else
8532 output_addr_const (file, disp);
8533
8534 /* Use the one byte shorter RIP relative addressing in 64bit mode. */
8535 if (TARGET_64BIT)
8536 {
8537 if (GET_CODE (disp) == CONST
8538 && GET_CODE (XEXP (disp, 0)) == PLUS
8539 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8540 disp = XEXP (XEXP (disp, 0), 0);
8541 if (GET_CODE (disp) == LABEL_REF
8542 || (GET_CODE (disp) == SYMBOL_REF
8543 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8544 fputs ("(%rip)", file);
8545 }
8546 }
8547 else
8548 {
8549 if (ASSEMBLER_DIALECT == ASM_ATT)
8550 {
8551 if (disp)
8552 {
8553 if (flag_pic)
8554 output_pic_addr_const (file, disp, 0);
8555 else if (GET_CODE (disp) == LABEL_REF)
8556 output_asm_label (disp);
8557 else
8558 output_addr_const (file, disp);
8559 }
8560
8561 putc ('(', file);
8562 if (base)
8563 print_reg (base, 0, file);
8564 if (index)
8565 {
8566 putc (',', file);
8567 print_reg (index, 0, file);
8568 if (scale != 1)
8569 fprintf (file, ",%d", scale);
8570 }
8571 putc (')', file);
8572 }
8573 else
8574 {
8575 rtx offset = NULL_RTX;
8576
8577 if (disp)
8578 {
8579 /* Pull out the offset of a symbol; print any symbol itself. */
8580 if (GET_CODE (disp) == CONST
8581 && GET_CODE (XEXP (disp, 0)) == PLUS
8582 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8583 {
8584 offset = XEXP (XEXP (disp, 0), 1);
8585 disp = gen_rtx_CONST (VOIDmode,
8586 XEXP (XEXP (disp, 0), 0));
8587 }
8588
8589 if (flag_pic)
8590 output_pic_addr_const (file, disp, 0);
8591 else if (GET_CODE (disp) == LABEL_REF)
8592 output_asm_label (disp);
8593 else if (CONST_INT_P (disp))
8594 offset = disp;
8595 else
8596 output_addr_const (file, disp);
8597 }
8598
8599 putc ('[', file);
8600 if (base)
8601 {
8602 print_reg (base, 0, file);
8603 if (offset)
8604 {
8605 if (INTVAL (offset) >= 0)
8606 putc ('+', file);
8607 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8608 }
8609 }
8610 else if (offset)
8611 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8612 else
8613 putc ('0', file);
8614
8615 if (index)
8616 {
8617 putc ('+', file);
8618 print_reg (index, 0, file);
8619 if (scale != 1)
8620 fprintf (file, "*%d", scale);
8621 }
8622 putc (']', file);
8623 }
8624 }
8625 }
8626
8627 bool
8628 output_addr_const_extra (FILE *file, rtx x)
8629 {
8630 rtx op;
8631
8632 if (GET_CODE (x) != UNSPEC)
8633 return false;
8634
8635 op = XVECEXP (x, 0, 0);
8636 switch (XINT (x, 1))
8637 {
8638 case UNSPEC_GOTTPOFF:
8639 output_addr_const (file, op);
8640 /* FIXME: This might be @TPOFF in Sun ld. */
8641 fputs ("@GOTTPOFF", file);
8642 break;
8643 case UNSPEC_TPOFF:
8644 output_addr_const (file, op);
8645 fputs ("@TPOFF", file);
8646 break;
8647 case UNSPEC_NTPOFF:
8648 output_addr_const (file, op);
8649 if (TARGET_64BIT)
8650 fputs ("@TPOFF", file);
8651 else
8652 fputs ("@NTPOFF", file);
8653 break;
8654 case UNSPEC_DTPOFF:
8655 output_addr_const (file, op);
8656 fputs ("@DTPOFF", file);
8657 break;
8658 case UNSPEC_GOTNTPOFF:
8659 output_addr_const (file, op);
8660 if (TARGET_64BIT)
8661 fputs ("@GOTTPOFF(%rip)", file);
8662 else
8663 fputs ("@GOTNTPOFF", file);
8664 break;
8665 case UNSPEC_INDNTPOFF:
8666 output_addr_const (file, op);
8667 fputs ("@INDNTPOFF", file);
8668 break;
8669
8670 default:
8671 return false;
8672 }
8673
8674 return true;
8675 }
8676 \f
8677 /* Split one or more DImode RTL references into pairs of SImode
8678 references. The RTL can be REG, offsettable MEM, integer constant, or
8679 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8680 split and "num" is its length. lo_half and hi_half are output arrays
8681 that parallel "operands". */
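/* For example, a DImode MEM is split into two SImode MEMs at offsets 0
   and 4, and a DImode register or constant into its low and high 32-bit
   halves (low part first, as the target is little-endian).  */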
8682
8683 void
8684 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8685 {
8686 while (num--)
8687 {
8688 rtx op = operands[num];
8689
8690 /* simplify_subreg refuses to split volatile memory addresses,
8691 but we still have to handle them. */
8692 if (MEM_P (op))
8693 {
8694 lo_half[num] = adjust_address (op, SImode, 0);
8695 hi_half[num] = adjust_address (op, SImode, 4);
8696 }
8697 else
8698 {
8699 lo_half[num] = simplify_gen_subreg (SImode, op,
8700 GET_MODE (op) == VOIDmode
8701 ? DImode : GET_MODE (op), 0);
8702 hi_half[num] = simplify_gen_subreg (SImode, op,
8703 GET_MODE (op) == VOIDmode
8704 ? DImode : GET_MODE (op), 4);
8705 }
8706 }
8707 }
8708 /* Split one or more TImode RTL references into pairs of DImode
8709 references. The RTL can be REG, offsettable MEM, integer constant, or
8710 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8711 split and "num" is its length. lo_half and hi_half are output arrays
8712 that parallel "operands". */
8713
8714 void
8715 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8716 {
8717 while (num--)
8718 {
8719 rtx op = operands[num];
8720
8721 /* simplify_subreg refuses to split volatile memory addresses, but we
8722 still have to handle them. */
8723 if (MEM_P (op))
8724 {
8725 lo_half[num] = adjust_address (op, DImode, 0);
8726 hi_half[num] = adjust_address (op, DImode, 8);
8727 }
8728 else
8729 {
8730 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8731 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8732 }
8733 }
8734 }
8735 \f
8736 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8737 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8738 is the expression of the binary operation. The output may either be
8739 emitted here, or returned to the caller, like all output_* functions.
8740
8741 There is no guarantee that the operands are the same mode, as they
8742 might be within FLOAT or FLOAT_EXTEND expressions. */
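/* For example, a DFmode addition whose second operand is in memory
   yields roughly the template "fadd%z2\t%2" (assembling as faddl), while
   the SSE path below returns "addsd\t{%2, %0|%0, %2}" instead.  */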
8743
8744 #ifndef SYSV386_COMPAT
8745 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8746 wants to fix the assemblers because that causes incompatibility
8747 with gcc. No-one wants to fix gcc because that causes
8748 incompatibility with assemblers... You can use the option of
8749 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8750 #define SYSV386_COMPAT 1
8751 #endif
8752
8753 const char *
8754 output_387_binary_op (rtx insn, rtx *operands)
8755 {
8756 static char buf[30];
8757 const char *p;
8758 const char *ssep;
8759 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8760
8761 #ifdef ENABLE_CHECKING
8762 /* Even if we do not want to check the inputs, this documents the input
8763 constraints, which helps in understanding the following code. */
8764 if (STACK_REG_P (operands[0])
8765 && ((REG_P (operands[1])
8766 && REGNO (operands[0]) == REGNO (operands[1])
8767 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8768 || (REG_P (operands[2])
8769 && REGNO (operands[0]) == REGNO (operands[2])
8770 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8771 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8772 ; /* ok */
8773 else
8774 gcc_assert (is_sse);
8775 #endif
8776
8777 switch (GET_CODE (operands[3]))
8778 {
8779 case PLUS:
8780 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8781 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8782 p = "fiadd";
8783 else
8784 p = "fadd";
8785 ssep = "add";
8786 break;
8787
8788 case MINUS:
8789 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8790 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8791 p = "fisub";
8792 else
8793 p = "fsub";
8794 ssep = "sub";
8795 break;
8796
8797 case MULT:
8798 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8799 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8800 p = "fimul";
8801 else
8802 p = "fmul";
8803 ssep = "mul";
8804 break;
8805
8806 case DIV:
8807 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8808 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8809 p = "fidiv";
8810 else
8811 p = "fdiv";
8812 ssep = "div";
8813 break;
8814
8815 default:
8816 gcc_unreachable ();
8817 }
8818
8819 if (is_sse)
8820 {
8821 strcpy (buf, ssep);
8822 if (GET_MODE (operands[0]) == SFmode)
8823 strcat (buf, "ss\t{%2, %0|%0, %2}");
8824 else
8825 strcat (buf, "sd\t{%2, %0|%0, %2}");
8826 return buf;
8827 }
8828 strcpy (buf, p);
8829
8830 switch (GET_CODE (operands[3]))
8831 {
8832 case MULT:
8833 case PLUS:
8834 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8835 {
8836 rtx temp = operands[2];
8837 operands[2] = operands[1];
8838 operands[1] = temp;
8839 }
8840
8841 /* We know operands[0] == operands[1]. */
8842
8843 if (MEM_P (operands[2]))
8844 {
8845 p = "%z2\t%2";
8846 break;
8847 }
8848
8849 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8850 {
8851 if (STACK_TOP_P (operands[0]))
8852 /* How is it that we are storing to a dead operand[2]?
8853 Well, presumably operands[1] is dead too. We can't
8854 store the result to st(0) as st(0) gets popped on this
8855 instruction. Instead store to operands[2] (which I
8856 think has to be st(1)). st(1) will be popped later.
8857 gcc <= 2.8.1 didn't have this check and generated
8858 assembly code that the Unixware assembler rejected. */
8859 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8860 else
8861 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8862 break;
8863 }
8864
8865 if (STACK_TOP_P (operands[0]))
8866 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8867 else
8868 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8869 break;
8870
8871 case MINUS:
8872 case DIV:
8873 if (MEM_P (operands[1]))
8874 {
8875 p = "r%z1\t%1";
8876 break;
8877 }
8878
8879 if (MEM_P (operands[2]))
8880 {
8881 p = "%z2\t%2";
8882 break;
8883 }
8884
8885 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8886 {
8887 #if SYSV386_COMPAT
8888 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8889 derived assemblers, confusingly reverse the direction of
8890 the operation for fsub{r} and fdiv{r} when the
8891 destination register is not st(0). The Intel assembler
8892 doesn't have this brain damage. Read !SYSV386_COMPAT to
8893 figure out what the hardware really does. */
8894 if (STACK_TOP_P (operands[0]))
8895 p = "{p\t%0, %2|rp\t%2, %0}";
8896 else
8897 p = "{rp\t%2, %0|p\t%0, %2}";
8898 #else
8899 if (STACK_TOP_P (operands[0]))
8900 /* As above for fmul/fadd, we can't store to st(0). */
8901 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8902 else
8903 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8904 #endif
8905 break;
8906 }
8907
8908 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8909 {
8910 #if SYSV386_COMPAT
8911 if (STACK_TOP_P (operands[0]))
8912 p = "{rp\t%0, %1|p\t%1, %0}";
8913 else
8914 p = "{p\t%1, %0|rp\t%0, %1}";
8915 #else
8916 if (STACK_TOP_P (operands[0]))
8917 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8918 else
8919 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8920 #endif
8921 break;
8922 }
8923
8924 if (STACK_TOP_P (operands[0]))
8925 {
8926 if (STACK_TOP_P (operands[1]))
8927 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8928 else
8929 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8930 break;
8931 }
8932 else if (STACK_TOP_P (operands[1]))
8933 {
8934 #if SYSV386_COMPAT
8935 p = "{\t%1, %0|r\t%0, %1}";
8936 #else
8937 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8938 #endif
8939 }
8940 else
8941 {
8942 #if SYSV386_COMPAT
8943 p = "{r\t%2, %0|\t%0, %2}";
8944 #else
8945 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8946 #endif
8947 }
8948 break;
8949
8950 default:
8951 gcc_unreachable ();
8952 }
8953
8954 strcat (buf, p);
8955 return buf;
8956 }
8957
8958 /* Return needed mode for entity in optimize_mode_switching pass. */
8959
8960 int
8961 ix86_mode_needed (int entity, rtx insn)
8962 {
8963 enum attr_i387_cw mode;
8964
8965 /* The mode UNINITIALIZED is used to store the control word after a
8966 function call or ASM pattern. The mode ANY specifies that the function
8967 has no requirements on the control word and makes no changes in the
8968 bits we are interested in. */
8969
8970 if (CALL_P (insn)
8971 || (NONJUMP_INSN_P (insn)
8972 && (asm_noperands (PATTERN (insn)) >= 0
8973 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8974 return I387_CW_UNINITIALIZED;
8975
8976 if (recog_memoized (insn) < 0)
8977 return I387_CW_ANY;
8978
8979 mode = get_attr_i387_cw (insn);
8980
8981 switch (entity)
8982 {
8983 case I387_TRUNC:
8984 if (mode == I387_CW_TRUNC)
8985 return mode;
8986 break;
8987
8988 case I387_FLOOR:
8989 if (mode == I387_CW_FLOOR)
8990 return mode;
8991 break;
8992
8993 case I387_CEIL:
8994 if (mode == I387_CW_CEIL)
8995 return mode;
8996 break;
8997
8998 case I387_MASK_PM:
8999 if (mode == I387_CW_MASK_PM)
9000 return mode;
9001 break;
9002
9003 default:
9004 gcc_unreachable ();
9005 }
9006
9007 return I387_CW_ANY;
9008 }
9009
9010 /* Output code to initialize control word copies used by trunc?f?i and
9011 rounding patterns. CURRENT_MODE is set to the current control word,
9012 while NEW_MODE is set to the new control word. */
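/* For reference, the i387 control word fields manipulated below are the
rounding control in bits 10-11 (00 = to nearest, 01 = toward -inf,
10 = toward +inf, 11 = toward zero/truncate) and the precision exception
mask in bit 5. Hence OR-ing 0x0c00 selects truncation, clearing the
field and OR-ing 0x0400 or 0x0800 selects floor or ceiling rounding,
and OR-ing 0x0020 masks the precision exception for nearbyint. */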
9013
9014 void
9015 emit_i387_cw_initialization (int mode)
9016 {
9017 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9018 rtx new_mode;
9019
9020 int slot;
9021
9022 rtx reg = gen_reg_rtx (HImode);
9023
9024 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9025 emit_move_insn (reg, copy_rtx (stored_mode));
9026
9027 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9028 {
9029 switch (mode)
9030 {
9031 case I387_CW_TRUNC:
9032 /* round toward zero (truncate) */
9033 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9034 slot = SLOT_CW_TRUNC;
9035 break;
9036
9037 case I387_CW_FLOOR:
9038 /* round down toward -oo */
9039 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9040 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9041 slot = SLOT_CW_FLOOR;
9042 break;
9043
9044 case I387_CW_CEIL:
9045 /* round up toward +oo */
9046 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9047 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9048 slot = SLOT_CW_CEIL;
9049 break;
9050
9051 case I387_CW_MASK_PM:
9052 /* mask precision exception for nearbyint() */
9053 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9054 slot = SLOT_CW_MASK_PM;
9055 break;
9056
9057 default:
9058 gcc_unreachable ();
9059 }
9060 }
9061 else
9062 {
9063 switch (mode)
9064 {
9065 case I387_CW_TRUNC:
9066 /* round toward zero (truncate) */
9067 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9068 slot = SLOT_CW_TRUNC;
9069 break;
9070
9071 case I387_CW_FLOOR:
9072 /* round down toward -oo */
9073 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9074 slot = SLOT_CW_FLOOR;
9075 break;
9076
9077 case I387_CW_CEIL:
9078 /* round up toward +oo */
9079 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9080 slot = SLOT_CW_CEIL;
9081 break;
9082
9083 case I387_CW_MASK_PM:
9084 /* mask precision exception for nearbyint() */
9085 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9086 slot = SLOT_CW_MASK_PM;
9087 break;
9088
9089 default:
9090 gcc_unreachable ();
9091 }
9092 }
9093
9094 gcc_assert (slot < MAX_386_STACK_LOCALS);
9095
9096 new_mode = assign_386_stack_local (HImode, slot);
9097 emit_move_insn (new_mode, reg);
9098 }
9099
9100 /* Output code for INSN to convert a float to a signed int. OPERANDS
9101 are the insn operands. The output may be [HSD]Imode and the input
9102 operand may be [SDX]Fmode. */
9103
9104 const char *
9105 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9106 {
9107 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9108 int dimode_p = GET_MODE (operands[0]) == DImode;
9109 int round_mode = get_attr_i387_cw (insn);
9110
9111 /* Jump through a hoop or two for DImode, since the hardware has no
9112 non-popping instruction. We used to do this a different way, but
9113 that was somewhat fragile and broke with post-reload splitters. */
9114 if ((dimode_p || fisttp) && !stack_top_dies)
9115 output_asm_insn ("fld\t%y1", operands);
9116
9117 gcc_assert (STACK_TOP_P (operands[1]));
9118 gcc_assert (MEM_P (operands[0]));
9119
9120 if (fisttp)
9121 output_asm_insn ("fisttp%z0\t%0", operands);
9122 else
9123 {
9124 if (round_mode != I387_CW_ANY)
9125 output_asm_insn ("fldcw\t%3", operands);
9126 if (stack_top_dies || dimode_p)
9127 output_asm_insn ("fistp%z0\t%0", operands);
9128 else
9129 output_asm_insn ("fist%z0\t%0", operands);
9130 if (round_mode != I387_CW_ANY)
9131 output_asm_insn ("fldcw\t%2", operands);
9132 }
9133
9134 return "";
9135 }
9136
9137 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9138 have the values zero or one, indicates the ffreep insn's operand
9139 from the OPERANDS array. */
9140
9141 static const char *
9142 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9143 {
9144 if (TARGET_USE_FFREEP)
9145 #if HAVE_AS_IX86_FFREEP
9146 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9147 #else
9148 {
9149 static char retval[] = ".word\t0xc_df";
9150 int regno = REGNO (operands[opno]);
9151
9152 gcc_assert (FP_REGNO_P (regno));
9153
9154 retval[9] = '0' + (regno - FIRST_STACK_REG);
9155 return retval;
9156 }
9157 #endif
9158
9159 return opno ? "fstp\t%y1" : "fstp\t%y0";
9160 }
9161
9162
9163 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9164 should be used. UNORDERED_P is true when fucom should be used. */
9165
9166 const char *
9167 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9168 {
9169 int stack_top_dies;
9170 rtx cmp_op0, cmp_op1;
9171 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9172
9173 if (eflags_p)
9174 {
9175 cmp_op0 = operands[0];
9176 cmp_op1 = operands[1];
9177 }
9178 else
9179 {
9180 cmp_op0 = operands[1];
9181 cmp_op1 = operands[2];
9182 }
9183
9184 if (is_sse)
9185 {
9186 if (GET_MODE (operands[0]) == SFmode)
9187 if (unordered_p)
9188 return "ucomiss\t{%1, %0|%0, %1}";
9189 else
9190 return "comiss\t{%1, %0|%0, %1}";
9191 else
9192 if (unordered_p)
9193 return "ucomisd\t{%1, %0|%0, %1}";
9194 else
9195 return "comisd\t{%1, %0|%0, %1}";
9196 }
9197
9198 gcc_assert (STACK_TOP_P (cmp_op0));
9199
9200 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9201
9202 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9203 {
9204 if (stack_top_dies)
9205 {
9206 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9207 return output_387_ffreep (operands, 1);
9208 }
9209 else
9210 return "ftst\n\tfnstsw\t%0";
9211 }
9212
9213 if (STACK_REG_P (cmp_op1)
9214 && stack_top_dies
9215 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9216 && REGNO (cmp_op1) != FIRST_STACK_REG)
9217 {
9218 /* If the top of the 387 stack dies, and the other operand is also
9219 a stack register that dies, then this must be a `fcompp'
9220 float compare. */
9221
9222 if (eflags_p)
9223 {
9224 /* There is no double popping fcomi variant. Fortunately,
9225 eflags is immune from the fstp's cc clobbering. */
9226 if (unordered_p)
9227 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9228 else
9229 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9230 return output_387_ffreep (operands, 0);
9231 }
9232 else
9233 {
9234 if (unordered_p)
9235 return "fucompp\n\tfnstsw\t%0";
9236 else
9237 return "fcompp\n\tfnstsw\t%0";
9238 }
9239 }
9240 else
9241 {
9242 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
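/* For example, an fcomi-style unordered compare where the stack top
dies encodes as mask = (1 << 3) | (1 << 1) | 1 = 11, which selects
the "fucomip" alternative in the table below. */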
9243
9244 static const char * const alt[16] =
9245 {
9246 "fcom%z2\t%y2\n\tfnstsw\t%0",
9247 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9248 "fucom%z2\t%y2\n\tfnstsw\t%0",
9249 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9250
9251 "ficom%z2\t%y2\n\tfnstsw\t%0",
9252 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9253 NULL,
9254 NULL,
9255
9256 "fcomi\t{%y1, %0|%0, %y1}",
9257 "fcomip\t{%y1, %0|%0, %y1}",
9258 "fucomi\t{%y1, %0|%0, %y1}",
9259 "fucomip\t{%y1, %0|%0, %y1}",
9260
9261 NULL,
9262 NULL,
9263 NULL,
9264 NULL
9265 };
9266
9267 int mask;
9268 const char *ret;
9269
9270 mask = eflags_p << 3;
9271 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9272 mask |= unordered_p << 1;
9273 mask |= stack_top_dies;
9274
9275 gcc_assert (mask < 16);
9276 ret = alt[mask];
9277 gcc_assert (ret);
9278
9279 return ret;
9280 }
9281 }
9282
9283 void
9284 ix86_output_addr_vec_elt (FILE *file, int value)
9285 {
9286 const char *directive = ASM_LONG;
9287
9288 #ifdef ASM_QUAD
9289 if (TARGET_64BIT)
9290 directive = ASM_QUAD;
9291 #else
9292 gcc_assert (!TARGET_64BIT);
9293 #endif
9294
9295 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9296 }
9297
9298 void
9299 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9300 {
9301 if (TARGET_64BIT)
9302 fprintf (file, "%s%s%d-%s%d\n",
9303 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9304 else if (HAVE_AS_GOTOFF_IN_DATA)
9305 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9306 #if TARGET_MACHO
9307 else if (TARGET_MACHO)
9308 {
9309 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9310 machopic_output_function_base_name (file);
9311 fprintf(file, "\n");
9312 }
9313 #endif
9314 else
9315 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9316 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9317 }
9318 \f
9319 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9320 for the target. */
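/* E.g. "xorl %eax, %eax" is a 2-byte encoding while "movl $0, %eax"
takes 5 bytes, but the xor form clobbers the flags, so the xor
pattern emitted below carries an explicit flags clobber. */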
9321
9322 void
9323 ix86_expand_clear (rtx dest)
9324 {
9325 rtx tmp;
9326
9327 /* We play register width games, which are only valid after reload. */
9328 gcc_assert (reload_completed);
9329
9330 /* Avoid HImode and its attendant prefix byte. */
9331 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9332 dest = gen_rtx_REG (SImode, REGNO (dest));
9333
9334 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9335
9336 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9337 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9338 {
9339 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9340 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9341 }
9342
9343 emit_insn (tmp);
9344 }
9345
9346 /* X is an unchanging MEM. If it is a constant pool reference, return
9347 the constant pool rtx, else NULL. */
9348
9349 rtx
9350 maybe_get_pool_constant (rtx x)
9351 {
9352 x = ix86_delegitimize_address (XEXP (x, 0));
9353
9354 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9355 return get_pool_constant (x);
9356
9357 return NULL_RTX;
9358 }
9359
9360 void
9361 ix86_expand_move (enum machine_mode mode, rtx operands[])
9362 {
9363 int strict = (reload_in_progress || reload_completed);
9364 rtx op0, op1;
9365 enum tls_model model;
9366
9367 op0 = operands[0];
9368 op1 = operands[1];
9369
9370 if (GET_CODE (op1) == SYMBOL_REF)
9371 {
9372 model = SYMBOL_REF_TLS_MODEL (op1);
9373 if (model)
9374 {
9375 op1 = legitimize_tls_address (op1, model, true);
9376 op1 = force_operand (op1, op0);
9377 if (op1 == op0)
9378 return;
9379 }
9380 }
9381 else if (GET_CODE (op1) == CONST
9382 && GET_CODE (XEXP (op1, 0)) == PLUS
9383 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9384 {
9385 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9386 if (model)
9387 {
9388 rtx addend = XEXP (XEXP (op1, 0), 1);
9389 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9390 op1 = force_operand (op1, NULL);
9391 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9392 op0, 1, OPTAB_DIRECT);
9393 if (op1 == op0)
9394 return;
9395 }
9396 }
9397
9398 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9399 {
9400 if (TARGET_MACHO && !TARGET_64BIT)
9401 {
9402 #if TARGET_MACHO
9403 if (MACHOPIC_PURE)
9404 {
9405 rtx temp = ((reload_in_progress
9406 || ((op0 && REG_P (op0))
9407 && mode == Pmode))
9408 ? op0 : gen_reg_rtx (Pmode));
9409 op1 = machopic_indirect_data_reference (op1, temp);
9410 op1 = machopic_legitimize_pic_address (op1, mode,
9411 temp == op1 ? 0 : temp);
9412 }
9413 else if (MACHOPIC_INDIRECT)
9414 op1 = machopic_indirect_data_reference (op1, 0);
9415 if (op0 == op1)
9416 return;
9417 #endif
9418 }
9419 else
9420 {
9421 if (MEM_P (op0))
9422 op1 = force_reg (Pmode, op1);
9423 else
9424 op1 = legitimize_address (op1, op1, Pmode);
9425 }
9426 }
9427 else
9428 {
9429 if (MEM_P (op0)
9430 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9431 || !push_operand (op0, mode))
9432 && MEM_P (op1))
9433 op1 = force_reg (mode, op1);
9434
9435 if (push_operand (op0, mode)
9436 && ! general_no_elim_operand (op1, mode))
9437 op1 = copy_to_mode_reg (mode, op1);
9438
9439 /* Force large constants in 64-bit compilation into a register
9440 to get them CSEd. */
9441 if (TARGET_64BIT && mode == DImode
9442 && immediate_operand (op1, mode)
9443 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9444 && !register_operand (op0, mode)
9445 && optimize && !reload_completed && !reload_in_progress)
9446 op1 = copy_to_mode_reg (mode, op1);
9447
9448 if (FLOAT_MODE_P (mode))
9449 {
9450 /* If we are loading a floating point constant to a register,
9451 force the value to memory now, since we'll get better code
9452 out of the back end. */
9453
9454 if (strict)
9455 ;
9456 else if (GET_CODE (op1) == CONST_DOUBLE)
9457 {
9458 op1 = validize_mem (force_const_mem (mode, op1));
9459 if (!register_operand (op0, mode))
9460 {
9461 rtx temp = gen_reg_rtx (mode);
9462 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9463 emit_move_insn (op0, temp);
9464 return;
9465 }
9466 }
9467 }
9468 }
9469
9470 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9471 }
9472
9473 void
9474 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9475 {
9476 rtx op0 = operands[0], op1 = operands[1];
9477
9478 /* Force constants other than zero into memory. We do not know how
9479 the instructions used to build constants modify the upper 64 bits
9480 of the register; once we have that information we may be able
9481 to handle some of them more efficiently. */
9482 if ((reload_in_progress | reload_completed) == 0
9483 && register_operand (op0, mode)
9484 && CONSTANT_P (op1)
9485 && standard_sse_constant_p (op1) <= 0)
9486 op1 = validize_mem (force_const_mem (mode, op1));
9487
9488 /* Make operand1 a register if it isn't already. */
9489 if (!no_new_pseudos
9490 && !register_operand (op0, mode)
9491 && !register_operand (op1, mode))
9492 {
9493 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9494 return;
9495 }
9496
9497 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9498 }
9499
9500 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9501 straight to ix86_expand_vector_move. */
9502
9503 void
9504 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9505 {
9506 rtx op0, op1, m;
9507
9508 op0 = operands[0];
9509 op1 = operands[1];
9510
9511 if (MEM_P (op1))
9512 {
9513 /* If we're optimizing for size, movups is the smallest. */
9514 if (optimize_size)
9515 {
9516 op0 = gen_lowpart (V4SFmode, op0);
9517 op1 = gen_lowpart (V4SFmode, op1);
9518 emit_insn (gen_sse_movups (op0, op1));
9519 return;
9520 }
9521
9522 /* ??? If we have typed data, then it would appear that using
9523 movdqu is the only way to get unaligned data loaded with
9524 integer type. */
9525 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9526 {
9527 op0 = gen_lowpart (V16QImode, op0);
9528 op1 = gen_lowpart (V16QImode, op1);
9529 emit_insn (gen_sse2_movdqu (op0, op1));
9530 return;
9531 }
9532
9533 if (TARGET_SSE2 && mode == V2DFmode)
9534 {
9535 rtx zero;
9536
9537 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9538 {
9539 op0 = gen_lowpart (V2DFmode, op0);
9540 op1 = gen_lowpart (V2DFmode, op1);
9541 emit_insn (gen_sse2_movupd (op0, op1));
9542 return;
9543 }
9544
9545 /* When SSE registers are split into halves, we can avoid
9546 writing to the top half twice. */
9547 if (TARGET_SSE_SPLIT_REGS)
9548 {
9549 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9550 zero = op0;
9551 }
9552 else
9553 {
9554 /* ??? Not sure about the best option for the Intel chips.
9555 The following would seem to satisfy; the register is
9556 entirely cleared, breaking the dependency chain. We
9557 then store to the upper half, with a dependency depth
9558 of one. A rumor has it that Intel recommends two movsd
9559 followed by an unpacklpd, but this is unconfirmed. And
9560 given that the dependency depth of the unpacklpd would
9561 still be one, I'm not sure why this would be better. */
9562 zero = CONST0_RTX (V2DFmode);
9563 }
9564
9565 m = adjust_address (op1, DFmode, 0);
9566 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9567 m = adjust_address (op1, DFmode, 8);
9568 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9569 }
9570 else
9571 {
9572 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9573 {
9574 op0 = gen_lowpart (V4SFmode, op0);
9575 op1 = gen_lowpart (V4SFmode, op1);
9576 emit_insn (gen_sse_movups (op0, op1));
9577 return;
9578 }
9579
9580 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9581 emit_move_insn (op0, CONST0_RTX (mode));
9582 else
9583 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9584
9585 if (mode != V4SFmode)
9586 op0 = gen_lowpart (V4SFmode, op0);
9587 m = adjust_address (op1, V2SFmode, 0);
9588 emit_insn (gen_sse_loadlps (op0, op0, m));
9589 m = adjust_address (op1, V2SFmode, 8);
9590 emit_insn (gen_sse_loadhps (op0, op0, m));
9591 }
9592 }
9593 else if (MEM_P (op0))
9594 {
9595 /* If we're optimizing for size, movups is the smallest. */
9596 if (optimize_size)
9597 {
9598 op0 = gen_lowpart (V4SFmode, op0);
9599 op1 = gen_lowpart (V4SFmode, op1);
9600 emit_insn (gen_sse_movups (op0, op1));
9601 return;
9602 }
9603
9604 /* ??? Similar to above, only less clear because of quote
9605 typeless stores unquote. */
9606 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9607 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9608 {
9609 op0 = gen_lowpart (V16QImode, op0);
9610 op1 = gen_lowpart (V16QImode, op1);
9611 emit_insn (gen_sse2_movdqu (op0, op1));
9612 return;
9613 }
9614
9615 if (TARGET_SSE2 && mode == V2DFmode)
9616 {
9617 m = adjust_address (op0, DFmode, 0);
9618 emit_insn (gen_sse2_storelpd (m, op1));
9619 m = adjust_address (op0, DFmode, 8);
9620 emit_insn (gen_sse2_storehpd (m, op1));
9621 }
9622 else
9623 {
9624 if (mode != V4SFmode)
9625 op1 = gen_lowpart (V4SFmode, op1);
9626 m = adjust_address (op0, V2SFmode, 0);
9627 emit_insn (gen_sse_storelps (m, op1));
9628 m = adjust_address (op0, V2SFmode, 8);
9629 emit_insn (gen_sse_storehps (m, op1));
9630 }
9631 }
9632 else
9633 gcc_unreachable ();
9634 }
9635
9636 /* Expand a push in MODE. This is some mode for which we do not support
9637 proper push instructions, at least from the registers that we expect
9638 the value to live in. */
9639
9640 void
9641 ix86_expand_push (enum machine_mode mode, rtx x)
9642 {
9643 rtx tmp;
9644
9645 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9646 GEN_INT (-GET_MODE_SIZE (mode)),
9647 stack_pointer_rtx, 1, OPTAB_DIRECT);
9648 if (tmp != stack_pointer_rtx)
9649 emit_move_insn (stack_pointer_rtx, tmp);
9650
9651 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9652 emit_move_insn (tmp, x);
9653 }
9654
9655 /* Helper function of ix86_fixup_binary_operands to canonicalize
9656 operand order. Returns true if the operands should be swapped. */
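/* For example, in (plus:SI (const_int 8) (reg:SI 60)) with a distinct
destination, src1 is an immediate and src2 is not, so the operands
are swapped to put the constant second, matching the "reg, imm"
forms of the arithmetic insns. */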
9657
9658 static bool
9659 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9660 rtx operands[])
9661 {
9662 rtx dst = operands[0];
9663 rtx src1 = operands[1];
9664 rtx src2 = operands[2];
9665
9666 /* If the operation is not commutative, we can't do anything. */
9667 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9668 return false;
9669
9670 /* Highest priority is that src1 should match dst. */
9671 if (rtx_equal_p (dst, src1))
9672 return false;
9673 if (rtx_equal_p (dst, src2))
9674 return true;
9675
9676 /* Next highest priority is that immediate constants come second. */
9677 if (immediate_operand (src2, mode))
9678 return false;
9679 if (immediate_operand (src1, mode))
9680 return true;
9681
9682 /* Lowest priority is that memory references should come second. */
9683 if (MEM_P (src2))
9684 return false;
9685 if (MEM_P (src1))
9686 return true;
9687
9688 return false;
9689 }
9690
9691
9692 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9693 destination to use for the operation. If different from the true
9694 destination in operands[0], a copy operation will be required. */
9695
9696 rtx
9697 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9698 rtx operands[])
9699 {
9700 rtx dst = operands[0];
9701 rtx src1 = operands[1];
9702 rtx src2 = operands[2];
9703
9704 /* Canonicalize operand order. */
9705 if (ix86_swap_binary_operands_p (code, mode, operands))
9706 {
9707 rtx temp = src1;
9708 src1 = src2;
9709 src2 = temp;
9710 }
9711
9712 /* Both source operands cannot be in memory. */
9713 if (MEM_P (src1) && MEM_P (src2))
9714 {
9715 /* Optimization: Only read from memory once. */
9716 if (rtx_equal_p (src1, src2))
9717 {
9718 src2 = force_reg (mode, src2);
9719 src1 = src2;
9720 }
9721 else
9722 src2 = force_reg (mode, src2);
9723 }
9724
9725 /* If the destination is memory, and we do not have matching source
9726 operands, do things in registers. */
9727 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9728 dst = gen_reg_rtx (mode);
9729
9730 /* Source 1 cannot be a constant. */
9731 if (CONSTANT_P (src1))
9732 src1 = force_reg (mode, src1);
9733
9734 /* Source 1 cannot be a non-matching memory. */
9735 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9736 src1 = force_reg (mode, src1);
9737
9738 operands[1] = src1;
9739 operands[2] = src2;
9740 return dst;
9741 }
9742
9743 /* Similarly, but assume that the destination has already been
9744 set up properly. */
9745
9746 void
9747 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9748 enum machine_mode mode, rtx operands[])
9749 {
9750 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9751 gcc_assert (dst == operands[0]);
9752 }
9753
9754 /* Attempt to expand a binary operator. Make the expansion closer to the
9755 actual machine than just general_operand, which would allow 3 separate
9756 memory references (one output, two input) in a single insn. */
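/* For instance, for (set (mem:SI A) (plus:SI (mem:SI B) (mem:SI C)))
the fixup below loads B and C into registers, computes the sum into
a fresh register, and the final emit_move_insn stores that register
to A, so the emitted add itself operates on registers. */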
9757
9758 void
9759 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9760 rtx operands[])
9761 {
9762 rtx src1, src2, dst, op, clob;
9763
9764 dst = ix86_fixup_binary_operands (code, mode, operands);
9765 src1 = operands[1];
9766 src2 = operands[2];
9767
9768 /* Emit the instruction. */
9769
9770 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9771 if (reload_in_progress)
9772 {
9773 /* Reload doesn't know about the flags register, and doesn't know that
9774 it doesn't want to clobber it. We can only do this with PLUS. */
9775 gcc_assert (code == PLUS);
9776 emit_insn (op);
9777 }
9778 else
9779 {
9780 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9781 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9782 }
9783
9784 /* Fix up the destination if needed. */
9785 if (dst != operands[0])
9786 emit_move_insn (operands[0], dst);
9787 }
9788
9789 /* Return TRUE or FALSE depending on whether the binary operator meets the
9790 appropriate constraints. */
9791
9792 int
9793 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9794 rtx operands[3])
9795 {
9796 rtx dst = operands[0];
9797 rtx src1 = operands[1];
9798 rtx src2 = operands[2];
9799
9800 /* Both source operands cannot be in memory. */
9801 if (MEM_P (src1) && MEM_P (src2))
9802 return 0;
9803
9804 /* Canonicalize operand order for commutative operators. */
9805 if (ix86_swap_binary_operands_p (code, mode, operands))
9806 {
9807 rtx temp = src1;
9808 src1 = src2;
9809 src2 = temp;
9810 }
9811
9812 /* If the destination is memory, we must have a matching source operand. */
9813 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9814 return 0;
9815
9816 /* Source 1 cannot be a constant. */
9817 if (CONSTANT_P (src1))
9818 return 0;
9819
9820 /* Source 1 cannot be a non-matching memory. */
9821 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9822 return 0;
9823
9824 return 1;
9825 }
9826
9827 /* Attempt to expand a unary operator. Make the expansion closer to the
9828 actual machine than just general_operand, which would allow 2 separate
9829 memory references (one output, one input) in a single insn. */
9830
9831 void
9832 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9833 rtx operands[])
9834 {
9835 int matching_memory;
9836 rtx src, dst, op, clob;
9837
9838 dst = operands[0];
9839 src = operands[1];
9840
9841 /* If the destination is memory, and we do not have matching source
9842 operands, do things in registers. */
9843 matching_memory = 0;
9844 if (MEM_P (dst))
9845 {
9846 if (rtx_equal_p (dst, src))
9847 matching_memory = 1;
9848 else
9849 dst = gen_reg_rtx (mode);
9850 }
9851
9852 /* When source operand is memory, destination must match. */
9853 if (MEM_P (src) && !matching_memory)
9854 src = force_reg (mode, src);
9855
9856 /* Emit the instruction. */
9857
9858 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9859 if (reload_in_progress || code == NOT)
9860 {
9861 /* Reload doesn't know about the flags register, and doesn't know that
9862 it doesn't want to clobber it. */
9863 gcc_assert (code == NOT);
9864 emit_insn (op);
9865 }
9866 else
9867 {
9868 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9869 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9870 }
9871
9872 /* Fix up the destination if needed. */
9873 if (dst != operands[0])
9874 emit_move_insn (operands[0], dst);
9875 }
9876
9877 /* Return TRUE or FALSE depending on whether the unary operator meets the
9878 appropriate constraints. */
9879
9880 int
9881 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9882 enum machine_mode mode ATTRIBUTE_UNUSED,
9883 rtx operands[2] ATTRIBUTE_UNUSED)
9884 {
9885 /* If one of operands is memory, source and destination must match. */
9886 if ((MEM_P (operands[0])
9887 || MEM_P (operands[1]))
9888 && ! rtx_equal_p (operands[0], operands[1]))
9889 return FALSE;
9890 return TRUE;
9891 }
9892
9893 /* Post-reload splitter for converting an SF or DFmode value in an
9894 SSE register into an unsigned SImode. */
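/* The idea: compare the input against 2**31, conditionally subtract
2**31 before the signed conversion, and XOR the 2**31 bit back into
the integer result. E.g. for the DFmode input 2147483649.0 the
subtraction yields 1.0, cvttpd2dq produces 1, and XOR-ing with
0x80000000 restores the unsigned value 2147483649. */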
9895
9896 void
9897 ix86_split_convert_uns_si_sse (rtx operands[])
9898 {
9899 enum machine_mode vecmode;
9900 rtx value, large, zero_or_two31, input, two31, x;
9901
9902 large = operands[1];
9903 zero_or_two31 = operands[2];
9904 input = operands[3];
9905 two31 = operands[4];
9906 vecmode = GET_MODE (large);
9907 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9908
9909 /* Load up the value into the low element. We must ensure that the other
9910 elements are valid floats -- zero is the easiest such value. */
9911 if (MEM_P (input))
9912 {
9913 if (vecmode == V4SFmode)
9914 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9915 else
9916 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9917 }
9918 else
9919 {
9920 input = gen_rtx_REG (vecmode, REGNO (input));
9921 emit_move_insn (value, CONST0_RTX (vecmode));
9922 if (vecmode == V4SFmode)
9923 emit_insn (gen_sse_movss (value, value, input));
9924 else
9925 emit_insn (gen_sse2_movsd (value, value, input));
9926 }
9927
9928 emit_move_insn (large, two31);
9929 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9930
9931 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9932 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9933
9934 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9935 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9936
9937 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9938 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9939
9940 large = gen_rtx_REG (V4SImode, REGNO (large));
9941 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9942
9943 x = gen_rtx_REG (V4SImode, REGNO (value));
9944 if (vecmode == V4SFmode)
9945 emit_insn (gen_sse2_cvttps2dq (x, value));
9946 else
9947 emit_insn (gen_sse2_cvttpd2dq (x, value));
9948 value = x;
9949
9950 emit_insn (gen_xorv4si3 (value, value, large));
9951 }
9952
9953 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9954 Expects the 64-bit DImode to be supplied in a pair of integral
9955 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9956 -mfpmath=sse, !optimize_size only. */
9957
9958 void
9959 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9960 {
9961 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9962 rtx int_xmm, fp_xmm;
9963 rtx biases, exponents;
9964 rtx x;
9965
9966 int_xmm = gen_reg_rtx (V4SImode);
9967 if (TARGET_INTER_UNIT_MOVES)
9968 emit_insn (gen_movdi_to_sse (int_xmm, input));
9969 else if (TARGET_SSE_SPLIT_REGS)
9970 {
9971 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9972 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9973 }
9974 else
9975 {
9976 x = gen_reg_rtx (V2DImode);
9977 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9978 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9979 }
9980
9981 x = gen_rtx_CONST_VECTOR (V4SImode,
9982 gen_rtvec (4, GEN_INT (0x43300000UL),
9983 GEN_INT (0x45300000UL),
9984 const0_rtx, const0_rtx));
9985 exponents = validize_mem (force_const_mem (V4SImode, x));
9986
9987 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9988 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9989
9990 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9991 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9992 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9993 (0x1.0p84 + double(fp_value_hi_xmm)).
9994 Note these exponents differ by 32. */
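   /* Worked example: for the input value 2**32 + 5 the low and high
      halves are 5 and 1, the two packed doubles are (0x1.0p52 + 5) and
      (0x1.0p84 + 0x1.0p32); after subtracting the biases and summing
      the halves the result is 4294967301.0, i.e. 2**32 + 5. */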
9995
9996 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9997
9998 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9999 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10000 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10001 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10002 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10003 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10004 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10005 biases = validize_mem (force_const_mem (V2DFmode, biases));
10006 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10007
10008 /* Add the upper and lower DFmode values together. */
10009 if (TARGET_SSE3)
10010 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10011 else
10012 {
10013 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10014 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10015 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10016 }
10017
10018 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10019 }
10020
10021 /* Convert an unsigned SImode value into a DFmode. Only currently used
10022 for SSE, but applicable anywhere. */
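/* The trick below: add INT_MIN to the input so it becomes a signed
SImode value, convert that with the ordinary signed int-to-double
instruction, then add 2**31 back in DFmode. E.g. for the input
0xffffffff the biased value is 0x7fffffff, which converts to
2147483647.0, and adding 2**31 yields 4294967295.0. */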
10023
10024 void
10025 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10026 {
10027 REAL_VALUE_TYPE TWO31r;
10028 rtx x, fp;
10029
10030 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10031 NULL, 1, OPTAB_DIRECT);
10032
10033 fp = gen_reg_rtx (DFmode);
10034 emit_insn (gen_floatsidf2 (fp, x));
10035
10036 real_ldexp (&TWO31r, &dconst1, 31);
10037 x = const_double_from_real_value (TWO31r, DFmode);
10038
10039 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10040 if (x != target)
10041 emit_move_insn (target, x);
10042 }
10043
10044 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10045 32-bit mode; otherwise we have a direct convert instruction. */
10046
10047 void
10048 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10049 {
10050 REAL_VALUE_TYPE TWO32r;
10051 rtx fp_lo, fp_hi, x;
10052
10053 fp_lo = gen_reg_rtx (DFmode);
10054 fp_hi = gen_reg_rtx (DFmode);
10055
10056 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10057
10058 real_ldexp (&TWO32r, &dconst1, 32);
10059 x = const_double_from_real_value (TWO32r, DFmode);
10060 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10061
10062 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10063
10064 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10065 0, OPTAB_DIRECT);
10066 if (x != target)
10067 emit_move_insn (target, x);
10068 }
10069
10070 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10071 For x86_32, -mfpmath=sse, !optimize_size only. */
10072 void
10073 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10074 {
10075 REAL_VALUE_TYPE ONE16r;
10076 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10077
10078 real_ldexp (&ONE16r, &dconst1, 16);
10079 x = const_double_from_real_value (ONE16r, SFmode);
10080 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10081 NULL, 0, OPTAB_DIRECT);
10082 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10083 NULL, 0, OPTAB_DIRECT);
10084 fp_hi = gen_reg_rtx (SFmode);
10085 fp_lo = gen_reg_rtx (SFmode);
10086 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10087 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10088 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10089 0, OPTAB_DIRECT);
10090 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10091 0, OPTAB_DIRECT);
10092 if (!rtx_equal_p (target, fp_hi))
10093 emit_move_insn (target, fp_hi);
10094 }
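/* In the function above the 16-bit halves keep every intermediate
exactly representable in SFmode: e.g. for the input 70000 (0x11170)
the halves are 1 and 0x1170, giving 1.0 * 65536.0 + 4464.0 = 70000.0,
with only the final addition able to round. */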
10095
10096 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10097 then replicate the value for all elements of the vector
10098 register. */
10099
10100 rtx
10101 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10102 {
10103 rtvec v;
10104 switch (mode)
10105 {
10106 case SFmode:
10107 if (vect)
10108 v = gen_rtvec (4, value, value, value, value);
10109 else
10110 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10111 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10112 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10113
10114 case DFmode:
10115 if (vect)
10116 v = gen_rtvec (2, value, value);
10117 else
10118 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10119 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10120
10121 default:
10122 gcc_unreachable ();
10123 }
10124 }
10125
10126 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10127 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10128 true, then replicate the mask for all elements of the vector register.
10129 If INVERT is true, then create a mask excluding the sign bit. */
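/* Concretely, for SFmode the per-element mask is 0x80000000 (or
0x7fffffff when INVERT), and for DFmode it is bit 63 set (or all
bits except bit 63 when INVERT), loaded as a V4SF or V2DF constant
respectively. */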
10130
10131 rtx
10132 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10133 {
10134 enum machine_mode vec_mode;
10135 HOST_WIDE_INT hi, lo;
10136 int shift = 63;
10137 rtx v;
10138 rtx mask;
10139
10140 /* Find the sign bit, sign extended to 2*HWI. */
10141 if (mode == SFmode)
10142 lo = 0x80000000, hi = lo < 0;
10143 else if (HOST_BITS_PER_WIDE_INT >= 64)
10144 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10145 else
10146 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10147
10148 if (invert)
10149 lo = ~lo, hi = ~hi;
10150
10151 /* Force this value into the low part of a fp vector constant. */
10152 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10153 mask = gen_lowpart (mode, mask);
10154
10155 v = ix86_build_const_vector (mode, vect, mask);
10156 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10157 return force_reg (vec_mode, v);
10158 }
10159
10160 /* Generate code for floating point ABS or NEG. */
10161
10162 void
10163 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10164 rtx operands[])
10165 {
10166 rtx mask, set, use, clob, dst, src;
10167 bool matching_memory;
10168 bool use_sse = false;
10169 bool vector_mode = VECTOR_MODE_P (mode);
10170 enum machine_mode elt_mode = mode;
10171
10172 if (vector_mode)
10173 {
10174 elt_mode = GET_MODE_INNER (mode);
10175 use_sse = true;
10176 }
10177 else if (TARGET_SSE_MATH)
10178 use_sse = SSE_FLOAT_MODE_P (mode);
10179
10180 /* NEG and ABS performed with SSE use bitwise mask operations.
10181 Create the appropriate mask now. */
10182 if (use_sse)
10183 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10184 else
10185 mask = NULL_RTX;
10186
10187 dst = operands[0];
10188 src = operands[1];
10189
10190 /* If the destination is memory, and we don't have matching source
10191 operands or we're using the x87, do things in registers. */
10192 matching_memory = false;
10193 if (MEM_P (dst))
10194 {
10195 if (use_sse && rtx_equal_p (dst, src))
10196 matching_memory = true;
10197 else
10198 dst = gen_reg_rtx (mode);
10199 }
10200 if (MEM_P (src) && !matching_memory)
10201 src = force_reg (mode, src);
10202
10203 if (vector_mode)
10204 {
10205 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10206 set = gen_rtx_SET (VOIDmode, dst, set);
10207 emit_insn (set);
10208 }
10209 else
10210 {
10211 set = gen_rtx_fmt_e (code, mode, src);
10212 set = gen_rtx_SET (VOIDmode, dst, set);
10213 if (mask)
10214 {
10215 use = gen_rtx_USE (VOIDmode, mask);
10216 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10217 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10218 gen_rtvec (3, set, use, clob)));
10219 }
10220 else
10221 emit_insn (set);
10222 }
10223
10224 if (dst != operands[0])
10225 emit_move_insn (operands[0], dst);
10226 }
10227
10228 /* Expand a copysign operation. Special case operand 0 being a constant. */
10229
10230 void
10231 ix86_expand_copysign (rtx operands[])
10232 {
10233 enum machine_mode mode, vmode;
10234 rtx dest, op0, op1, mask, nmask;
10235
10236 dest = operands[0];
10237 op0 = operands[1];
10238 op1 = operands[2];
10239
10240 mode = GET_MODE (dest);
10241 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10242
10243 if (GET_CODE (op0) == CONST_DOUBLE)
10244 {
10245 rtvec v;
10246
10247 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10248 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10249
10250 if (op0 == CONST0_RTX (mode))
10251 op0 = CONST0_RTX (vmode);
10252 else
10253 {
10254 if (mode == SFmode)
10255 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10256 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10257 else
10258 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10259 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10260 }
10261
10262 mask = ix86_build_signbit_mask (mode, 0, 0);
10263
10264 if (mode == SFmode)
10265 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10266 else
10267 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10268 }
10269 else
10270 {
10271 nmask = ix86_build_signbit_mask (mode, 0, 1);
10272 mask = ix86_build_signbit_mask (mode, 0, 0);
10273
10274 if (mode == SFmode)
10275 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10276 else
10277 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10278 }
10279 }
10280
10281 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10282 be a constant, and so has already been expanded into a vector constant. */
10283
10284 void
10285 ix86_split_copysign_const (rtx operands[])
10286 {
10287 enum machine_mode mode, vmode;
10288 rtx dest, op0, op1, mask, x;
10289
10290 dest = operands[0];
10291 op0 = operands[1];
10292 op1 = operands[2];
10293 mask = operands[3];
10294
10295 mode = GET_MODE (dest);
10296 vmode = GET_MODE (mask);
10297
10298 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10299 x = gen_rtx_AND (vmode, dest, mask);
10300 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10301
10302 if (op0 != CONST0_RTX (vmode))
10303 {
10304 x = gen_rtx_IOR (vmode, dest, op0);
10305 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10306 }
10307 }
10308
10309 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10310 so we have to do two masks. */
10311
10312 void
10313 ix86_split_copysign_var (rtx operands[])
10314 {
10315 enum machine_mode mode, vmode;
10316 rtx dest, scratch, op0, op1, mask, nmask, x;
10317
10318 dest = operands[0];
10319 scratch = operands[1];
10320 op0 = operands[2];
10321 op1 = operands[3];
10322 nmask = operands[4];
10323 mask = operands[5];
10324
10325 mode = GET_MODE (dest);
10326 vmode = GET_MODE (mask);
10327
10328 if (rtx_equal_p (op0, op1))
10329 {
10330 /* Shouldn't happen often (it's useless, obviously), but when it does
10331 we'd generate incorrect code if we continue below. */
10332 emit_move_insn (dest, op0);
10333 return;
10334 }
10335
10336 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10337 {
10338 gcc_assert (REGNO (op1) == REGNO (scratch));
10339
10340 x = gen_rtx_AND (vmode, scratch, mask);
10341 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10342
10343 dest = mask;
10344 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10345 x = gen_rtx_NOT (vmode, dest);
10346 x = gen_rtx_AND (vmode, x, op0);
10347 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10348 }
10349 else
10350 {
10351 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10352 {
10353 x = gen_rtx_AND (vmode, scratch, mask);
10354 }
10355 else /* alternative 2,4 */
10356 {
10357 gcc_assert (REGNO (mask) == REGNO (scratch));
10358 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10359 x = gen_rtx_AND (vmode, scratch, op1);
10360 }
10361 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10362
10363 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10364 {
10365 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10366 x = gen_rtx_AND (vmode, dest, nmask);
10367 }
10368 else /* alternative 3,4 */
10369 {
10370 gcc_assert (REGNO (nmask) == REGNO (dest));
10371 dest = nmask;
10372 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10373 x = gen_rtx_AND (vmode, dest, op0);
10374 }
10375 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10376 }
10377
10378 x = gen_rtx_IOR (vmode, dest, scratch);
10379 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10380 }
10381
10382 /* Return TRUE or FALSE depending on whether the first SET in INSN
10383 has source and destination with matching CC modes, and whether the
10384 CC mode is at least as constrained as REQ_MODE. */
10385
10386 int
10387 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10388 {
10389 rtx set;
10390 enum machine_mode set_mode;
10391
10392 set = PATTERN (insn);
10393 if (GET_CODE (set) == PARALLEL)
10394 set = XVECEXP (set, 0, 0);
10395 gcc_assert (GET_CODE (set) == SET);
10396 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10397
10398 set_mode = GET_MODE (SET_DEST (set));
10399 switch (set_mode)
10400 {
10401 case CCNOmode:
10402 if (req_mode != CCNOmode
10403 && (req_mode != CCmode
10404 || XEXP (SET_SRC (set), 1) != const0_rtx))
10405 return 0;
10406 break;
10407 case CCmode:
10408 if (req_mode == CCGCmode)
10409 return 0;
10410 /* FALLTHRU */
10411 case CCGCmode:
10412 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10413 return 0;
10414 /* FALLTHRU */
10415 case CCGOCmode:
10416 if (req_mode == CCZmode)
10417 return 0;
10418 /* FALLTHRU */
10419 case CCZmode:
10420 break;
10421
10422 default:
10423 gcc_unreachable ();
10424 }
10425
10426 return (GET_MODE (SET_SRC (set)) == set_mode);
10427 }
10428
10429 /* Generate insn patterns to do an integer compare of OPERANDS. */
10430
10431 static rtx
10432 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10433 {
10434 enum machine_mode cmpmode;
10435 rtx tmp, flags;
10436
10437 cmpmode = SELECT_CC_MODE (code, op0, op1);
10438 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10439
10440 /* This is very simple, but making the interface the same as in the
10441 FP case makes the rest of the code easier. */
10442 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10443 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10444
10445 /* Return the test that should be put into the flags user, i.e.
10446 the bcc, scc, or cmov instruction. */
10447 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10448 }
10449
10450 /* Figure out whether to use ordered or unordered fp comparisons.
10451 Return the appropriate mode to use. */
10452
10453 enum machine_mode
10454 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10455 {
10456 /* ??? In order to make all comparisons reversible, we do all comparisons
10457 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10458 all forms of trapping and nontrapping comparisons, we can make inequality
10459 comparisons trapping again, since it results in better code when using
10460 FCOM based compares. */
10461 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10462 }
10463
10464 enum machine_mode
10465 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10466 {
10467 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10468 return ix86_fp_compare_mode (code);
10469 switch (code)
10470 {
10471 /* Only zero flag is needed. */
10472 case EQ: /* ZF=0 */
10473 case NE: /* ZF!=0 */
10474 return CCZmode;
10475 /* Codes needing carry flag. */
10476 case GEU: /* CF=0 */
10477 case GTU: /* CF=0 & ZF=0 */
10478 case LTU: /* CF=1 */
10479 case LEU: /* CF=1 | ZF=1 */
10480 return CCmode;
10481 /* Codes possibly doable only with sign flag when
10482 comparing against zero. */
10483 case GE: /* SF=OF or SF=0 */
10484 case LT: /* SF<>OF or SF=1 */
10485 if (op1 == const0_rtx)
10486 return CCGOCmode;
10487 else
10488 /* For other cases Carry flag is not required. */
10489 return CCGCmode;
10490 /* Codes doable only with the sign flag when comparing
10491 against zero, but we lack a jump instruction for it,
10492 so we need to use relational tests against the overflow
10493 flag, which thus needs to be zero. */
10494 case GT: /* ZF=0 & SF=OF */
10495 case LE: /* ZF=1 | SF<>OF */
10496 if (op1 == const0_rtx)
10497 return CCNOmode;
10498 else
10499 return CCGCmode;
10500 /* The strcmp pattern does (use flags) and combine may ask us for the
10501 proper mode. */
10502 case USE:
10503 return CCmode;
10504 default:
10505 gcc_unreachable ();
10506 }
10507 }
10508
10509 /* Return the fixed registers used for condition codes. */
10510
10511 static bool
10512 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10513 {
10514 *p1 = FLAGS_REG;
10515 *p2 = FPSR_REG;
10516 return true;
10517 }
10518
10519 /* If two condition code modes are compatible, return a condition code
10520 mode which is compatible with both. Otherwise, return
10521 VOIDmode. */
10522
10523 static enum machine_mode
10524 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10525 {
10526 if (m1 == m2)
10527 return m1;
10528
10529 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10530 return VOIDmode;
10531
10532 if ((m1 == CCGCmode && m2 == CCGOCmode)
10533 || (m1 == CCGOCmode && m2 == CCGCmode))
10534 return CCGCmode;
10535
10536 switch (m1)
10537 {
10538 default:
10539 gcc_unreachable ();
10540
10541 case CCmode:
10542 case CCGCmode:
10543 case CCGOCmode:
10544 case CCNOmode:
10545 case CCZmode:
10546 switch (m2)
10547 {
10548 default:
10549 return VOIDmode;
10550
10551 case CCmode:
10552 case CCGCmode:
10553 case CCGOCmode:
10554 case CCNOmode:
10555 case CCZmode:
10556 return CCmode;
10557 }
10558
10559 case CCFPmode:
10560 case CCFPUmode:
10561 /* These are only compatible with themselves, which we already
10562 checked above. */
10563 return VOIDmode;
10564 }
10565 }
10566
10567 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10568
10569 int
10570 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10571 {
10572 enum rtx_code swapped_code = swap_condition (code);
10573 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10574 || (ix86_fp_comparison_cost (swapped_code)
10575 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10576 }
10577
10578 /* Swap, force into registers, or otherwise massage the two operands
10579 to a fp comparison. The operands are updated in place; the new
10580 comparison code is returned. */
10581
10582 static enum rtx_code
10583 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10584 {
10585 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10586 rtx op0 = *pop0, op1 = *pop1;
10587 enum machine_mode op_mode = GET_MODE (op0);
10588 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10589
10590 /* All of the unordered compare instructions only work on registers.
10591 The same is true of the fcomi compare instructions. The XFmode
10592 compare instructions require registers except when comparing
10593 against zero or when converting operand 1 from fixed point to
10594 floating point. */
10595
10596 if (!is_sse
10597 && (fpcmp_mode == CCFPUmode
10598 || (op_mode == XFmode
10599 && ! (standard_80387_constant_p (op0) == 1
10600 || standard_80387_constant_p (op1) == 1)
10601 && GET_CODE (op1) != FLOAT)
10602 || ix86_use_fcomi_compare (code)))
10603 {
10604 op0 = force_reg (op_mode, op0);
10605 op1 = force_reg (op_mode, op1);
10606 }
10607 else
10608 {
10609 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10610 things around if they appear profitable, otherwise force op0
10611 into a register. */
10612
10613 if (standard_80387_constant_p (op0) == 0
10614 || (MEM_P (op0)
10615 && ! (standard_80387_constant_p (op1) == 0
10616 || MEM_P (op1))))
10617 {
10618 rtx tmp;
10619 tmp = op0, op0 = op1, op1 = tmp;
10620 code = swap_condition (code);
10621 }
10622
10623 if (!REG_P (op0))
10624 op0 = force_reg (op_mode, op0);
10625
10626 if (CONSTANT_P (op1))
10627 {
10628 int tmp = standard_80387_constant_p (op1);
10629 if (tmp == 0)
10630 op1 = validize_mem (force_const_mem (op_mode, op1));
10631 else if (tmp == 1)
10632 {
10633 if (TARGET_CMOVE)
10634 op1 = force_reg (op_mode, op1);
10635 }
10636 else
10637 op1 = force_reg (op_mode, op1);
10638 }
10639 }
10640
10641 /* Try to rearrange the comparison to make it cheaper. */
10642 if (ix86_fp_comparison_cost (code)
10643 > ix86_fp_comparison_cost (swap_condition (code))
10644 && (REG_P (op1) || !no_new_pseudos))
10645 {
10646 rtx tmp;
10647 tmp = op0, op0 = op1, op1 = tmp;
10648 code = swap_condition (code);
10649 if (!REG_P (op0))
10650 op0 = force_reg (op_mode, op0);
10651 }
10652
10653 *pop0 = op0;
10654 *pop1 = op1;
10655 return code;
10656 }
10657
10658 /* Convert comparison codes we use to represent FP comparison to integer
10659 code that will result in proper branch. Return UNKNOWN if no such code
10660 is available. */
10661
10662 enum rtx_code
10663 ix86_fp_compare_code_to_integer (enum rtx_code code)
10664 {
10665 switch (code)
10666 {
10667 case GT:
10668 return GTU;
10669 case GE:
10670 return GEU;
10671 case ORDERED:
10672 case UNORDERED:
10673 return code;
10674 break;
10675 case UNEQ:
10676 return EQ;
10677 break;
10678 case UNLT:
10679 return LTU;
10680 break;
10681 case UNLE:
10682 return LEU;
10683 break;
10684 case LTGT:
10685 return NE;
10686 break;
10687 default:
10688 return UNKNOWN;
10689 }
10690 }
10691
10692 /* Split comparison code CODE into comparisons we can do using branch
10693 instructions. BYPASS_CODE is the comparison code for a branch that
10694 will branch around FIRST_CODE and SECOND_CODE. If one of the branches
10695 is not required, its value is set to UNKNOWN.
10696 We never require more than two branches. */
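/* For example, with TARGET_IEEE_FP an LT comparison becomes a branch on
UNORDERED (the bypass code, jumping around the real test) followed by
a branch on UNLT, while NE becomes a branch on LTGT plus a second
branch on UNORDERED; without TARGET_IEEE_FP the extra branches are
dropped. */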
10697
10698 void
10699 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10700 enum rtx_code *first_code,
10701 enum rtx_code *second_code)
10702 {
10703 *first_code = code;
10704 *bypass_code = UNKNOWN;
10705 *second_code = UNKNOWN;
10706
10707 /* The fcomi comparison sets flags as follows:
10708
10709 cmp ZF PF CF
10710 > 0 0 0
10711 < 0 0 1
10712 = 1 0 0
10713 un 1 1 1 */
10714
10715 switch (code)
10716 {
10717 case GT: /* GTU - CF=0 & ZF=0 */
10718 case GE: /* GEU - CF=0 */
10719 case ORDERED: /* PF=0 */
10720 case UNORDERED: /* PF=1 */
10721 case UNEQ: /* EQ - ZF=1 */
10722 case UNLT: /* LTU - CF=1 */
10723 case UNLE: /* LEU - CF=1 | ZF=1 */
10724 case LTGT: /* EQ - ZF=0 */
10725 break;
10726 case LT: /* LTU - CF=1 - fails on unordered */
10727 *first_code = UNLT;
10728 *bypass_code = UNORDERED;
10729 break;
10730 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10731 *first_code = UNLE;
10732 *bypass_code = UNORDERED;
10733 break;
10734 case EQ: /* EQ - ZF=1 - fails on unordered */
10735 *first_code = UNEQ;
10736 *bypass_code = UNORDERED;
10737 break;
10738 case NE: /* NE - ZF=0 - fails on unordered */
10739 *first_code = LTGT;
10740 *second_code = UNORDERED;
10741 break;
10742 case UNGE: /* GEU - CF=0 - fails on unordered */
10743 *first_code = GE;
10744 *second_code = UNORDERED;
10745 break;
10746 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10747 *first_code = GT;
10748 *second_code = UNORDERED;
10749 break;
10750 default:
10751 gcc_unreachable ();
10752 }
10753 if (!TARGET_IEEE_FP)
10754 {
10755 *second_code = UNKNOWN;
10756 *bypass_code = UNKNOWN;
10757 }
10758 }
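/* For instance (an illustrative sketch of the IEEE-safe case): a plain LT
   would fail on unordered operands, so it is split into first_code = UNLT
   with bypass_code = UNORDERED, and the emitted branch sequence is roughly

	fcomi	%st(1), %st
	jp	1f		(unordered - skip the real test)
	jb	target		(CF=1 - below - a < b)
     1:

   while NE instead gets a second_code of UNORDERED, because an unordered
   result must also be treated as "not equal".  */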
10759
10760 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10761 All of the following functions use the number of instructions as the cost metric.
10762 In the future this should be tweaked to compute bytes for optimize_size and to
10763 take into account the performance of various instructions on various CPUs. */
10764 static int
10765 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10766 {
10767 if (!TARGET_IEEE_FP)
10768 return 4;
10769 /* The cost of code output by ix86_expand_fp_compare. */
10770 switch (code)
10771 {
10772 case UNLE:
10773 case UNLT:
10774 case LTGT:
10775 case GT:
10776 case GE:
10777 case UNORDERED:
10778 case ORDERED:
10779 case UNEQ:
10780 return 4;
10782 case LT:
10783 case NE:
10784 case EQ:
10785 case UNGE:
10786 return 5;
10788 case LE:
10789 case UNGT:
10790 return 6;
10792 default:
10793 gcc_unreachable ();
10794 }
10795 }
10796
10797 /* Return cost of comparison done using fcomi operation.
10798 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10799 static int
10800 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10801 {
10802 enum rtx_code bypass_code, first_code, second_code;
10803 /* Return an arbitrarily high cost when the instruction is not supported - this
10804 prevents gcc from using it. */
10805 if (!TARGET_CMOVE)
10806 return 1024;
10807 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10808 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10809 }
10810
10811 /* Return cost of comparison done using sahf operation.
10812 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10813 static int
10814 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10815 {
10816 enum rtx_code bypass_code, first_code, second_code;
10817 /* Return an arbitrarily high cost when the instruction is not preferred - this
10818 prevents gcc from using it. */
10819 if (!TARGET_USE_SAHF && !optimize_size)
10820 return 1024;
10821 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10822 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10823 }
10824
10825 /* Compute cost of the comparison done using any method.
10826 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10827 static int
10828 ix86_fp_comparison_cost (enum rtx_code code)
10829 {
10830 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10831 int min;
10832
10833 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10834 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10835
10836 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10837 if (min > sahf_cost)
10838 min = sahf_cost;
10839 if (min > fcomi_cost)
10840 min = fcomi_cost;
10841 return min;
10842 }
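/* A worked example of the cost selection above (numbers taken from the
   helpers, purely illustrative): for CODE == LT with TARGET_IEEE_FP the
   arithmetic (fnstsw + test) variant costs 5, the sahf variant costs
   1 + 3 = 4 (one extra bypass branch) and fcomi costs 1 + 2 = 3, so on a
   TARGET_CMOVE machine the fcomi sequence is chosen.  */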
10843
10844 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10845
10846 static rtx
10847 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10848 rtx *second_test, rtx *bypass_test)
10849 {
10850 enum machine_mode fpcmp_mode, intcmp_mode;
10851 rtx tmp, tmp2;
10852 int cost = ix86_fp_comparison_cost (code);
10853 enum rtx_code bypass_code, first_code, second_code;
10854
10855 fpcmp_mode = ix86_fp_compare_mode (code);
10856 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10857
10858 if (second_test)
10859 *second_test = NULL_RTX;
10860 if (bypass_test)
10861 *bypass_test = NULL_RTX;
10862
10863 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10864
10865 /* Do fcomi/sahf based test when profitable. */
10866 if ((bypass_code == UNKNOWN || bypass_test)
10867 && (second_code == UNKNOWN || second_test)
10868 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10869 {
10870 if (TARGET_CMOVE)
10871 {
10872 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10873 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10874 tmp);
10875 emit_insn (tmp);
10876 }
10877 else
10878 {
10879 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10880 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10881 if (!scratch)
10882 scratch = gen_reg_rtx (HImode);
10883 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10884 emit_insn (gen_x86_sahf_1 (scratch));
10885 }
10886
10887 /* The FP codes work out to act like unsigned. */
10888 intcmp_mode = fpcmp_mode;
10889 code = first_code;
10890 if (bypass_code != UNKNOWN)
10891 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10892 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10893 const0_rtx);
10894 if (second_code != UNKNOWN)
10895 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10896 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10897 const0_rtx);
10898 }
10899 else
10900 {
10901 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10902 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10903 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10904 if (!scratch)
10905 scratch = gen_reg_rtx (HImode);
10906 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10907
10908 /* In the unordered case, we have to check C2 for NaNs, which
10909 doesn't happen to work out to anything nice combination-wise.
10910 So do some bit twiddling on the value we've got in AH to come
10911 up with an appropriate set of condition codes. */
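      /* For reference (a sketch of the status-word layout this relies on):
	 after fnstsw the FPU condition bits land in AH as C0 = 0x01,
	 C2 = 0x04 and C3 = 0x40, so the mask 0x45 used below tests
	 C3|C2|C0 at once; 0x40 tests "equal", 0x04 tests "unordered" and
	 0x01 tests "below".  An ordered a > b leaves all three clear.  */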
10912
10913 intcmp_mode = CCNOmode;
10914 switch (code)
10915 {
10916 case GT:
10917 case UNGT:
10918 if (code == GT || !TARGET_IEEE_FP)
10919 {
10920 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10921 code = EQ;
10922 }
10923 else
10924 {
10925 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10926 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10927 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10928 intcmp_mode = CCmode;
10929 code = GEU;
10930 }
10931 break;
10932 case LT:
10933 case UNLT:
10934 if (code == LT && TARGET_IEEE_FP)
10935 {
10936 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10937 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10938 intcmp_mode = CCmode;
10939 code = EQ;
10940 }
10941 else
10942 {
10943 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10944 code = NE;
10945 }
10946 break;
10947 case GE:
10948 case UNGE:
10949 if (code == GE || !TARGET_IEEE_FP)
10950 {
10951 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10952 code = EQ;
10953 }
10954 else
10955 {
10956 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10957 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10958 GEN_INT (0x01)));
10959 code = NE;
10960 }
10961 break;
10962 case LE:
10963 case UNLE:
10964 if (code == LE && TARGET_IEEE_FP)
10965 {
10966 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10967 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10968 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10969 intcmp_mode = CCmode;
10970 code = LTU;
10971 }
10972 else
10973 {
10974 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10975 code = NE;
10976 }
10977 break;
10978 case EQ:
10979 case UNEQ:
10980 if (code == EQ && TARGET_IEEE_FP)
10981 {
10982 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10983 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10984 intcmp_mode = CCmode;
10985 code = EQ;
10986 }
10987 else
10988 {
10989 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10990 code = NE;
10991 break;
10992 }
10993 break;
10994 case NE:
10995 case LTGT:
10996 if (code == NE && TARGET_IEEE_FP)
10997 {
10998 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10999 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11000 GEN_INT (0x40)));
11001 code = NE;
11002 }
11003 else
11004 {
11005 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11006 code = EQ;
11007 }
11008 break;
11009
11010 case UNORDERED:
11011 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11012 code = NE;
11013 break;
11014 case ORDERED:
11015 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11016 code = EQ;
11017 break;
11018
11019 default:
11020 gcc_unreachable ();
11021 }
11022 }
11023
11024 /* Return the test that should be put into the flags user, i.e.
11025 the bcc, scc, or cmov instruction. */
11026 return gen_rtx_fmt_ee (code, VOIDmode,
11027 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11028 const0_rtx);
11029 }
11030
11031 rtx
11032 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11033 {
11034 rtx op0, op1, ret;
11035 op0 = ix86_compare_op0;
11036 op1 = ix86_compare_op1;
11037
11038 if (second_test)
11039 *second_test = NULL_RTX;
11040 if (bypass_test)
11041 *bypass_test = NULL_RTX;
11042
11043 if (ix86_compare_emitted)
11044 {
11045 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11046 ix86_compare_emitted = NULL_RTX;
11047 }
11048 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11049 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11050 second_test, bypass_test);
11051 else
11052 ret = ix86_expand_int_compare (code, op0, op1);
11053
11054 return ret;
11055 }
11056
11057 /* Return true if CODE will result in a nontrivial jump sequence. */
11058 bool
11059 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11060 {
11061 enum rtx_code bypass_code, first_code, second_code;
11062 if (!TARGET_CMOVE)
11063 return true;
11064 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11065 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11066 }
11067
11068 void
11069 ix86_expand_branch (enum rtx_code code, rtx label)
11070 {
11071 rtx tmp;
11072
11073 /* If we have emitted a compare insn, go straight to simple.
11074 ix86_expand_compare won't emit anything if ix86_compare_emitted
11075 is non-NULL. */
11076 if (ix86_compare_emitted)
11077 goto simple;
11078
11079 switch (GET_MODE (ix86_compare_op0))
11080 {
11081 case QImode:
11082 case HImode:
11083 case SImode:
11084 simple:
11085 tmp = ix86_expand_compare (code, NULL, NULL);
11086 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11087 gen_rtx_LABEL_REF (VOIDmode, label),
11088 pc_rtx);
11089 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11090 return;
11091
11092 case SFmode:
11093 case DFmode:
11094 case XFmode:
11095 {
11096 rtvec vec;
11097 int use_fcomi;
11098 enum rtx_code bypass_code, first_code, second_code;
11099
11100 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11101 &ix86_compare_op1);
11102
11103 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11104
11105 /* Check whether we will use the natural sequence with one jump. If
11106 so, we can expand the jump early. Otherwise delay the expansion by
11107 creating a compound insn so as not to confuse the optimizers. */
11108 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11109 && TARGET_CMOVE)
11110 {
11111 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11112 gen_rtx_LABEL_REF (VOIDmode, label),
11113 pc_rtx, NULL_RTX, NULL_RTX);
11114 }
11115 else
11116 {
11117 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11118 ix86_compare_op0, ix86_compare_op1);
11119 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11120 gen_rtx_LABEL_REF (VOIDmode, label),
11121 pc_rtx);
11122 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11123
11124 use_fcomi = ix86_use_fcomi_compare (code);
11125 vec = rtvec_alloc (3 + !use_fcomi);
11126 RTVEC_ELT (vec, 0) = tmp;
11127 RTVEC_ELT (vec, 1)
11128 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11129 RTVEC_ELT (vec, 2)
11130 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11131 if (! use_fcomi)
11132 RTVEC_ELT (vec, 3)
11133 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11134
11135 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11136 }
11137 return;
11138 }
11139
11140 case DImode:
11141 if (TARGET_64BIT)
11142 goto simple;
11143 case TImode:
11144 /* Expand a DImode (or, on 64-bit, TImode) branch into multiple compare+branch. */
11145 {
11146 rtx lo[2], hi[2], label2;
11147 enum rtx_code code1, code2, code3;
11148 enum machine_mode submode;
11149
11150 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11151 {
11152 tmp = ix86_compare_op0;
11153 ix86_compare_op0 = ix86_compare_op1;
11154 ix86_compare_op1 = tmp;
11155 code = swap_condition (code);
11156 }
11157 if (GET_MODE (ix86_compare_op0) == DImode)
11158 {
11159 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11160 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11161 submode = SImode;
11162 }
11163 else
11164 {
11165 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11166 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11167 submode = DImode;
11168 }
11169
11170 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11171 avoid two branches. This costs one extra insn, so disable when
11172 optimizing for size. */
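	/* For example (an illustrative register sketch on a 32-bit target),
	   the DImode test  a == b  becomes roughly

		xorl  hi(b), hi(a)
		xorl  lo(b), lo(a)
		orl   hi(a), lo(a)
		je    label

	   with the actual rtl built via expand_binop below.  */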
11173
11174 if ((code == EQ || code == NE)
11175 && (!optimize_size
11176 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11177 {
11178 rtx xor0, xor1;
11179
11180 xor1 = hi[0];
11181 if (hi[1] != const0_rtx)
11182 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11183 NULL_RTX, 0, OPTAB_WIDEN);
11184
11185 xor0 = lo[0];
11186 if (lo[1] != const0_rtx)
11187 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11188 NULL_RTX, 0, OPTAB_WIDEN);
11189
11190 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11191 NULL_RTX, 0, OPTAB_WIDEN);
11192
11193 ix86_compare_op0 = tmp;
11194 ix86_compare_op1 = const0_rtx;
11195 ix86_expand_branch (code, label);
11196 return;
11197 }
11198
11199 /* Otherwise, if we are doing a less-than or greater-or-equal-than
11200 comparison, op1 is a constant and the low word is zero, then we can
11201 just examine the high word. */
11202
11203 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11204 switch (code)
11205 {
11206 case LT: case LTU: case GE: case GEU:
11207 ix86_compare_op0 = hi[0];
11208 ix86_compare_op1 = hi[1];
11209 ix86_expand_branch (code, label);
11210 return;
11211 default:
11212 break;
11213 }
11214
11215 /* Otherwise, we need two or three jumps. */
11216
11217 label2 = gen_label_rtx ();
11218
11219 code1 = code;
11220 code2 = swap_condition (code);
11221 code3 = unsigned_condition (code);
11222
11223 switch (code)
11224 {
11225 case LT: case GT: case LTU: case GTU:
11226 break;
11227
11228 case LE: code1 = LT; code2 = GT; break;
11229 case GE: code1 = GT; code2 = LT; break;
11230 case LEU: code1 = LTU; code2 = GTU; break;
11231 case GEU: code1 = GTU; code2 = LTU; break;
11232
11233 case EQ: code1 = UNKNOWN; code2 = NE; break;
11234 case NE: code2 = UNKNOWN; break;
11235
11236 default:
11237 gcc_unreachable ();
11238 }
11239
11240 /*
11241 * a < b =>
11242 * if (hi(a) < hi(b)) goto true;
11243 * if (hi(a) > hi(b)) goto false;
11244 * if (lo(a) < lo(b)) goto true;
11245 * false:
11246 */
11247
11248 ix86_compare_op0 = hi[0];
11249 ix86_compare_op1 = hi[1];
11250
11251 if (code1 != UNKNOWN)
11252 ix86_expand_branch (code1, label);
11253 if (code2 != UNKNOWN)
11254 ix86_expand_branch (code2, label2);
11255
11256 ix86_compare_op0 = lo[0];
11257 ix86_compare_op1 = lo[1];
11258 ix86_expand_branch (code3, label);
11259
11260 if (code2 != UNKNOWN)
11261 emit_label (label2);
11262 return;
11263 }
11264
11265 default:
11266 gcc_unreachable ();
11267 }
11268 }
11269
11270 /* Split branch based on floating point condition. */
11271 void
11272 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11273 rtx target1, rtx target2, rtx tmp, rtx pushed)
11274 {
11275 rtx second, bypass;
11276 rtx label = NULL_RTX;
11277 rtx condition;
11278 int bypass_probability = -1, second_probability = -1, probability = -1;
11279 rtx i;
11280
11281 if (target2 != pc_rtx)
11282 {
11283 rtx tmp = target2;
11284 code = reverse_condition_maybe_unordered (code);
11285 target2 = target1;
11286 target1 = tmp;
11287 }
11288
11289 condition = ix86_expand_fp_compare (code, op1, op2,
11290 tmp, &second, &bypass);
11291
11292 /* Remove pushed operand from stack. */
11293 if (pushed)
11294 ix86_free_from_memory (GET_MODE (pushed));
11295
11296 if (split_branch_probability >= 0)
11297 {
11298 /* Distribute the probabilities across the jumps.
11299 Assume that BYPASS and SECOND always test
11300 for UNORDERED. */
11301 probability = split_branch_probability;
11302
11303 /* A value of 1 is low enough that the probability does not need
11304 to be updated. Later we may run some experiments and see
11305 whether unordered values are more frequent in practice. */
11306 if (bypass)
11307 bypass_probability = 1;
11308 if (second)
11309 second_probability = 1;
11310 }
11311 if (bypass != NULL_RTX)
11312 {
11313 label = gen_label_rtx ();
11314 i = emit_jump_insn (gen_rtx_SET
11315 (VOIDmode, pc_rtx,
11316 gen_rtx_IF_THEN_ELSE (VOIDmode,
11317 bypass,
11318 gen_rtx_LABEL_REF (VOIDmode,
11319 label),
11320 pc_rtx)));
11321 if (bypass_probability >= 0)
11322 REG_NOTES (i)
11323 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11324 GEN_INT (bypass_probability),
11325 REG_NOTES (i));
11326 }
11327 i = emit_jump_insn (gen_rtx_SET
11328 (VOIDmode, pc_rtx,
11329 gen_rtx_IF_THEN_ELSE (VOIDmode,
11330 condition, target1, target2)));
11331 if (probability >= 0)
11332 REG_NOTES (i)
11333 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11334 GEN_INT (probability),
11335 REG_NOTES (i));
11336 if (second != NULL_RTX)
11337 {
11338 i = emit_jump_insn (gen_rtx_SET
11339 (VOIDmode, pc_rtx,
11340 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11341 target2)));
11342 if (second_probability >= 0)
11343 REG_NOTES (i)
11344 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11345 GEN_INT (second_probability),
11346 REG_NOTES (i));
11347 }
11348 if (label != NULL_RTX)
11349 emit_label (label);
11350 }
11351
11352 int
11353 ix86_expand_setcc (enum rtx_code code, rtx dest)
11354 {
11355 rtx ret, tmp, tmpreg, equiv;
11356 rtx second_test, bypass_test;
11357
11358 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11359 return 0; /* FAIL */
11360
11361 gcc_assert (GET_MODE (dest) == QImode);
11362
11363 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11364 PUT_MODE (ret, QImode);
11365
11366 tmp = dest;
11367 tmpreg = dest;
11368
11369 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11370 if (bypass_test || second_test)
11371 {
11372 rtx test = second_test;
11373 int bypass = 0;
11374 rtx tmp2 = gen_reg_rtx (QImode);
11375 if (bypass_test)
11376 {
11377 gcc_assert (!second_test);
11378 test = bypass_test;
11379 bypass = 1;
11380 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11381 }
11382 PUT_MODE (test, QImode);
11383 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11384
11385 if (bypass)
11386 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11387 else
11388 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11389 }
11390
11391 /* Attach a REG_EQUAL note describing the comparison result. */
11392 if (ix86_compare_op0 && ix86_compare_op1)
11393 {
11394 equiv = simplify_gen_relational (code, QImode,
11395 GET_MODE (ix86_compare_op0),
11396 ix86_compare_op0, ix86_compare_op1);
11397 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11398 }
11399
11400 return 1; /* DONE */
11401 }
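/* An illustrative example of the combining above (register names are a
   sketch, not from the original comments): with TARGET_IEEE_FP an FP
   equality  a == b  expands via the flags into roughly

	sete	%al
	setnp	%dl
	andb	%dl, %al

   i.e. the bypass test (unordered) is reversed to ORDERED and ANDed in,
   while a second test, when present, is ORed in instead.  */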
11402
11403 /* Expand a comparison setting or clearing the carry flag. Return true when
11404 successful and set *POP to the comparison operation. */
11405 static bool
11406 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11407 {
11408 enum machine_mode mode =
11409 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11410
11411 /* Do not handle double-word compares (DImode or TImode), which go through
11412 a special path. Also, we can't deal with FP compares yet; this could be added. */
11413 if (mode == (TARGET_64BIT ? TImode : DImode))
11414 return false;
11415 if (FLOAT_MODE_P (mode))
11416 {
11417 rtx second_test = NULL, bypass_test = NULL;
11418 rtx compare_op, compare_seq;
11419
11420 /* Shortcut: the following common codes never translate into carry flag compares. */
11421 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11422 || code == ORDERED || code == UNORDERED)
11423 return false;
11424
11425 /* These comparisons require the zero flag; swap operands so they won't. */
11426 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11427 && !TARGET_IEEE_FP)
11428 {
11429 rtx tmp = op0;
11430 op0 = op1;
11431 op1 = tmp;
11432 code = swap_condition (code);
11433 }
11434
11435 /* Try to expand the comparison and verify that we end up with a carry flag
11436 based comparison. This fails to be true only when we decide to expand the
11437 comparison using arithmetic, which is not a common scenario. */
11438 start_sequence ();
11439 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11440 &second_test, &bypass_test);
11441 compare_seq = get_insns ();
11442 end_sequence ();
11443
11444 if (second_test || bypass_test)
11445 return false;
11446 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11447 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11448 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11449 else
11450 code = GET_CODE (compare_op);
11451 if (code != LTU && code != GEU)
11452 return false;
11453 emit_insn (compare_seq);
11454 *pop = compare_op;
11455 return true;
11456 }
11457 if (!INTEGRAL_MODE_P (mode))
11458 return false;
11459 switch (code)
11460 {
11461 case LTU:
11462 case GEU:
11463 break;
11464
11465 /* Convert a==0 into (unsigned)a<1. */
11466 case EQ:
11467 case NE:
11468 if (op1 != const0_rtx)
11469 return false;
11470 op1 = const1_rtx;
11471 code = (code == EQ ? LTU : GEU);
11472 break;
11473
11474 /* Convert a>b into b<a or a>=b+1. */
11475 case GTU:
11476 case LEU:
11477 if (CONST_INT_P (op1))
11478 {
11479 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11480 /* Bail out on overflow. We could still swap the operands, but that
11481 would force loading of the constant into a register. */
11482 if (op1 == const0_rtx
11483 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11484 return false;
11485 code = (code == GTU ? GEU : LTU);
11486 }
11487 else
11488 {
11489 rtx tmp = op1;
11490 op1 = op0;
11491 op0 = tmp;
11492 code = (code == GTU ? LTU : GEU);
11493 }
11494 break;
11495
11496 /* Convert a>=0 into (unsigned)a<0x80000000. */
11497 case LT:
11498 case GE:
11499 if (mode == DImode || op1 != const0_rtx)
11500 return false;
11501 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11502 code = (code == LT ? GEU : LTU);
11503 break;
11504 case LE:
11505 case GT:
11506 if (mode == DImode || op1 != constm1_rtx)
11507 return false;
11508 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11509 code = (code == LE ? GEU : LTU);
11510 break;
11511
11512 default:
11513 return false;
11514 }
11515 /* Swapping operands may cause a constant to appear as the first operand. */
11516 if (!nonimmediate_operand (op0, VOIDmode))
11517 {
11518 if (no_new_pseudos)
11519 return false;
11520 op0 = force_reg (mode, op0);
11521 }
11522 ix86_compare_op0 = op0;
11523 ix86_compare_op1 = op1;
11524 *pop = ix86_expand_compare (code, NULL, NULL);
11525 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11526 return true;
11527 }
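/* A small worked example of the rewrites above (illustrative only): the
   integer test  x == 0  is turned into the unsigned compare  x <u 1,
   whose outcome lives entirely in the carry flag after  cmpl $1, x, and
   can therefore feed the sbb/adc based sequences in the callers.  */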
11528
11529 int
11530 ix86_expand_int_movcc (rtx operands[])
11531 {
11532 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11533 rtx compare_seq, compare_op;
11534 rtx second_test, bypass_test;
11535 enum machine_mode mode = GET_MODE (operands[0]);
11536 bool sign_bit_compare_p = false;
11537
11538 start_sequence ();
11539 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11540 compare_seq = get_insns ();
11541 end_sequence ();
11542
11543 compare_code = GET_CODE (compare_op);
11544
11545 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11546 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11547 sign_bit_compare_p = true;
11548
11549 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11550 HImode insns, we'd be swallowed in word prefix ops. */
11551
11552 if ((mode != HImode || TARGET_FAST_PREFIX)
11553 && (mode != (TARGET_64BIT ? TImode : DImode))
11554 && CONST_INT_P (operands[2])
11555 && CONST_INT_P (operands[3]))
11556 {
11557 rtx out = operands[0];
11558 HOST_WIDE_INT ct = INTVAL (operands[2]);
11559 HOST_WIDE_INT cf = INTVAL (operands[3]);
11560 HOST_WIDE_INT diff;
11561
11562 diff = ct - cf;
11563 /* Sign bit compares are better done using shifts than by using
11564 sbb. */
11565 if (sign_bit_compare_p
11566 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11567 ix86_compare_op1, &compare_op))
11568 {
11569 /* Detect overlap between destination and compare sources. */
11570 rtx tmp = out;
11571
11572 if (!sign_bit_compare_p)
11573 {
11574 bool fpcmp = false;
11575
11576 compare_code = GET_CODE (compare_op);
11577
11578 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11579 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11580 {
11581 fpcmp = true;
11582 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11583 }
11584
11585 /* To simplify rest of code, restrict to the GEU case. */
11586 if (compare_code == LTU)
11587 {
11588 HOST_WIDE_INT tmp = ct;
11589 ct = cf;
11590 cf = tmp;
11591 compare_code = reverse_condition (compare_code);
11592 code = reverse_condition (code);
11593 }
11594 else
11595 {
11596 if (fpcmp)
11597 PUT_CODE (compare_op,
11598 reverse_condition_maybe_unordered
11599 (GET_CODE (compare_op)));
11600 else
11601 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11602 }
11603 diff = ct - cf;
11604
11605 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11606 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11607 tmp = gen_reg_rtx (mode);
11608
11609 if (mode == DImode)
11610 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11611 else
11612 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11613 }
11614 else
11615 {
11616 if (code == GT || code == GE)
11617 code = reverse_condition (code);
11618 else
11619 {
11620 HOST_WIDE_INT tmp = ct;
11621 ct = cf;
11622 cf = tmp;
11623 diff = ct - cf;
11624 }
11625 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11626 ix86_compare_op1, VOIDmode, 0, -1);
11627 }
11628
11629 if (diff == 1)
11630 {
11631 /*
11632 * cmpl op0,op1
11633 * sbbl dest,dest
11634 * [addl dest, ct]
11635 *
11636 * Size 5 - 8.
11637 */
11638 if (ct)
11639 tmp = expand_simple_binop (mode, PLUS,
11640 tmp, GEN_INT (ct),
11641 copy_rtx (tmp), 1, OPTAB_DIRECT);
11642 }
11643 else if (cf == -1)
11644 {
11645 /*
11646 * cmpl op0,op1
11647 * sbbl dest,dest
11648 * orl $ct, dest
11649 *
11650 * Size 8.
11651 */
11652 tmp = expand_simple_binop (mode, IOR,
11653 tmp, GEN_INT (ct),
11654 copy_rtx (tmp), 1, OPTAB_DIRECT);
11655 }
11656 else if (diff == -1 && ct)
11657 {
11658 /*
11659 * cmpl op0,op1
11660 * sbbl dest,dest
11661 * notl dest
11662 * [addl dest, cf]
11663 *
11664 * Size 8 - 11.
11665 */
11666 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11667 if (cf)
11668 tmp = expand_simple_binop (mode, PLUS,
11669 copy_rtx (tmp), GEN_INT (cf),
11670 copy_rtx (tmp), 1, OPTAB_DIRECT);
11671 }
11672 else
11673 {
11674 /*
11675 * cmpl op0,op1
11676 * sbbl dest,dest
11677 * [notl dest]
11678 * andl cf - ct, dest
11679 * [addl dest, ct]
11680 *
11681 * Size 8 - 11.
11682 */
11683
11684 if (cf == 0)
11685 {
11686 cf = ct;
11687 ct = 0;
11688 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11689 }
11690
11691 tmp = expand_simple_binop (mode, AND,
11692 copy_rtx (tmp),
11693 gen_int_mode (cf - ct, mode),
11694 copy_rtx (tmp), 1, OPTAB_DIRECT);
11695 if (ct)
11696 tmp = expand_simple_binop (mode, PLUS,
11697 copy_rtx (tmp), GEN_INT (ct),
11698 copy_rtx (tmp), 1, OPTAB_DIRECT);
11699 }
11700
11701 if (!rtx_equal_p (tmp, out))
11702 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11703
11704 return 1; /* DONE */
11705 }
11706
11707 if (diff < 0)
11708 {
11709 HOST_WIDE_INT tmp;
11710 tmp = ct, ct = cf, cf = tmp;
11711 diff = -diff;
11712 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11713 {
11714 /* We may be reversing an unordered compare to a normal compare, which
11715 is not valid in general (we may convert a non-trapping condition
11716 to a trapping one); however, on i386 we currently emit all
11717 comparisons unordered. */
11718 compare_code = reverse_condition_maybe_unordered (compare_code);
11719 code = reverse_condition_maybe_unordered (code);
11720 }
11721 else
11722 {
11723 compare_code = reverse_condition (compare_code);
11724 code = reverse_condition (code);
11725 }
11726 }
11727
11728 compare_code = UNKNOWN;
11729 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11730 && CONST_INT_P (ix86_compare_op1))
11731 {
11732 if (ix86_compare_op1 == const0_rtx
11733 && (code == LT || code == GE))
11734 compare_code = code;
11735 else if (ix86_compare_op1 == constm1_rtx)
11736 {
11737 if (code == LE)
11738 compare_code = LT;
11739 else if (code == GT)
11740 compare_code = GE;
11741 }
11742 }
11743
11744 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11745 if (compare_code != UNKNOWN
11746 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11747 && (cf == -1 || ct == -1))
11748 {
11749 /* If the lea code below could be used, only optimize
11750 if it results in a 2-insn sequence. */
11751
11752 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11753 || diff == 3 || diff == 5 || diff == 9)
11754 || (compare_code == LT && ct == -1)
11755 || (compare_code == GE && cf == -1))
11756 {
11757 /*
11758 * notl op1 (if necessary)
11759 * sarl $31, op1
11760 * orl cf, op1
11761 */
11762 if (ct != -1)
11763 {
11764 cf = ct;
11765 ct = -1;
11766 code = reverse_condition (code);
11767 }
11768
11769 out = emit_store_flag (out, code, ix86_compare_op0,
11770 ix86_compare_op1, VOIDmode, 0, -1);
11771
11772 out = expand_simple_binop (mode, IOR,
11773 out, GEN_INT (cf),
11774 out, 1, OPTAB_DIRECT);
11775 if (out != operands[0])
11776 emit_move_insn (operands[0], out);
11777
11778 return 1; /* DONE */
11779 }
11780 }
11781
11782
11783 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11784 || diff == 3 || diff == 5 || diff == 9)
11785 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11786 && (mode != DImode
11787 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11788 {
11789 /*
11790 * xorl dest,dest
11791 * cmpl op1,op2
11792 * setcc dest
11793 * lea cf(dest*(ct-cf)),dest
11794 *
11795 * Size 14.
11796 *
11797 * This also catches the degenerate setcc-only case.
11798 */
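	   /* A concrete instance (illustrative): for ct = 7, cf = 2 the
	      difference is 5, so after setcc leaves 0 or 1 in dest the
	      value is formed with  lea 2(dest,dest,4), dest  - that is,
	      dest*5 + 2 - yielding 7 when the condition holds and 2 when
	      it does not.  */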
11799
11800 rtx tmp;
11801 int nops;
11802
11803 out = emit_store_flag (out, code, ix86_compare_op0,
11804 ix86_compare_op1, VOIDmode, 0, 1);
11805
11806 nops = 0;
11807 /* On x86_64 the lea instruction operates on Pmode, so we need
11808 the arithmetic done in the proper mode to match. */
11809 if (diff == 1)
11810 tmp = copy_rtx (out);
11811 else
11812 {
11813 rtx out1;
11814 out1 = copy_rtx (out);
11815 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11816 nops++;
11817 if (diff & 1)
11818 {
11819 tmp = gen_rtx_PLUS (mode, tmp, out1);
11820 nops++;
11821 }
11822 }
11823 if (cf != 0)
11824 {
11825 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11826 nops++;
11827 }
11828 if (!rtx_equal_p (tmp, out))
11829 {
11830 if (nops == 1)
11831 out = force_operand (tmp, copy_rtx (out));
11832 else
11833 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11834 }
11835 if (!rtx_equal_p (out, operands[0]))
11836 emit_move_insn (operands[0], copy_rtx (out));
11837
11838 return 1; /* DONE */
11839 }
11840
11841 /*
11842 * General case: Jumpful:
11843 * xorl dest,dest cmpl op1, op2
11844 * cmpl op1, op2 movl ct, dest
11845 * setcc dest jcc 1f
11846 * decl dest movl cf, dest
11847 * andl (cf-ct),dest 1:
11848 * addl ct,dest
11849 *
11850 * Size 20. Size 14.
11851 *
11852 * This is reasonably steep, but branch mispredict costs are
11853 * high on modern cpus, so consider failing only if optimizing
11854 * for space.
11855 */
11856
11857 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11858 && BRANCH_COST >= 2)
11859 {
11860 if (cf == 0)
11861 {
11862 cf = ct;
11863 ct = 0;
11864 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11865 /* We may be reversing an unordered compare to a normal compare,
11866 which is not valid in general (we may convert a non-trapping
11867 condition to a trapping one); however, on i386 we currently
11868 emit all comparisons unordered. */
11869 code = reverse_condition_maybe_unordered (code);
11870 else
11871 {
11872 code = reverse_condition (code);
11873 if (compare_code != UNKNOWN)
11874 compare_code = reverse_condition (compare_code);
11875 }
11876 }
11877
11878 if (compare_code != UNKNOWN)
11879 {
11880 /* notl op1 (if needed)
11881 sarl $31, op1
11882 andl (cf-ct), op1
11883 addl ct, op1
11884
11885 For x < 0 (resp. x <= -1) there will be no notl,
11886 so if possible swap the constants to get rid of the
11887 complement.
11888 True/false will be -1/0 while code below (store flag
11889 followed by decrement) is 0/-1, so the constants need
11890 to be exchanged once more. */
11891
11892 if (compare_code == GE || !cf)
11893 {
11894 code = reverse_condition (code);
11895 compare_code = LT;
11896 }
11897 else
11898 {
11899 HOST_WIDE_INT tmp = cf;
11900 cf = ct;
11901 ct = tmp;
11902 }
11903
11904 out = emit_store_flag (out, code, ix86_compare_op0,
11905 ix86_compare_op1, VOIDmode, 0, -1);
11906 }
11907 else
11908 {
11909 out = emit_store_flag (out, code, ix86_compare_op0,
11910 ix86_compare_op1, VOIDmode, 0, 1);
11911
11912 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11913 copy_rtx (out), 1, OPTAB_DIRECT);
11914 }
11915
11916 out = expand_simple_binop (mode, AND, copy_rtx (out),
11917 gen_int_mode (cf - ct, mode),
11918 copy_rtx (out), 1, OPTAB_DIRECT);
11919 if (ct)
11920 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11921 copy_rtx (out), 1, OPTAB_DIRECT);
11922 if (!rtx_equal_p (out, operands[0]))
11923 emit_move_insn (operands[0], copy_rtx (out));
11924
11925 return 1; /* DONE */
11926 }
11927 }
11928
11929 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11930 {
11931 /* Try a few things more with specific constants and a variable. */
11932
11933 optab op;
11934 rtx var, orig_out, out, tmp;
11935
11936 if (BRANCH_COST <= 2)
11937 return 0; /* FAIL */
11938
11939 /* If one of the two operands is an interesting constant, load a 0/-1
11940 constant via recursion and mask the variable in with a logical operation. */
11941
11942 if (CONST_INT_P (operands[2]))
11943 {
11944 var = operands[3];
11945 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11946 operands[3] = constm1_rtx, op = and_optab;
11947 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11948 operands[3] = const0_rtx, op = ior_optab;
11949 else
11950 return 0; /* FAIL */
11951 }
11952 else if (CONST_INT_P (operands[3]))
11953 {
11954 var = operands[2];
11955 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11956 operands[2] = constm1_rtx, op = and_optab;
11957 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11958 operands[2] = const0_rtx, op = ior_optab;
11959 else
11960 return 0; /* FAIL */
11961 }
11962 else
11963 return 0; /* FAIL */
11964
11965 orig_out = operands[0];
11966 tmp = gen_reg_rtx (mode);
11967 operands[0] = tmp;
11968
11969 /* Recurse to get the constant loaded. */
11970 if (ix86_expand_int_movcc (operands) == 0)
11971 return 0; /* FAIL */
11972
11973 /* Mask in the interesting variable. */
11974 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11975 OPTAB_WIDEN);
11976 if (!rtx_equal_p (out, orig_out))
11977 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11978
11979 return 1; /* DONE */
11980 }
11981
11982 /*
11983 * For comparison with above,
11984 *
11985 * movl cf,dest
11986 * movl ct,tmp
11987 * cmpl op1,op2
11988 * cmovcc tmp,dest
11989 *
11990 * Size 15.
11991 */
11992
11993 if (! nonimmediate_operand (operands[2], mode))
11994 operands[2] = force_reg (mode, operands[2]);
11995 if (! nonimmediate_operand (operands[3], mode))
11996 operands[3] = force_reg (mode, operands[3]);
11997
11998 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11999 {
12000 rtx tmp = gen_reg_rtx (mode);
12001 emit_move_insn (tmp, operands[3]);
12002 operands[3] = tmp;
12003 }
12004 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12005 {
12006 rtx tmp = gen_reg_rtx (mode);
12007 emit_move_insn (tmp, operands[2]);
12008 operands[2] = tmp;
12009 }
12010
12011 if (! register_operand (operands[2], VOIDmode)
12012 && (mode == QImode
12013 || ! register_operand (operands[3], VOIDmode)))
12014 operands[2] = force_reg (mode, operands[2]);
12015
12016 if (mode == QImode
12017 && ! register_operand (operands[3], VOIDmode))
12018 operands[3] = force_reg (mode, operands[3]);
12019
12020 emit_insn (compare_seq);
12021 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12022 gen_rtx_IF_THEN_ELSE (mode,
12023 compare_op, operands[2],
12024 operands[3])));
12025 if (bypass_test)
12026 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12027 gen_rtx_IF_THEN_ELSE (mode,
12028 bypass_test,
12029 copy_rtx (operands[3]),
12030 copy_rtx (operands[0]))));
12031 if (second_test)
12032 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12033 gen_rtx_IF_THEN_ELSE (mode,
12034 second_test,
12035 copy_rtx (operands[2]),
12036 copy_rtx (operands[0]))));
12037
12038 return 1; /* DONE */
12039 }
12040
12041 /* Swap, force into registers, or otherwise massage the two operands
12042 to an sse comparison with a mask result. Thus we differ a bit from
12043 ix86_prepare_fp_compare_args which expects to produce a flags result.
12044
12045 The DEST operand exists to help determine whether to commute commutative
12046 operators. The POP0/POP1 operands are updated in place. The new
12047 comparison code is returned, or UNKNOWN if not implementable. */
12048
12049 static enum rtx_code
12050 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12051 rtx *pop0, rtx *pop1)
12052 {
12053 rtx tmp;
12054
12055 switch (code)
12056 {
12057 case LTGT:
12058 case UNEQ:
12059 /* We have no LTGT as an operator. We could implement it with
12060 NE & ORDERED, but this requires an extra temporary. It's
12061 not clear that it's worth it. */
12062 return UNKNOWN;
12063
12064 case LT:
12065 case LE:
12066 case UNGT:
12067 case UNGE:
12068 /* These are supported directly. */
12069 break;
12070
12071 case EQ:
12072 case NE:
12073 case UNORDERED:
12074 case ORDERED:
12075 /* For commutative operators, try to canonicalize the destination
12076 operand to be first in the comparison - this helps reload to
12077 avoid extra moves. */
12078 if (!dest || !rtx_equal_p (dest, *pop1))
12079 break;
12080 /* FALLTHRU */
12081
12082 case GE:
12083 case GT:
12084 case UNLE:
12085 case UNLT:
12086 /* These are not supported directly. Swap the comparison operands
12087 to transform into something that is supported. */
12088 tmp = *pop0;
12089 *pop0 = *pop1;
12090 *pop1 = tmp;
12091 code = swap_condition (code);
12092 break;
12093
12094 default:
12095 gcc_unreachable ();
12096 }
12097
12098 return code;
12099 }
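/* For example (an illustrative note on the swap above): GE (a, b) is not
   available as an SSE compare predicate, so it is rewritten as LE (b, a),
   which maps onto cmpleps/cmplesd; the predicates the hardware provides
   directly are EQ, LT, LE, UNORDERED, NE, UNGE (nlt), UNGT (nle) and
   ORDERED.  */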
12100
12101 /* Detect conditional moves that exactly match min/max operational
12102 semantics. Note that this is IEEE safe, as long as we don't
12103 interchange the operands.
12104
12105 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12106 and TRUE if the operation is successful and instructions are emitted. */
12107
12108 static bool
12109 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12110 rtx cmp_op1, rtx if_true, rtx if_false)
12111 {
12112 enum machine_mode mode;
12113 bool is_min;
12114 rtx tmp;
12115
12116 if (code == LT)
12117 ;
12118 else if (code == UNGE)
12119 {
12120 tmp = if_true;
12121 if_true = if_false;
12122 if_false = tmp;
12123 }
12124 else
12125 return false;
12126
12127 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12128 is_min = true;
12129 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12130 is_min = false;
12131 else
12132 return false;
12133
12134 mode = GET_MODE (dest);
12135
12136 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12137 but MODE may be a vector mode and thus not appropriate. */
12138 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12139 {
12140 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12141 rtvec v;
12142
12143 if_true = force_reg (mode, if_true);
12144 v = gen_rtvec (2, if_true, if_false);
12145 tmp = gen_rtx_UNSPEC (mode, v, u);
12146 }
12147 else
12148 {
12149 code = is_min ? SMIN : SMAX;
12150 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12151 }
12152
12153 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12154 return true;
12155 }
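/* An illustrative instance of the match above: in  x < y ? x : y  the
   compare operands and the move operands pair up as cmp_op0 == if_true
   and cmp_op1 == if_false, so it is a MIN.  The operand order is kept
   because minss/minsd return their second operand when the inputs are
   unordered, which is exactly what the C conditional yields.  */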
12156
12157 /* Expand an sse vector comparison. Return the register with the result. */
12158
12159 static rtx
12160 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12161 rtx op_true, rtx op_false)
12162 {
12163 enum machine_mode mode = GET_MODE (dest);
12164 rtx x;
12165
12166 cmp_op0 = force_reg (mode, cmp_op0);
12167 if (!nonimmediate_operand (cmp_op1, mode))
12168 cmp_op1 = force_reg (mode, cmp_op1);
12169
12170 if (optimize
12171 || reg_overlap_mentioned_p (dest, op_true)
12172 || reg_overlap_mentioned_p (dest, op_false))
12173 dest = gen_reg_rtx (mode);
12174
12175 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12176 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12177
12178 return dest;
12179 }
12180
12181 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12182 operations. This is used for both scalar and vector conditional moves. */
12183
12184 static void
12185 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12186 {
12187 enum machine_mode mode = GET_MODE (dest);
12188 rtx t2, t3, x;
12189
12190 if (op_false == CONST0_RTX (mode))
12191 {
12192 op_true = force_reg (mode, op_true);
12193 x = gen_rtx_AND (mode, cmp, op_true);
12194 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12195 }
12196 else if (op_true == CONST0_RTX (mode))
12197 {
12198 op_false = force_reg (mode, op_false);
12199 x = gen_rtx_NOT (mode, cmp);
12200 x = gen_rtx_AND (mode, x, op_false);
12201 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12202 }
12203 else
12204 {
12205 op_true = force_reg (mode, op_true);
12206 op_false = force_reg (mode, op_false);
12207
12208 t2 = gen_reg_rtx (mode);
12209 if (optimize)
12210 t3 = gen_reg_rtx (mode);
12211 else
12212 t3 = dest;
12213
12214 x = gen_rtx_AND (mode, op_true, cmp);
12215 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12216
12217 x = gen_rtx_NOT (mode, cmp);
12218 x = gen_rtx_AND (mode, x, op_false);
12219 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12220
12221 x = gen_rtx_IOR (mode, t3, t2);
12222 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12223 }
12224 }
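/* In other words, the general case above computes
   dest = (cmp & op_true) | (~cmp & op_false), relying on the comparison
   result being an all-ones or all-zeros mask in each element.  */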
12225
12226 /* Expand a floating-point conditional move. Return true if successful. */
12227
12228 int
12229 ix86_expand_fp_movcc (rtx operands[])
12230 {
12231 enum machine_mode mode = GET_MODE (operands[0]);
12232 enum rtx_code code = GET_CODE (operands[1]);
12233 rtx tmp, compare_op, second_test, bypass_test;
12234
12235 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12236 {
12237 enum machine_mode cmode;
12238
12239 /* Since we have no cmove for SSE registers, don't force bad register
12240 allocation just to gain access to it. Deny the movcc when the
12241 comparison mode doesn't match the move mode. */
12242 cmode = GET_MODE (ix86_compare_op0);
12243 if (cmode == VOIDmode)
12244 cmode = GET_MODE (ix86_compare_op1);
12245 if (cmode != mode)
12246 return 0;
12247
12248 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12249 &ix86_compare_op0,
12250 &ix86_compare_op1);
12251 if (code == UNKNOWN)
12252 return 0;
12253
12254 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12255 ix86_compare_op1, operands[2],
12256 operands[3]))
12257 return 1;
12258
12259 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12260 ix86_compare_op1, operands[2], operands[3]);
12261 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12262 return 1;
12263 }
12264
12265 /* The floating point conditional move instructions don't directly
12266 support conditions resulting from a signed integer comparison. */
12267
12268 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12269
12273 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12274 {
12275 gcc_assert (!second_test && !bypass_test);
12276 tmp = gen_reg_rtx (QImode);
12277 ix86_expand_setcc (code, tmp);
12278 code = NE;
12279 ix86_compare_op0 = tmp;
12280 ix86_compare_op1 = const0_rtx;
12281 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12282 }
12283 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12284 {
12285 tmp = gen_reg_rtx (mode);
12286 emit_move_insn (tmp, operands[3]);
12287 operands[3] = tmp;
12288 }
12289 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12290 {
12291 tmp = gen_reg_rtx (mode);
12292 emit_move_insn (tmp, operands[2]);
12293 operands[2] = tmp;
12294 }
12295
12296 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12297 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12298 operands[2], operands[3])));
12299 if (bypass_test)
12300 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12301 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12302 operands[3], operands[0])));
12303 if (second_test)
12304 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12305 gen_rtx_IF_THEN_ELSE (mode, second_test,
12306 operands[2], operands[0])));
12307
12308 return 1;
12309 }
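/* An illustrative example of the fallback above: a signed integer GT
   condition cannot drive fcmov directly, so it is first materialized with
   setcc into a QImode temporary and the conditional move is then done on
   the NE test of that temporary against zero.  */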
12310
12311 /* Expand a floating-point vector conditional move; a vcond operation
12312 rather than a movcc operation. */
12313
12314 bool
12315 ix86_expand_fp_vcond (rtx operands[])
12316 {
12317 enum rtx_code code = GET_CODE (operands[3]);
12318 rtx cmp;
12319
12320 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12321 &operands[4], &operands[5]);
12322 if (code == UNKNOWN)
12323 return false;
12324
12325 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12326 operands[5], operands[1], operands[2]))
12327 return true;
12328
12329 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12330 operands[1], operands[2]);
12331 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12332 return true;
12333 }
12334
12335 /* Expand a signed integral vector conditional move. */
12336
12337 bool
12338 ix86_expand_int_vcond (rtx operands[])
12339 {
12340 enum machine_mode mode = GET_MODE (operands[0]);
12341 enum rtx_code code = GET_CODE (operands[3]);
12342 bool negate = false;
12343 rtx x, cop0, cop1;
12344
12345 cop0 = operands[4];
12346 cop1 = operands[5];
12347
12348 /* Canonicalize the comparison to EQ, GT, GTU. */
12349 switch (code)
12350 {
12351 case EQ:
12352 case GT:
12353 case GTU:
12354 break;
12355
12356 case NE:
12357 case LE:
12358 case LEU:
12359 code = reverse_condition (code);
12360 negate = true;
12361 break;
12362
12363 case GE:
12364 case GEU:
12365 code = reverse_condition (code);
12366 negate = true;
12367 /* FALLTHRU */
12368
12369 case LT:
12370 case LTU:
12371 code = swap_condition (code);
12372 x = cop0, cop0 = cop1, cop1 = x;
12373 break;
12374
12375 default:
12376 gcc_unreachable ();
12377 }
12378
12379 /* Unsigned parallel compare is not supported by the hardware. Play some
12380 tricks to turn this into a signed comparison against 0. */
12381 if (code == GTU)
12382 {
12383 cop0 = force_reg (mode, cop0);
12384
12385 switch (mode)
12386 {
12387 case V4SImode:
12388 {
12389 rtx t1, t2, mask;
12390
12391 /* Perform a parallel modulo subtraction. */
12392 t1 = gen_reg_rtx (mode);
12393 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12394
12395 /* Extract the original sign bit of op0. */
12396 mask = GEN_INT (-0x80000000);
12397 mask = gen_rtx_CONST_VECTOR (mode,
12398 gen_rtvec (4, mask, mask, mask, mask));
12399 mask = force_reg (mode, mask);
12400 t2 = gen_reg_rtx (mode);
12401 emit_insn (gen_andv4si3 (t2, cop0, mask));
12402
12403 /* XOR it back into the result of the subtraction. This results
12404 in the sign bit set iff we saw unsigned underflow. */
12405 x = gen_reg_rtx (mode);
12406 emit_insn (gen_xorv4si3 (x, t1, t2));
12407
12408 code = GT;
12409 }
12410 break;
12411
12412 case V16QImode:
12413 case V8HImode:
12414 /* Perform a parallel unsigned saturating subtraction. */
12415 x = gen_reg_rtx (mode);
12416 emit_insn (gen_rtx_SET (VOIDmode, x,
12417 gen_rtx_US_MINUS (mode, cop0, cop1)));
12418
12419 code = EQ;
12420 negate = !negate;
12421 break;
12422
12423 default:
12424 gcc_unreachable ();
12425 }
12426
12427 cop0 = x;
12428 cop1 = CONST0_RTX (mode);
12429 }
12430
12431 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12432 operands[1+negate], operands[2-negate]);
12433
12434 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12435 operands[2-negate]);
12436 return true;
12437 }
12438
12439 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12440 true if we should do zero extension, else sign extension. HIGH_P is
12441 true if we want the N/2 high elements, else the low elements. */
12442
12443 void
12444 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12445 {
12446 enum machine_mode imode = GET_MODE (operands[1]);
12447 rtx (*unpack)(rtx, rtx, rtx);
12448 rtx se, dest;
12449
12450 switch (imode)
12451 {
12452 case V16QImode:
12453 if (high_p)
12454 unpack = gen_vec_interleave_highv16qi;
12455 else
12456 unpack = gen_vec_interleave_lowv16qi;
12457 break;
12458 case V8HImode:
12459 if (high_p)
12460 unpack = gen_vec_interleave_highv8hi;
12461 else
12462 unpack = gen_vec_interleave_lowv8hi;
12463 break;
12464 case V4SImode:
12465 if (high_p)
12466 unpack = gen_vec_interleave_highv4si;
12467 else
12468 unpack = gen_vec_interleave_lowv4si;
12469 break;
12470 default:
12471 gcc_unreachable ();
12472 }
12473
12474 dest = gen_lowpart (imode, operands[0]);
12475
12476 if (unsigned_p)
12477 se = force_reg (imode, CONST0_RTX (imode));
12478 else
12479 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12480 operands[1], pc_rtx, pc_rtx);
12481
12482 emit_insn (unpack (dest, operands[1], se));
12483 }
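/* A sketch of the idea above: for sign extension the "other" interleave
   operand is the mask (0 > x), i.e. all-ones exactly in the negative
   elements, so the punpckl/punpckh interleaves produce correctly
   sign-extended wider values; for zero extension that operand is simply
   a zero vector.  */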
12484
12485 /* Expand a conditional increment or decrement using adc/sbb instructions.
12486 The default case using setcc followed by the conditional move can be
12487 done by generic code. */
12488 int
12489 ix86_expand_int_addcc (rtx operands[])
12490 {
12491 enum rtx_code code = GET_CODE (operands[1]);
12492 rtx compare_op;
12493 rtx val = const0_rtx;
12494 bool fpcmp = false;
12495 enum machine_mode mode = GET_MODE (operands[0]);
12496
12497 if (operands[3] != const1_rtx
12498 && operands[3] != constm1_rtx)
12499 return 0;
12500 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12501 ix86_compare_op1, &compare_op))
12502 return 0;
12503 code = GET_CODE (compare_op);
12504
12505 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12506 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12507 {
12508 fpcmp = true;
12509 code = ix86_fp_compare_code_to_integer (code);
12510 }
12511
12512 if (code != LTU)
12513 {
12514 val = constm1_rtx;
12515 if (fpcmp)
12516 PUT_CODE (compare_op,
12517 reverse_condition_maybe_unordered
12518 (GET_CODE (compare_op)));
12519 else
12520 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12521 }
12522 PUT_MODE (compare_op, mode);
12523
12524 /* Construct either adc or sbb insn. */
12525 if ((code == LTU) == (operands[3] == constm1_rtx))
12526 {
12527 switch (GET_MODE (operands[0]))
12528 {
12529 case QImode:
12530 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12531 break;
12532 case HImode:
12533 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12534 break;
12535 case SImode:
12536 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12537 break;
12538 case DImode:
12539 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12540 break;
12541 default:
12542 gcc_unreachable ();
12543 }
12544 }
12545 else
12546 {
12547 switch (GET_MODE (operands[0]))
12548 {
12549 case QImode:
12550 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12551 break;
12552 case HImode:
12553 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12554 break;
12555 case SImode:
12556 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12557 break;
12558 case DImode:
12559 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12560 break;
12561 default:
12562 gcc_unreachable ();
12563 }
12564 }
12565 return 1; /* DONE */
12566 }
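/* A worked example of the expansion above (register assignment is a
   sketch): the conditional increment  dest = (a <u b) ? src + 1 : src,
   with dest and src in the same register, becomes roughly

	cmpl	b, a
	adcl	$0, dest

   i.e. the compare leaves the condition in the carry flag and adc/sbb
   folds it into the addition, with no setcc or branch needed.  */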
12567
12568
12569 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12570 works for floating point parameters and non-offsettable memories.
12571 For pushes, it returns just stack offsets; the values will be saved
12572 in the right order. Maximally three parts are generated. */
12573
12574 static int
12575 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12576 {
12577 int size;
12578
12579 if (!TARGET_64BIT)
12580 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12581 else
12582 size = (GET_MODE_SIZE (mode) + 4) / 8;
12583
12584 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12585 gcc_assert (size >= 2 && size <= 3);
12586
12587 /* Optimize constant pool references to immediates. This is used by fp
12588 moves, which force all constants to memory to allow combining. */
12589 if (MEM_P (operand) && MEM_READONLY_P (operand))
12590 {
12591 rtx tmp = maybe_get_pool_constant (operand);
12592 if (tmp)
12593 operand = tmp;
12594 }
12595
12596 if (MEM_P (operand) && !offsettable_memref_p (operand))
12597 {
12598 /* The only non-offsettable memories we handle are pushes. */
12599 int ok = push_operand (operand, VOIDmode);
12600
12601 gcc_assert (ok);
12602
12603 operand = copy_rtx (operand);
12604 PUT_MODE (operand, Pmode);
12605 parts[0] = parts[1] = parts[2] = operand;
12606 return size;
12607 }
12608
12609 if (GET_CODE (operand) == CONST_VECTOR)
12610 {
12611 enum machine_mode imode = int_mode_for_mode (mode);
12612 /* Caution: if we looked through a constant pool memory above,
12613 the operand may actually have a different mode now. That's
12614 ok, since we want to pun this all the way back to an integer. */
12615 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12616 gcc_assert (operand != NULL);
12617 mode = imode;
12618 }
12619
12620 if (!TARGET_64BIT)
12621 {
12622 if (mode == DImode)
12623 split_di (&operand, 1, &parts[0], &parts[1]);
12624 else
12625 {
12626 if (REG_P (operand))
12627 {
12628 gcc_assert (reload_completed);
12629 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12630 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12631 if (size == 3)
12632 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12633 }
12634 else if (offsettable_memref_p (operand))
12635 {
12636 operand = adjust_address (operand, SImode, 0);
12637 parts[0] = operand;
12638 parts[1] = adjust_address (operand, SImode, 4);
12639 if (size == 3)
12640 parts[2] = adjust_address (operand, SImode, 8);
12641 }
12642 else if (GET_CODE (operand) == CONST_DOUBLE)
12643 {
12644 REAL_VALUE_TYPE r;
12645 long l[4];
12646
12647 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12648 switch (mode)
12649 {
12650 case XFmode:
12651 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12652 parts[2] = gen_int_mode (l[2], SImode);
12653 break;
12654 case DFmode:
12655 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12656 break;
12657 default:
12658 gcc_unreachable ();
12659 }
12660 parts[1] = gen_int_mode (l[1], SImode);
12661 parts[0] = gen_int_mode (l[0], SImode);
12662 }
12663 else
12664 gcc_unreachable ();
12665 }
12666 }
12667 else
12668 {
12669 if (mode == TImode)
12670 split_ti (&operand, 1, &parts[0], &parts[1]);
12671 if (mode == XFmode || mode == TFmode)
12672 {
12673 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12674 if (REG_P (operand))
12675 {
12676 gcc_assert (reload_completed);
12677 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12678 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12679 }
12680 else if (offsettable_memref_p (operand))
12681 {
12682 operand = adjust_address (operand, DImode, 0);
12683 parts[0] = operand;
12684 parts[1] = adjust_address (operand, upper_mode, 8);
12685 }
12686 else if (GET_CODE (operand) == CONST_DOUBLE)
12687 {
12688 REAL_VALUE_TYPE r;
12689 long l[4];
12690
12691 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12692 real_to_target (l, &r, mode);
12693
12694 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12695 if (HOST_BITS_PER_WIDE_INT >= 64)
12696 parts[0]
12697 = gen_int_mode
12698 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12699 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12700 DImode);
12701 else
12702 parts[0] = immed_double_const (l[0], l[1], DImode);
12703
12704 if (upper_mode == SImode)
12705 parts[1] = gen_int_mode (l[2], SImode);
12706 else if (HOST_BITS_PER_WIDE_INT >= 64)
12707 parts[1]
12708 = gen_int_mode
12709 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12710 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12711 DImode);
12712 else
12713 parts[1] = immed_double_const (l[2], l[3], DImode);
12714 }
12715 else
12716 gcc_unreachable ();
12717 }
12718 }
12719
12720 return size;
12721 }
12722
12723 /* Emit insns to perform a move or push of DI, DF, and XF values.
12724 All required insns are emitted here directly; operands 2-4 hold the
12725 destination parts in the correct order and operands 5-7 hold the
12726 corresponding source values. */
12727
12728 void
12729 ix86_split_long_move (rtx operands[])
12730 {
12731 rtx part[2][3];
12732 int nparts;
12733 int push = 0;
12734 int collisions = 0;
12735 enum machine_mode mode = GET_MODE (operands[0]);
12736
12737 /* The DFmode expanders may ask us to move a double.
12738 For a 64-bit target this is a single move. By hiding that fact
12739 here we simplify the i386.md splitters. */
12740 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12741 {
12742 /* Optimize constant pool reference to immediates. This is used by
12743 fp moves, that force all constants to memory to allow combining. */
12744
12745 if (MEM_P (operands[1])
12746 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12747 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12748 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12749 if (push_operand (operands[0], VOIDmode))
12750 {
12751 operands[0] = copy_rtx (operands[0]);
12752 PUT_MODE (operands[0], Pmode);
12753 }
12754 else
12755 operands[0] = gen_lowpart (DImode, operands[0]);
12756 operands[1] = gen_lowpart (DImode, operands[1]);
12757 emit_move_insn (operands[0], operands[1]);
12758 return;
12759 }
12760
12761 /* The only non-offsettable memory we handle is a push. */
12762 if (push_operand (operands[0], VOIDmode))
12763 push = 1;
12764 else
12765 gcc_assert (!MEM_P (operands[0])
12766 || offsettable_memref_p (operands[0]));
12767
12768 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12769 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12770
12771 /* When emitting a push, take care of source operands that live on the stack. */
12772 if (push && MEM_P (operands[1])
12773 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12774 {
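/* Each push emitted below moves the stack pointer by one word, so a
   stack-relative source part would otherwise be read from the wrong
   offset; reusing the address of the next higher part (one word up)
   compensates for that adjustment.  */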
12775 if (nparts == 3)
12776 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12777 XEXP (part[1][2], 0));
12778 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12779 XEXP (part[1][1], 0));
12780 }
12781
12782 /* We need to do the copy in the right order in case an address register
12783 of the source overlaps the destination. */
12784 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12785 {
12786 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12787 collisions++;
12788 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12789 collisions++;
12790 if (nparts == 3
12791 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12792 collisions++;
12793
12794 /* Collision in the middle part can be handled by reordering. */
12795 if (collisions == 1 && nparts == 3
12796 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12797 {
12798 rtx tmp;
12799 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12800 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12801 }
12802
12803 /* If there are more collisions, we can't handle them by reordering.
12804 Do an lea to the last part and use only one colliding move. */
12805 else if (collisions > 1)
12806 {
12807 rtx base;
12808
12809 collisions = 1;
12810
12811 base = part[0][nparts - 1];
12812
12813 /* Handle the case when the last part isn't valid for lea.
12814 Happens in 64-bit mode storing the 12-byte XFmode. */
12815 if (GET_MODE (base) != Pmode)
12816 base = gen_rtx_REG (Pmode, REGNO (base));
12817
12818 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12819 part[1][0] = replace_equiv_address (part[1][0], base);
12820 part[1][1] = replace_equiv_address (part[1][1],
12821 plus_constant (base, UNITS_PER_WORD));
12822 if (nparts == 3)
12823 part[1][2] = replace_equiv_address (part[1][2],
12824 plus_constant (base, 8));
12825 }
12826 }
12827
12828 if (push)
12829 {
12830 if (!TARGET_64BIT)
12831 {
12832 if (nparts == 3)
12833 {
12834 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12835 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12836 emit_move_insn (part[0][2], part[1][2]);
12837 }
12838 }
12839 else
12840 {
12841 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12842 a register, that is OK - we will just use the larger counterpart. We also
12843 retype memory - this comes from an attempt to avoid the REX prefix when
12844 moving the second half of a TFmode value. */
12845 if (GET_MODE (part[1][1]) == SImode)
12846 {
12847 switch (GET_CODE (part[1][1]))
12848 {
12849 case MEM:
12850 part[1][1] = adjust_address (part[1][1], DImode, 0);
12851 break;
12852
12853 case REG:
12854 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12855 break;
12856
12857 default:
12858 gcc_unreachable ();
12859 }
12860
12861 if (GET_MODE (part[1][0]) == SImode)
12862 part[1][0] = part[1][1];
12863 }
12864 }
12865 emit_move_insn (part[0][1], part[1][1]);
12866 emit_move_insn (part[0][0], part[1][0]);
12867 return;
12868 }
12869
12870 /* Choose correct order to not overwrite the source before it is copied. */
12871 if ((REG_P (part[0][0])
12872 && REG_P (part[1][1])
12873 && (REGNO (part[0][0]) == REGNO (part[1][1])
12874 || (nparts == 3
12875 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12876 || (collisions > 0
12877 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12878 {
12879 if (nparts == 3)
12880 {
12881 operands[2] = part[0][2];
12882 operands[3] = part[0][1];
12883 operands[4] = part[0][0];
12884 operands[5] = part[1][2];
12885 operands[6] = part[1][1];
12886 operands[7] = part[1][0];
12887 }
12888 else
12889 {
12890 operands[2] = part[0][1];
12891 operands[3] = part[0][0];
12892 operands[5] = part[1][1];
12893 operands[6] = part[1][0];
12894 }
12895 }
12896 else
12897 {
12898 if (nparts == 3)
12899 {
12900 operands[2] = part[0][0];
12901 operands[3] = part[0][1];
12902 operands[4] = part[0][2];
12903 operands[5] = part[1][0];
12904 operands[6] = part[1][1];
12905 operands[7] = part[1][2];
12906 }
12907 else
12908 {
12909 operands[2] = part[0][0];
12910 operands[3] = part[0][1];
12911 operands[5] = part[1][0];
12912 operands[6] = part[1][1];
12913 }
12914 }
12915
12916 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12917 if (optimize_size)
12918 {
12919 if (CONST_INT_P (operands[5])
12920 && operands[5] != const0_rtx
12921 && REG_P (operands[2]))
12922 {
12923 if (CONST_INT_P (operands[6])
12924 && INTVAL (operands[6]) == INTVAL (operands[5]))
12925 operands[6] = operands[2];
12926
12927 if (nparts == 3
12928 && CONST_INT_P (operands[7])
12929 && INTVAL (operands[7]) == INTVAL (operands[5]))
12930 operands[7] = operands[2];
12931 }
12932
12933 if (nparts == 3
12934 && CONST_INT_P (operands[6])
12935 && operands[6] != const0_rtx
12936 && REG_P (operands[3])
12937 && CONST_INT_P (operands[7])
12938 && INTVAL (operands[7]) == INTVAL (operands[6]))
12939 operands[7] = operands[3];
12940 }
12941
12942 emit_move_insn (operands[2], operands[5]);
12943 emit_move_insn (operands[3], operands[6]);
12944 if (nparts == 3)
12945 emit_move_insn (operands[4], operands[7]);
12946
12947 return;
12948 }
12949
12950 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12951 left shift by a constant, either using a single shift or
12952 a sequence of add instructions. */
12953
12954 static void
12955 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12956 {
12957 if (count == 1)
12958 {
12959 emit_insn ((mode == DImode
12960 ? gen_addsi3
12961 : gen_adddi3) (operand, operand, operand));
12962 }
12963 else if (!optimize_size
12964 && count * ix86_cost->add <= ix86_cost->shift_const)
12965 {
12966 int i;
12967 for (i = 0; i < count; i++)
12968 {
12969 emit_insn ((mode == DImode
12970 ? gen_addsi3
12971 : gen_adddi3) (operand, operand, operand));
12972 }
12973 }
12974 else
12975 emit_insn ((mode == DImode
12976 ? gen_ashlsi3
12977 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12978 }
12979
12980 void
12981 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12982 {
12983 rtx low[2], high[2];
12984 int count;
12985 const int single_width = mode == DImode ? 32 : 64;
12986
12987 if (CONST_INT_P (operands[2]))
12988 {
12989 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12990 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12991
12992 if (count >= single_width)
12993 {
12994 emit_move_insn (high[0], low[1]);
12995 emit_move_insn (low[0], const0_rtx);
12996
12997 if (count > single_width)
12998 ix86_expand_ashl_const (high[0], count - single_width, mode);
12999 }
13000 else
13001 {
13002 if (!rtx_equal_p (operands[0], operands[1]))
13003 emit_move_insn (operands[0], operands[1]);
13004 emit_insn ((mode == DImode
13005 ? gen_x86_shld_1
13006 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13007 ix86_expand_ashl_const (low[0], count, mode);
13008 }
13009 return;
13010 }
13011
13012 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13013
13014 if (operands[1] == const1_rtx)
13015 {
13016 /* Assuming we've chosen QImode-capable registers, 1 << N
13017 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13018 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13019 {
13020 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13021
13022 ix86_expand_clear (low[0]);
13023 ix86_expand_clear (high[0]);
13024 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13025
13026 d = gen_lowpart (QImode, low[0]);
13027 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13028 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13029 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13030
13031 d = gen_lowpart (QImode, high[0]);
13032 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13033 s = gen_rtx_NE (QImode, flags, const0_rtx);
13034 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13035 }
13036
13037 /* Otherwise, we can get the same results by manually performing
13038 a bit extract operation on bit 5/6, and then performing the two
13039 shifts. The two methods of getting 0/1 into low/high are exactly
13040 the same size. Avoiding the shift in the bit extract case helps
13041 pentium4 a bit; no one else seems to care much either way. */
13042 else
13043 {
13044 rtx x;
13045
13046 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13047 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13048 else
13049 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13050 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13051
13052 emit_insn ((mode == DImode
13053 ? gen_lshrsi3
13054 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13055 emit_insn ((mode == DImode
13056 ? gen_andsi3
13057 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13058 emit_move_insn (low[0], high[0]);
13059 emit_insn ((mode == DImode
13060 ? gen_xorsi3
13061 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13062 }
13063
13064 emit_insn ((mode == DImode
13065 ? gen_ashlsi3
13066 : gen_ashldi3) (low[0], low[0], operands[2]));
13067 emit_insn ((mode == DImode
13068 ? gen_ashlsi3
13069 : gen_ashldi3) (high[0], high[0], operands[2]));
13070 return;
13071 }
13072
13073 if (operands[1] == constm1_rtx)
13074 {
13075 /* For -1 << N, we can avoid the shld instruction, because we
13076 know that we're shifting 0...31/63 ones into a -1. */
13077 emit_move_insn (low[0], constm1_rtx);
13078 if (optimize_size)
13079 emit_move_insn (high[0], low[0]);
13080 else
13081 emit_move_insn (high[0], constm1_rtx);
13082 }
13083 else
13084 {
13085 if (!rtx_equal_p (operands[0], operands[1]))
13086 emit_move_insn (operands[0], operands[1]);
13087
13088 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13089 emit_insn ((mode == DImode
13090 ? gen_x86_shld_1
13091 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13092 }
13093
13094 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13095
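/* The shift above used only the low bits of the count.  If the runtime
   count is >= single_width, fix up the result: move LOW into HIGH and
   clear LOW, using a cmov with SCRATCH as the zero when available, or a
   compare-and-branch sequence otherwise.  */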
13096 if (TARGET_CMOVE && scratch)
13097 {
13098 ix86_expand_clear (scratch);
13099 emit_insn ((mode == DImode
13100 ? gen_x86_shift_adj_1
13101 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13102 }
13103 else
13104 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13105 }
13106
13107 void
13108 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13109 {
13110 rtx low[2], high[2];
13111 int count;
13112 const int single_width = mode == DImode ? 32 : 64;
13113
13114 if (CONST_INT_P (operands[2]))
13115 {
13116 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13117 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13118
13119 if (count == single_width * 2 - 1)
13120 {
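/* A shift by 2*width-1 leaves only the sign: replicate the sign bit of
   the high input word into both result words.  */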
13121 emit_move_insn (high[0], high[1]);
13122 emit_insn ((mode == DImode
13123 ? gen_ashrsi3
13124 : gen_ashrdi3) (high[0], high[0],
13125 GEN_INT (single_width - 1)));
13126 emit_move_insn (low[0], high[0]);
13127
13128 }
13129 else if (count >= single_width)
13130 {
13131 emit_move_insn (low[0], high[1]);
13132 emit_move_insn (high[0], low[0]);
13133 emit_insn ((mode == DImode
13134 ? gen_ashrsi3
13135 : gen_ashrdi3) (high[0], high[0],
13136 GEN_INT (single_width - 1)));
13137 if (count > single_width)
13138 emit_insn ((mode == DImode
13139 ? gen_ashrsi3
13140 : gen_ashrdi3) (low[0], low[0],
13141 GEN_INT (count - single_width)));
13142 }
13143 else
13144 {
13145 if (!rtx_equal_p (operands[0], operands[1]))
13146 emit_move_insn (operands[0], operands[1]);
13147 emit_insn ((mode == DImode
13148 ? gen_x86_shrd_1
13149 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13150 emit_insn ((mode == DImode
13151 ? gen_ashrsi3
13152 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13153 }
13154 }
13155 else
13156 {
13157 if (!rtx_equal_p (operands[0], operands[1]))
13158 emit_move_insn (operands[0], operands[1]);
13159
13160 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13161
13162 emit_insn ((mode == DImode
13163 ? gen_x86_shrd_1
13164 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13165 emit_insn ((mode == DImode
13166 ? gen_ashrsi3
13167 : gen_ashrdi3) (high[0], high[0], operands[2]));
13168
13169 if (TARGET_CMOVE && scratch)
13170 {
13171 emit_move_insn (scratch, high[0]);
13172 emit_insn ((mode == DImode
13173 ? gen_ashrsi3
13174 : gen_ashrdi3) (scratch, scratch,
13175 GEN_INT (single_width - 1)));
13176 emit_insn ((mode == DImode
13177 ? gen_x86_shift_adj_1
13178 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13179 scratch));
13180 }
13181 else
13182 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13183 }
13184 }
13185
13186 void
13187 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13188 {
13189 rtx low[2], high[2];
13190 int count;
13191 const int single_width = mode == DImode ? 32 : 64;
13192
13193 if (CONST_INT_P (operands[2]))
13194 {
13195 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13196 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13197
13198 if (count >= single_width)
13199 {
13200 emit_move_insn (low[0], high[1]);
13201 ix86_expand_clear (high[0]);
13202
13203 if (count > single_width)
13204 emit_insn ((mode == DImode
13205 ? gen_lshrsi3
13206 : gen_lshrdi3) (low[0], low[0],
13207 GEN_INT (count - single_width)));
13208 }
13209 else
13210 {
13211 if (!rtx_equal_p (operands[0], operands[1]))
13212 emit_move_insn (operands[0], operands[1]);
13213 emit_insn ((mode == DImode
13214 ? gen_x86_shrd_1
13215 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13216 emit_insn ((mode == DImode
13217 ? gen_lshrsi3
13218 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13219 }
13220 }
13221 else
13222 {
13223 if (!rtx_equal_p (operands[0], operands[1]))
13224 emit_move_insn (operands[0], operands[1]);
13225
13226 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13227
13228 emit_insn ((mode == DImode
13229 ? gen_x86_shrd_1
13230 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13231 emit_insn ((mode == DImode
13232 ? gen_lshrsi3
13233 : gen_lshrdi3) (high[0], high[0], operands[2]));
13234
13235 /* Heh. By reversing the arguments, we can reuse this pattern. */
13236 if (TARGET_CMOVE && scratch)
13237 {
13238 ix86_expand_clear (scratch);
13239 emit_insn ((mode == DImode
13240 ? gen_x86_shift_adj_1
13241 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13242 scratch));
13243 }
13244 else
13245 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13246 }
13247 }
13248
13249 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13250 static void
13251 predict_jump (int prob)
13252 {
13253 rtx insn = get_last_insn ();
13254 gcc_assert (JUMP_P (insn));
13255 REG_NOTES (insn)
13256 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13257 GEN_INT (prob),
13258 REG_NOTES (insn));
13259 }
13260
13261 /* Helper function for the string operations below. Test whether
13262 (VARIABLE & VALUE) is zero; if so, jump to the returned label. */
13263 static rtx
13264 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13265 {
13266 rtx label = gen_label_rtx ();
13267 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13268 if (GET_MODE (variable) == DImode)
13269 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13270 else
13271 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13272 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13273 1, label);
13274 if (epilogue)
13275 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13276 else
13277 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13278 return label;
13279 }
13280
13281 /* Decrease COUNTREG by VALUE. */
13282 static void
13283 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13284 {
13285 if (GET_MODE (countreg) == DImode)
13286 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13287 else
13288 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13289 }
13290
13291 /* Zero-extend the possibly-SImode EXP into a Pmode register. */
13292 rtx
13293 ix86_zero_extend_to_Pmode (rtx exp)
13294 {
13295 rtx r;
13296 if (GET_MODE (exp) == VOIDmode)
13297 return force_reg (Pmode, exp);
13298 if (GET_MODE (exp) == Pmode)
13299 return copy_to_mode_reg (Pmode, exp);
13300 r = gen_reg_rtx (Pmode);
13301 emit_insn (gen_zero_extendsidi2 (r, exp));
13302 return r;
13303 }
13304
13305 /* Divide COUNTREG by SCALE. */
13306 static rtx
13307 scale_counter (rtx countreg, int scale)
13308 {
13309 rtx sc;
13310 rtx piece_size_mask;
13311
13312 if (scale == 1)
13313 return countreg;
13314 if (CONST_INT_P (countreg))
13315 return GEN_INT (INTVAL (countreg) / scale);
13316 gcc_assert (REG_P (countreg));
13317
13318 piece_size_mask = GEN_INT (scale - 1);
13319 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13320 GEN_INT (exact_log2 (scale)),
13321 NULL, 1, OPTAB_DIRECT);
13322 return sc;
13323 }
13324
13325 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over DImode
13326 for constant loop counts. */
13327
13328 static enum machine_mode
13329 counter_mode (rtx count_exp)
13330 {
13331 if (GET_MODE (count_exp) != VOIDmode)
13332 return GET_MODE (count_exp);
13333 if (GET_CODE (count_exp) != CONST_INT)
13334 return Pmode;
13335 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13336 return DImode;
13337 return SImode;
13338 }
13339
13340 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13341 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
13342 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13343 equivalent loop to set memory to VALUE (supposed to be in MODE).
13344
13345 The size is rounded down to a whole number of chunks moved at once.
13346 SRCMEM and DESTMEM provide the MEM rtxes to feed proper aliasing info. */
13347
13348
13349 static void
13350 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13351 rtx destptr, rtx srcptr, rtx value,
13352 rtx count, enum machine_mode mode, int unroll,
13353 int expected_size)
13354 {
13355 rtx out_label, top_label, iter, tmp;
13356 enum machine_mode iter_mode = counter_mode (count);
13357 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13358 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13359 rtx size;
13360 rtx x_addr;
13361 rtx y_addr;
13362 int i;
13363
13364 top_label = gen_label_rtx ();
13365 out_label = gen_label_rtx ();
13366 iter = gen_reg_rtx (iter_mode);
13367
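/* SIZE is COUNT rounded down to a multiple of the bytes handled per
   iteration (chunk size times the unroll factor).  */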
13368 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13369 NULL, 1, OPTAB_DIRECT);
13370 /* Those two should combine. */
13371 if (piece_size == const1_rtx)
13372 {
13373 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13374 true, out_label);
13375 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13376 }
13377 emit_move_insn (iter, const0_rtx);
13378
13379 emit_label (top_label);
13380
13381 tmp = convert_modes (Pmode, iter_mode, iter, true);
13382 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13383 destmem = change_address (destmem, mode, x_addr);
13384
13385 if (srcmem)
13386 {
13387 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13388 srcmem = change_address (srcmem, mode, y_addr);
13389
13390 /* When unrolling for chips that reorder memory reads and writes,
13391 we can save registers by using a single temporary.
13392 Also, using 4 temporaries is overkill in 32-bit mode. */
13393 if (!TARGET_64BIT && 0)
13394 {
13395 for (i = 0; i < unroll; i++)
13396 {
13397 if (i)
13398 {
13399 destmem =
13400 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13401 srcmem =
13402 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13403 }
13404 emit_move_insn (destmem, srcmem);
13405 }
13406 }
13407 else
13408 {
13409 rtx tmpreg[4];
13410 gcc_assert (unroll <= 4);
13411 for (i = 0; i < unroll; i++)
13412 {
13413 tmpreg[i] = gen_reg_rtx (mode);
13414 if (i)
13415 {
13416 srcmem =
13417 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13418 }
13419 emit_move_insn (tmpreg[i], srcmem);
13420 }
13421 for (i = 0; i < unroll; i++)
13422 {
13423 if (i)
13424 {
13425 destmem =
13426 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13427 }
13428 emit_move_insn (destmem, tmpreg[i]);
13429 }
13430 }
13431 }
13432 else
13433 for (i = 0; i < unroll; i++)
13434 {
13435 if (i)
13436 destmem =
13437 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13438 emit_move_insn (destmem, value);
13439 }
13440
13441 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13442 true, OPTAB_LIB_WIDEN);
13443 if (tmp != iter)
13444 emit_move_insn (iter, tmp);
13445
13446 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13447 true, top_label);
13448 if (expected_size != -1)
13449 {
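/* EXPECTED_SIZE becomes the expected iteration count; predict the loop
   back edge taken with probability roughly 1 - 1/EXPECTED_SIZE.  */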
13450 expected_size /= GET_MODE_SIZE (mode) * unroll;
13451 if (expected_size == 0)
13452 predict_jump (0);
13453 else if (expected_size > REG_BR_PROB_BASE)
13454 predict_jump (REG_BR_PROB_BASE - 1);
13455 else
13456 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13457 }
13458 else
13459 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13460 iter = ix86_zero_extend_to_Pmode (iter);
13461 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13462 true, OPTAB_LIB_WIDEN);
13463 if (tmp != destptr)
13464 emit_move_insn (destptr, tmp);
13465 if (srcptr)
13466 {
13467 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13468 true, OPTAB_LIB_WIDEN);
13469 if (tmp != srcptr)
13470 emit_move_insn (srcptr, tmp);
13471 }
13472 emit_label (out_label);
13473 }
13474
13475 /* Output "rep; mov" instruction.
13476 Arguments have the same meaning as for the previous function. */
13477 static void
13478 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13479 rtx destptr, rtx srcptr,
13480 rtx count,
13481 enum machine_mode mode)
13482 {
13483 rtx destexp;
13484 rtx srcexp;
13485 rtx countreg;
13486
13487 /* If the size is known and divisible by 4, it is better to use rep movsl. */
13488 if (mode == QImode && CONST_INT_P (count)
13489 && !(INTVAL (count) & 3))
13490 mode = SImode;
13491
13492 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13493 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13494 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13495 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13496 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
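/* DESTEXP and SRCEXP describe the pointer values after the copy
   (pointer plus COUNTREG scaled by the piece size); the rep_mov pattern
   presumably uses them to express the pointer updates done by rep movs.  */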
13497 if (mode != QImode)
13498 {
13499 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13500 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13501 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13502 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13503 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13504 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13505 }
13506 else
13507 {
13508 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13509 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13510 }
13511 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13512 destexp, srcexp));
13513 }
13514
13515 /* Output "rep; stos" instruction.
13516 Arguments have the same meaning as for the previous function. */
13517 static void
13518 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13519 rtx count,
13520 enum machine_mode mode)
13521 {
13522 rtx destexp;
13523 rtx countreg;
13524
13525 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13526 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13527 value = force_reg (mode, gen_lowpart (mode, value));
13528 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13529 if (mode != QImode)
13530 {
13531 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13532 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13533 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13534 }
13535 else
13536 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13537 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13538 }
13539
13540 static void
13541 emit_strmov (rtx destmem, rtx srcmem,
13542 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13543 {
13544 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13545 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13546 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13547 }
13548
13549 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13550 static void
13551 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13552 rtx destptr, rtx srcptr, rtx count, int max_size)
13553 {
13554 rtx src, dest;
13555 if (CONST_INT_P (count))
13556 {
13557 HOST_WIDE_INT countval = INTVAL (count);
13558 int offset = 0;
13559
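/* For a known residual count, emit straight-line moves for each bit set
   in it, largest chunks first.  */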
13560 if ((countval & 0x10) && max_size > 16)
13561 {
13562 if (TARGET_64BIT)
13563 {
13564 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13565 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13566 }
13567 else
13568 gcc_unreachable ();
13569 offset += 16;
13570 }
13571 if ((countval & 0x08) && max_size > 8)
13572 {
13573 if (TARGET_64BIT)
13574 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13575 else
13576 {
13577 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13578 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13579 }
13580 offset += 8;
13581 }
13582 if ((countval & 0x04) && max_size > 4)
13583 {
13584 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13585 offset += 4;
13586 }
13587 if ((countval & 0x02) && max_size > 2)
13588 {
13589 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13590 offset += 2;
13591 }
13592 if ((countval & 0x01) && max_size > 1)
13593 {
13594 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13595 offset += 1;
13596 }
13597 return;
13598 }
13599 if (max_size > 8)
13600 {
13601 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13602 count, 1, OPTAB_DIRECT);
13603 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13604 count, QImode, 1, 4);
13605 return;
13606 }
13607
13608 /* When single stringops are available, we can cheaply increase the dest and src pointers.
13609 Otherwise we save code size by maintaining an offset (zero is readily
13610 available from the preceding rep operation) and using x86 addressing modes.
13611 */
13612 if (TARGET_SINGLE_STRINGOP)
13613 {
13614 if (max_size > 4)
13615 {
13616 rtx label = ix86_expand_aligntest (count, 4, true);
13617 src = change_address (srcmem, SImode, srcptr);
13618 dest = change_address (destmem, SImode, destptr);
13619 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13620 emit_label (label);
13621 LABEL_NUSES (label) = 1;
13622 }
13623 if (max_size > 2)
13624 {
13625 rtx label = ix86_expand_aligntest (count, 2, true);
13626 src = change_address (srcmem, HImode, srcptr);
13627 dest = change_address (destmem, HImode, destptr);
13628 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13629 emit_label (label);
13630 LABEL_NUSES (label) = 1;
13631 }
13632 if (max_size > 1)
13633 {
13634 rtx label = ix86_expand_aligntest (count, 1, true);
13635 src = change_address (srcmem, QImode, srcptr);
13636 dest = change_address (destmem, QImode, destptr);
13637 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13638 emit_label (label);
13639 LABEL_NUSES (label) = 1;
13640 }
13641 }
13642 else
13643 {
13644 rtx offset = force_reg (Pmode, const0_rtx);
13645 rtx tmp;
13646
13647 if (max_size > 4)
13648 {
13649 rtx label = ix86_expand_aligntest (count, 4, true);
13650 src = change_address (srcmem, SImode, srcptr);
13651 dest = change_address (destmem, SImode, destptr);
13652 emit_move_insn (dest, src);
13653 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13654 true, OPTAB_LIB_WIDEN);
13655 if (tmp != offset)
13656 emit_move_insn (offset, tmp);
13657 emit_label (label);
13658 LABEL_NUSES (label) = 1;
13659 }
13660 if (max_size > 2)
13661 {
13662 rtx label = ix86_expand_aligntest (count, 2, true);
13663 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13664 src = change_address (srcmem, HImode, tmp);
13665 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13666 dest = change_address (destmem, HImode, tmp);
13667 emit_move_insn (dest, src);
13668 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13669 true, OPTAB_LIB_WIDEN);
13670 if (tmp != offset)
13671 emit_move_insn (offset, tmp);
13672 emit_label (label);
13673 LABEL_NUSES (label) = 1;
13674 }
13675 if (max_size > 1)
13676 {
13677 rtx label = ix86_expand_aligntest (count, 1, true);
13678 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13679 src = change_address (srcmem, QImode, tmp);
13680 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13681 dest = change_address (destmem, QImode, tmp);
13682 emit_move_insn (dest, src);
13683 emit_label (label);
13684 LABEL_NUSES (label) = 1;
13685 }
13686 }
13687 }
13688
13689 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13690 static void
13691 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13692 rtx count, int max_size)
13693 {
13694 count =
13695 expand_simple_binop (counter_mode (count), AND, count,
13696 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13697 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13698 gen_lowpart (QImode, value), count, QImode,
13699 1, max_size / 2);
13700 }
13701
13702 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13703 static void
13704 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13705 {
13706 rtx dest;
13707
13708 if (CONST_INT_P (count))
13709 {
13710 HOST_WIDE_INT countval = INTVAL (count);
13711 int offset = 0;
13712
13713 if ((countval & 0x10) && max_size > 16)
13714 {
13715 if (TARGET_64BIT)
13716 {
13717 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13718 emit_insn (gen_strset (destptr, dest, value));
13719 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13720 emit_insn (gen_strset (destptr, dest, value));
13721 }
13722 else
13723 gcc_unreachable ();
13724 offset += 16;
13725 }
13726 if ((countval & 0x08) && max_size > 8)
13727 {
13728 if (TARGET_64BIT)
13729 {
13730 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13731 emit_insn (gen_strset (destptr, dest, value));
13732 }
13733 else
13734 {
13735 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13736 emit_insn (gen_strset (destptr, dest, value));
13737 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13738 emit_insn (gen_strset (destptr, dest, value));
13739 }
13740 offset += 8;
13741 }
13742 if ((countval & 0x04) && max_size > 4)
13743 {
13744 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13745 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13746 offset += 4;
13747 }
13748 if ((countval & 0x02) && max_size > 2)
13749 {
13750 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13751 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13752 offset += 2;
13753 }
13754 if ((countval & 0x01) && max_size > 1)
13755 {
13756 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13757 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13758 offset += 1;
13759 }
13760 return;
13761 }
13762 if (max_size > 32)
13763 {
13764 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13765 return;
13766 }
13767 if (max_size > 16)
13768 {
13769 rtx label = ix86_expand_aligntest (count, 16, true);
13770 if (TARGET_64BIT)
13771 {
13772 dest = change_address (destmem, DImode, destptr);
13773 emit_insn (gen_strset (destptr, dest, value));
13774 emit_insn (gen_strset (destptr, dest, value));
13775 }
13776 else
13777 {
13778 dest = change_address (destmem, SImode, destptr);
13779 emit_insn (gen_strset (destptr, dest, value));
13780 emit_insn (gen_strset (destptr, dest, value));
13781 emit_insn (gen_strset (destptr, dest, value));
13782 emit_insn (gen_strset (destptr, dest, value));
13783 }
13784 emit_label (label);
13785 LABEL_NUSES (label) = 1;
13786 }
13787 if (max_size > 8)
13788 {
13789 rtx label = ix86_expand_aligntest (count, 8, true);
13790 if (TARGET_64BIT)
13791 {
13792 dest = change_address (destmem, DImode, destptr);
13793 emit_insn (gen_strset (destptr, dest, value));
13794 }
13795 else
13796 {
13797 dest = change_address (destmem, SImode, destptr);
13798 emit_insn (gen_strset (destptr, dest, value));
13799 emit_insn (gen_strset (destptr, dest, value));
13800 }
13801 emit_label (label);
13802 LABEL_NUSES (label) = 1;
13803 }
13804 if (max_size > 4)
13805 {
13806 rtx label = ix86_expand_aligntest (count, 4, true);
13807 dest = change_address (destmem, SImode, destptr);
13808 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13809 emit_label (label);
13810 LABEL_NUSES (label) = 1;
13811 }
13812 if (max_size > 2)
13813 {
13814 rtx label = ix86_expand_aligntest (count, 2, true);
13815 dest = change_address (destmem, HImode, destptr);
13816 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13817 emit_label (label);
13818 LABEL_NUSES (label) = 1;
13819 }
13820 if (max_size > 1)
13821 {
13822 rtx label = ix86_expand_aligntest (count, 1, true);
13823 dest = change_address (destmem, QImode, destptr);
13824 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13825 emit_label (label);
13826 LABEL_NUSES (label) = 1;
13827 }
13828 }
13829
13830 /* Copy enough bytes from SRC to DEST to raise the alignment of DEST, known to be
13831 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
13832 static void
13833 expand_movmem_prologue (rtx destmem, rtx srcmem,
13834 rtx destptr, rtx srcptr, rtx count,
13835 int align, int desired_alignment)
13836 {
13837 if (align <= 1 && desired_alignment > 1)
13838 {
13839 rtx label = ix86_expand_aligntest (destptr, 1, false);
13840 srcmem = change_address (srcmem, QImode, srcptr);
13841 destmem = change_address (destmem, QImode, destptr);
13842 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13843 ix86_adjust_counter (count, 1);
13844 emit_label (label);
13845 LABEL_NUSES (label) = 1;
13846 }
13847 if (align <= 2 && desired_alignment > 2)
13848 {
13849 rtx label = ix86_expand_aligntest (destptr, 2, false);
13850 srcmem = change_address (srcmem, HImode, srcptr);
13851 destmem = change_address (destmem, HImode, destptr);
13852 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13853 ix86_adjust_counter (count, 2);
13854 emit_label (label);
13855 LABEL_NUSES (label) = 1;
13856 }
13857 if (align <= 4 && desired_alignment > 4)
13858 {
13859 rtx label = ix86_expand_aligntest (destptr, 4, false);
13860 srcmem = change_address (srcmem, SImode, srcptr);
13861 destmem = change_address (destmem, SImode, destptr);
13862 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13863 ix86_adjust_counter (count, 4);
13864 emit_label (label);
13865 LABEL_NUSES (label) = 1;
13866 }
13867 gcc_assert (desired_alignment <= 8);
13868 }
13869
13870 /* Set enough bytes at DEST to raise the alignment of DEST, known to be
13871 aligned to ALIGN, up to DESIRED_ALIGNMENT. */
13872 static void
13873 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13874 int align, int desired_alignment)
13875 {
13876 if (align <= 1 && desired_alignment > 1)
13877 {
13878 rtx label = ix86_expand_aligntest (destptr, 1, false);
13879 destmem = change_address (destmem, QImode, destptr);
13880 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13881 ix86_adjust_counter (count, 1);
13882 emit_label (label);
13883 LABEL_NUSES (label) = 1;
13884 }
13885 if (align <= 2 && desired_alignment > 2)
13886 {
13887 rtx label = ix86_expand_aligntest (destptr, 2, false);
13888 destmem = change_address (destmem, HImode, destptr);
13889 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13890 ix86_adjust_counter (count, 2);
13891 emit_label (label);
13892 LABEL_NUSES (label) = 1;
13893 }
13894 if (align <= 4 && desired_alignment > 4)
13895 {
13896 rtx label = ix86_expand_aligntest (destptr, 4, false);
13897 destmem = change_address (destmem, SImode, destptr);
13898 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13899 ix86_adjust_counter (count, 4);
13900 emit_label (label);
13901 LABEL_NUSES (label) = 1;
13902 }
13903 gcc_assert (desired_alignment <= 8);
13904 }
13905
13906 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13907 static enum stringop_alg
13908 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13909 int *dynamic_check)
13910 {
13911 const struct stringop_algs * algs;
13912
13913 *dynamic_check = -1;
13914 if (memset)
13915 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13916 else
13917 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13918 if (stringop_alg != no_stringop)
13919 return stringop_alg;
13920 /* rep; movq or rep; movl is the smallest variant. */
13921 else if (optimize_size)
13922 {
13923 if (!count || (count & 3))
13924 return rep_prefix_1_byte;
13925 else
13926 return rep_prefix_4_byte;
13927 }
13928 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
13929 */
13930 else if (expected_size != -1 && expected_size < 4)
13931 return loop_1_byte;
13932 else if (expected_size != -1)
13933 {
13934 unsigned int i;
13935 enum stringop_alg alg = libcall;
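/* Scan the size/algorithm table; entries are assumed to be ordered by
   increasing max size, with max == -1 covering all remaining sizes.  */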
13936 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13937 {
13938 gcc_assert (algs->size[i].max);
13939 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13940 {
13941 if (algs->size[i].alg != libcall)
13942 alg = algs->size[i].alg;
13943 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13944 last non-libcall inline algorithm. */
13945 if (TARGET_INLINE_ALL_STRINGOPS)
13946 {
13947 /* When the current size is best copied by a libcall,
13948 but we are still forced to inline, run the heuristic below
13949 that will pick code for medium-sized blocks. */
13950 if (alg != libcall)
13951 return alg;
13952 break;
13953 }
13954 else
13955 return algs->size[i].alg;
13956 }
13957 }
13958 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13959 }
13960 /* When asked to inline the call anyway, try to pick a meaningful choice.
13961 We look for the maximal block size that is faster to copy by hand and
13962 take blocks of at most that size, guessing that the average size will
13963 be roughly half of the block.
13964
13965 If this turns out to be bad, we might simply specify the preferred
13966 choice in ix86_costs. */
13967 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13968 && algs->unknown_size == libcall)
13969 {
13970 int max = -1;
13971 enum stringop_alg alg;
13972 int i;
13973
13974 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13975 if (algs->size[i].alg != libcall && algs->size[i].alg)
13976 max = algs->size[i].max;
13977 if (max == -1)
13978 max = 4096;
13979 alg = decide_alg (count, max / 2, memset, dynamic_check);
13980 gcc_assert (*dynamic_check == -1);
13981 gcc_assert (alg != libcall);
13982 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13983 *dynamic_check = max;
13984 return alg;
13985 }
13986 return algs->unknown_size;
13987 }
13988
13989 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13990 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13991 static int
13992 decide_alignment (int align,
13993 enum stringop_alg alg,
13994 int expected_size)
13995 {
13996 int desired_align = 0;
13997 switch (alg)
13998 {
13999 case no_stringop:
14000 gcc_unreachable ();
14001 case loop:
14002 case unrolled_loop:
14003 desired_align = GET_MODE_SIZE (Pmode);
14004 break;
14005 case rep_prefix_8_byte:
14006 desired_align = 8;
14007 break;
14008 case rep_prefix_4_byte:
14009 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14010 copying a whole cache line at once. */
14011 if (TARGET_PENTIUMPRO)
14012 desired_align = 8;
14013 else
14014 desired_align = 4;
14015 break;
14016 case rep_prefix_1_byte:
14017 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14018 copying a whole cache line at once. */
14019 if (TARGET_PENTIUMPRO)
14020 desired_align = 8;
14021 else
14022 desired_align = 1;
14023 break;
14024 case loop_1_byte:
14025 desired_align = 1;
14026 break;
14027 case libcall:
14028 return 0;
14029 }
14030
14031 if (optimize_size)
14032 desired_align = 1;
14033 if (desired_align < align)
14034 desired_align = align;
14035 if (expected_size != -1 && expected_size < 4)
14036 desired_align = align;
14037 return desired_align;
14038 }
14039
14040 /* Return the smallest power of 2 greater than VAL. */
14041 static int
14042 smallest_pow2_greater_than (int val)
14043 {
14044 int ret = 1;
14045 while (ret <= val)
14046 ret <<= 1;
14047 return ret;
14048 }
14049
14050 /* Expand string move (memcpy) operation. Use i386 string operations when
14051 profitable. ix86_expand_setmem contains similar code. The code depends upon
14052 architecture, block size and alignment, but always has the same
14053 overall structure:
14054
14055 1) Prologue guard: Conditional that jumps up to epilogues for small
14056 blocks that can be handled by epilogue alone. This is faster but
14057 also needed for correctness, since the prologue assumes the block is larger
14058 than the desired alignment.
14059
14060 Optional dynamic check for size and libcall for large
14061 blocks is emitted here too, with -minline-stringops-dynamically.
14062
14063 2) Prologue: copy first few bytes in order to get destination aligned
14064 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14065 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14066 We emit either a jump tree on power of two sized blocks, or a byte loop.
14067
14068 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14069 with specified algorithm.
14070
14071 4) Epilogue: code copying tail of the block that is too small to be
14072 handled by main body (or up to size guarded by prologue guard). */
14073
14074 int
14075 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14076 rtx expected_align_exp, rtx expected_size_exp)
14077 {
14078 rtx destreg;
14079 rtx srcreg;
14080 rtx label = NULL;
14081 rtx tmp;
14082 rtx jump_around_label = NULL;
14083 HOST_WIDE_INT align = 1;
14084 unsigned HOST_WIDE_INT count = 0;
14085 HOST_WIDE_INT expected_size = -1;
14086 int size_needed = 0, epilogue_size_needed;
14087 int desired_align = 0;
14088 enum stringop_alg alg;
14089 int dynamic_check;
14090
14091 if (CONST_INT_P (align_exp))
14092 align = INTVAL (align_exp);
14093 /* i386 can do misaligned access at a reasonably increased cost. */
14094 if (CONST_INT_P (expected_align_exp)
14095 && INTVAL (expected_align_exp) > align)
14096 align = INTVAL (expected_align_exp);
14097 if (CONST_INT_P (count_exp))
14098 count = expected_size = INTVAL (count_exp);
14099 if (CONST_INT_P (expected_size_exp) && count == 0)
14100 expected_size = INTVAL (expected_size_exp);
14101
14102 /* Step 0: Decide on preferred algorithm, desired alignment and
14103 size of chunks to be copied by main loop. */
14104
14105 alg = decide_alg (count, expected_size, false, &dynamic_check);
14106 desired_align = decide_alignment (align, alg, expected_size);
14107
14108 if (!TARGET_ALIGN_STRINGOPS)
14109 align = desired_align;
14110
14111 if (alg == libcall)
14112 return 0;
14113 gcc_assert (alg != no_stringop);
14114 if (!count)
14115 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14116 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14117 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14118 switch (alg)
14119 {
14120 case libcall:
14121 case no_stringop:
14122 gcc_unreachable ();
14123 case loop:
14124 size_needed = GET_MODE_SIZE (Pmode);
14125 break;
14126 case unrolled_loop:
14127 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14128 break;
14129 case rep_prefix_8_byte:
14130 size_needed = 8;
14131 break;
14132 case rep_prefix_4_byte:
14133 size_needed = 4;
14134 break;
14135 case rep_prefix_1_byte:
14136 case loop_1_byte:
14137 size_needed = 1;
14138 break;
14139 }
14140
14141 epilogue_size_needed = size_needed;
14142
14143 /* Step 1: Prologue guard. */
14144
14145 /* Alignment code needs count to be in register. */
14146 if (CONST_INT_P (count_exp) && desired_align > align)
14147 {
14148 enum machine_mode mode = SImode;
14149 if (TARGET_64BIT && (count & ~0xffffffff))
14150 mode = DImode;
14151 count_exp = force_reg (mode, count_exp);
14152 }
14153 gcc_assert (desired_align >= 1 && align >= 1);
14154
14155 /* Ensure that alignment prologue won't copy past end of block. */
14156 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14157 {
14158 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14159 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14160 Make sure it is power of 2. */
14161 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14162
14163 label = gen_label_rtx ();
14164 emit_cmp_and_jump_insns (count_exp,
14165 GEN_INT (epilogue_size_needed),
14166 LTU, 0, counter_mode (count_exp), 1, label);
14167 if (GET_CODE (count_exp) == CONST_INT)
14168 ;
14169 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14170 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14171 else
14172 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14173 }
14174 /* Emit code to decide on runtime whether library call or inline should be
14175 used. */
14176 if (dynamic_check != -1)
14177 {
14178 rtx hot_label = gen_label_rtx ();
14179 jump_around_label = gen_label_rtx ();
14180 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14181 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14182 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14183 emit_block_move_via_libcall (dst, src, count_exp, false);
14184 emit_jump (jump_around_label);
14185 emit_label (hot_label);
14186 }
14187
14188 /* Step 2: Alignment prologue. */
14189
14190 if (desired_align > align)
14191 {
14192 /* Except for the first move in the epilogue, we no longer know
14193 the constant offset in the aliasing info. It doesn't seem worth
14194 the pain to maintain it for the first move, so throw away
14195 the info early. */
14196 src = change_address (src, BLKmode, srcreg);
14197 dst = change_address (dst, BLKmode, destreg);
14198 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14199 desired_align);
14200 }
14201 if (label && size_needed == 1)
14202 {
14203 emit_label (label);
14204 LABEL_NUSES (label) = 1;
14205 label = NULL;
14206 }
14207
14208 /* Step 3: Main loop. */
14209
14210 switch (alg)
14211 {
14212 case libcall:
14213 case no_stringop:
14214 gcc_unreachable ();
14215 case loop_1_byte:
14216 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14217 count_exp, QImode, 1, expected_size);
14218 break;
14219 case loop:
14220 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14221 count_exp, Pmode, 1, expected_size);
14222 break;
14223 case unrolled_loop:
14224 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14225 registers for 4 temporaries anyway. */
14226 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14227 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14228 expected_size);
14229 break;
14230 case rep_prefix_8_byte:
14231 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14232 DImode);
14233 break;
14234 case rep_prefix_4_byte:
14235 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14236 SImode);
14237 break;
14238 case rep_prefix_1_byte:
14239 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14240 QImode);
14241 break;
14242 }
14243 /* Properly adjust the offsets of the src and dest memory for aliasing. */
14244 if (CONST_INT_P (count_exp))
14245 {
14246 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14247 (count / size_needed) * size_needed);
14248 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14249 (count / size_needed) * size_needed);
14250 }
14251 else
14252 {
14253 src = change_address (src, BLKmode, srcreg);
14254 dst = change_address (dst, BLKmode, destreg);
14255 }
14256
14257 /* Step 4: Epilogue to copy the remaining bytes. */
14258
14259 if (label)
14260 {
14261 /* When the main loop is done, COUNT_EXP might hold the original count,
14262 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14263 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14264 bytes. Compensate if needed. */
14265
14266 if (size_needed < epilogue_size_needed)
14267 {
14268 tmp =
14269 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14270 GEN_INT (size_needed - 1), count_exp, 1,
14271 OPTAB_DIRECT);
14272 if (tmp != count_exp)
14273 emit_move_insn (count_exp, tmp);
14274 }
14275 emit_label (label);
14276 LABEL_NUSES (label) = 1;
14277 }
14278
14279 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14280 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14281 epilogue_size_needed);
14282 if (jump_around_label)
14283 emit_label (jump_around_label);
14284 return 1;
14285 }
14286
14287 /* Helper function for memset. For a QImode value 0xXY produce
14288 0xXYXYXYXY of the width specified by MODE. This is essentially
14289 a * 0x01010101, but we can do slightly better than
14290 synth_mult by unwinding the sequence by hand on CPUs with
14291 a slow multiply. */
14292 static rtx
14293 promote_duplicated_reg (enum machine_mode mode, rtx val)
14294 {
14295 enum machine_mode valmode = GET_MODE (val);
14296 rtx tmp;
14297 int nops = mode == DImode ? 3 : 2;
14298
14299 gcc_assert (mode == SImode || mode == DImode);
14300 if (val == const0_rtx)
14301 return copy_to_mode_reg (mode, const0_rtx);
14302 if (CONST_INT_P (val))
14303 {
14304 HOST_WIDE_INT v = INTVAL (val) & 255;
14305
14306 v |= v << 8;
14307 v |= v << 16;
14308 if (mode == DImode)
14309 v |= (v << 16) << 16;
14310 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14311 }
14312
14313 if (valmode == VOIDmode)
14314 valmode = QImode;
14315 if (valmode != QImode)
14316 val = gen_lowpart (QImode, val);
14317 if (mode == QImode)
14318 return val;
14319 if (!TARGET_PARTIAL_REG_STALL)
14320 nops--;
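/* Compare the cost of multiplying by 0x01010101 (or its 64-bit
   counterpart) against the shift-and-or unwinding sequence below, and
   pick whichever is cheaper on this CPU.  */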
14321 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14322 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14323 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14324 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14325 {
14326 rtx reg = convert_modes (mode, QImode, val, true);
14327 tmp = promote_duplicated_reg (mode, const1_rtx);
14328 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14329 OPTAB_DIRECT);
14330 }
14331 else
14332 {
14333 rtx reg = convert_modes (mode, QImode, val, true);
14334
14335 if (!TARGET_PARTIAL_REG_STALL)
14336 if (mode == SImode)
14337 emit_insn (gen_movsi_insv_1 (reg, reg));
14338 else
14339 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14340 else
14341 {
14342 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14343 NULL, 1, OPTAB_DIRECT);
14344 reg =
14345 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14346 }
14347 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14348 NULL, 1, OPTAB_DIRECT);
14349 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14350 if (mode == SImode)
14351 return reg;
14352 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14353 NULL, 1, OPTAB_DIRECT);
14354 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14355 return reg;
14356 }
14357 }
14358
14359 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that will
14360 be needed by the main loop copying SIZE_NEEDED chunks and by the prologue raising
14361 the alignment from ALIGN to DESIRED_ALIGN. */
14362 static rtx
14363 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14364 {
14365 rtx promoted_val;
14366
14367 if (TARGET_64BIT
14368 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14369 promoted_val = promote_duplicated_reg (DImode, val);
14370 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14371 promoted_val = promote_duplicated_reg (SImode, val);
14372 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14373 promoted_val = promote_duplicated_reg (HImode, val);
14374 else
14375 promoted_val = val;
14376
14377 return promoted_val;
14378 }
14379
14380 /* Expand a string set operation (memset). Use i386 string operations when
14381 profitable. See the ix86_expand_movmem comment for an explanation of the
14382 individual steps performed. */
14383 int
14384 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14385 rtx expected_align_exp, rtx expected_size_exp)
14386 {
14387 rtx destreg;
14388 rtx label = NULL;
14389 rtx tmp;
14390 rtx jump_around_label = NULL;
14391 HOST_WIDE_INT align = 1;
14392 unsigned HOST_WIDE_INT count = 0;
14393 HOST_WIDE_INT expected_size = -1;
14394 int size_needed = 0, epilogue_size_needed;
14395 int desired_align = 0;
14396 enum stringop_alg alg;
14397 rtx promoted_val = NULL;
14398 bool force_loopy_epilogue = false;
14399 int dynamic_check;
14400
14401 if (CONST_INT_P (align_exp))
14402 align = INTVAL (align_exp);
14403 /* i386 can do misaligned access at a reasonably increased cost. */
14404 if (CONST_INT_P (expected_align_exp)
14405 && INTVAL (expected_align_exp) > align)
14406 align = INTVAL (expected_align_exp);
14407 if (CONST_INT_P (count_exp))
14408 count = expected_size = INTVAL (count_exp);
14409 if (CONST_INT_P (expected_size_exp) && count == 0)
14410 expected_size = INTVAL (expected_size_exp);
14411
14412 /* Step 0: Decide on preferred algorithm, desired alignment and
14413 size of chunks to be copied by main loop. */
14414
14415 alg = decide_alg (count, expected_size, true, &dynamic_check);
14416 desired_align = decide_alignment (align, alg, expected_size);
14417
14418 if (!TARGET_ALIGN_STRINGOPS)
14419 align = desired_align;
14420
14421 if (alg == libcall)
14422 return 0;
14423 gcc_assert (alg != no_stringop);
14424 if (!count)
14425 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14426 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14427 switch (alg)
14428 {
14429 case libcall:
14430 case no_stringop:
14431 gcc_unreachable ();
14432 case loop:
14433 size_needed = GET_MODE_SIZE (Pmode);
14434 break;
14435 case unrolled_loop:
14436 size_needed = GET_MODE_SIZE (Pmode) * 4;
14437 break;
14438 case rep_prefix_8_byte:
14439 size_needed = 8;
14440 break;
14441 case rep_prefix_4_byte:
14442 size_needed = 4;
14443 break;
14444 case rep_prefix_1_byte:
14445 case loop_1_byte:
14446 size_needed = 1;
14447 break;
14448 }
14449 epilogue_size_needed = size_needed;
14450
14451 /* Step 1: Prologue guard. */
14452
14453 /* Alignment code needs count to be in register. */
14454 if (CONST_INT_P (count_exp) && desired_align > align)
14455 {
14456 enum machine_mode mode = SImode;
14457 if (TARGET_64BIT && (count & ~0xffffffff))
14458 mode = DImode;
14459 count_exp = force_reg (mode, count_exp);
14460 }
14461 /* Do the cheap promotion to allow better CSE across the
14462 main loop and epilogue (i.e. one load of the big constant in
14463 front of all the code). */
14464 if (CONST_INT_P (val_exp))
14465 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14466 desired_align, align);
14467 /* Ensure that alignment prologue won't copy past end of block. */
14468 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14469 {
14470 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14471 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14472 Make sure it is power of 2. */
14473 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14474
14475 /* To improve performance of small blocks, we jump around the VAL
14476 promoting code. This means that if the promoted VAL is not a
14477 constant, we might not use it in the epilogue and have to fall
14478 back to the byte loop variant. */
14479 if (epilogue_size_needed > 2 && !promoted_val)
14480 force_loopy_epilogue = true;
14481 label = gen_label_rtx ();
14482 emit_cmp_and_jump_insns (count_exp,
14483 GEN_INT (epilogue_size_needed),
14484 LTU, 0, counter_mode (count_exp), 1, label);
14485 if (GET_CODE (count_exp) == CONST_INT)
14486 ;
14487 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14488 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14489 else
14490 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14491 }
14492 if (dynamic_check != -1)
14493 {
14494 rtx hot_label = gen_label_rtx ();
14495 jump_around_label = gen_label_rtx ();
14496 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14497 LEU, 0, counter_mode (count_exp), 1, hot_label);
14498 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14499 set_storage_via_libcall (dst, count_exp, val_exp, false);
14500 emit_jump (jump_around_label);
14501 emit_label (hot_label);
14502 }
14503
14504 /* Step 2: Alignment prologue. */
14505
14506 /* Do the expensive promotion once we branched off the small blocks. */
14507 if (!promoted_val)
14508 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14509 desired_align, align);
14510 gcc_assert (desired_align >= 1 && align >= 1);
14511
14512 if (desired_align > align)
14513 {
14514 /* Except for the first move in the epilogue, we no longer know
14515 the constant offset in the aliasing info. It doesn't seem worth
14516 the pain to maintain it for the first move, so throw away
14517 the info early. */
14518 dst = change_address (dst, BLKmode, destreg);
14519 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14520 desired_align);
14521 }
14522 if (label && size_needed == 1)
14523 {
14524 emit_label (label);
14525 LABEL_NUSES (label) = 1;
14526 label = NULL;
14527 }
14528
14529 /* Step 3: Main loop. */
14530
14531 switch (alg)
14532 {
14533 case libcall:
14534 case no_stringop:
14535 gcc_unreachable ();
14536 case loop_1_byte:
14537 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14538 count_exp, QImode, 1, expected_size);
14539 break;
14540 case loop:
14541 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14542 count_exp, Pmode, 1, expected_size);
14543 break;
14544 case unrolled_loop:
14545 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14546 count_exp, Pmode, 4, expected_size);
14547 break;
14548 case rep_prefix_8_byte:
14549 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14550 DImode);
14551 break;
14552 case rep_prefix_4_byte:
14553 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14554 SImode);
14555 break;
14556 case rep_prefix_1_byte:
14557 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14558 QImode);
14559 break;
14560 }
14561 /* Properly adjust the offset of the destination memory for aliasing. */
14562 if (CONST_INT_P (count_exp))
14563 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14564 (count / size_needed) * size_needed);
14565 else
14566 dst = change_address (dst, BLKmode, destreg);
14567
14568 /* Step 4: Epilogue to copy the remaining bytes. */
14569
14570 if (label)
14571 {
14572 /* When the main loop is done, COUNT_EXP might hold original count,
14573 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14574 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14575 bytes. Compensate if needed. */
14576
14577 if (size_needed < desired_align - align)
14578 {
14579 tmp =
14580 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14581 GEN_INT (size_needed - 1), count_exp, 1,
14582 OPTAB_DIRECT);
14583 size_needed = desired_align - align + 1;
14584 if (tmp != count_exp)
14585 emit_move_insn (count_exp, tmp);
14586 }
14587 emit_label (label);
14588 LABEL_NUSES (label) = 1;
14589 }
14590 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14591 {
14592 if (force_loopy_epilogue)
14593 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14594 size_needed);
14595 else
14596 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14597 size_needed);
14598 }
14599 if (jump_around_label)
14600 emit_label (jump_around_label);
14601 return 1;
14602 }
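
/* Illustrative only, not part of the compiler: a rough host-side model of the
   byte accounting done by the expander above, assuming SIZE_NEEDED and
   DESIRED_ALIGN are nonzero.  DESTADDR, COUNT, SIZE_NEEDED and DESIRED_ALIGN
   stand in for the corresponding RTL and ints; the real code emits prologue,
   main-loop and epilogue RTL rather than computing sizes up front.  */
static void
setmem_partition_sketch (unsigned long destaddr, unsigned long count,
                         unsigned long size_needed, unsigned long desired_align,
                         unsigned long *prologue, unsigned long *main_loop,
                         unsigned long *epilogue)
{
  /* Step 2: bytes stored by the alignment prologue to reach DESIRED_ALIGN.  */
  *prologue = (desired_align - destaddr % desired_align) % desired_align;
  if (*prologue > count)
    *prologue = count;
  /* Step 3: whole SIZE_NEEDED chunks handled by the main loop.  */
  *main_loop = (count - *prologue) / size_needed * size_needed;
  /* Step 4: leftover bytes handled by the epilogue.  */
  *epilogue = count - *prologue - *main_loop;
}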
14603
14604 /* Expand strlen. */
14605 int
14606 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14607 {
14608 rtx addr, scratch1, scratch2, scratch3, scratch4;
14609
14610 /* The generic case of the strlen expander is long. Avoid expanding it
14611 unless TARGET_INLINE_ALL_STRINGOPS. */
14612
14613 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14614 && !TARGET_INLINE_ALL_STRINGOPS
14615 && !optimize_size
14616 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14617 return 0;
14618
14619 addr = force_reg (Pmode, XEXP (src, 0));
14620 scratch1 = gen_reg_rtx (Pmode);
14621
14622 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14623 && !optimize_size)
14624 {
14625 /* Well, it seems that some optimizer does not combine a call like
14626 foo(strlen(bar), strlen(bar));
14627 when the move and the subtraction are done here. It does calculate
14628 the length just once when these instructions are done inside
14629 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
14630 and this uses one fewer register for the lifetime of
14631 output_strlen_unroll(), I think this is better. */
14632
14633 emit_move_insn (out, addr);
14634
14635 ix86_expand_strlensi_unroll_1 (out, src, align);
14636
14637 /* strlensi_unroll_1 returns the address of the zero at the end of
14638 the string, like memchr(), so compute the length by subtracting
14639 the start address. */
14640 if (TARGET_64BIT)
14641 emit_insn (gen_subdi3 (out, out, addr));
14642 else
14643 emit_insn (gen_subsi3 (out, out, addr));
14644 }
14645 else
14646 {
14647 rtx unspec;
14648 scratch2 = gen_reg_rtx (Pmode);
14649 scratch3 = gen_reg_rtx (Pmode);
14650 scratch4 = force_reg (Pmode, constm1_rtx);
14651
14652 emit_move_insn (scratch3, addr);
14653 eoschar = force_reg (QImode, eoschar);
14654
14655 src = replace_equiv_address_nv (src, scratch3);
14656
14657 /* If .md starts supporting :P, this can be done in .md. */
14658 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14659 scratch4), UNSPEC_SCAS);
14660 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14661 if (TARGET_64BIT)
14662 {
14663 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14664 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14665 }
14666 else
14667 {
14668 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14669 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14670 }
14671 }
14672 return 1;
14673 }
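
/* Illustrative only, not part of the compiler: the counter arithmetic behind
   the repnz/scasb path above, written in plain C.  The scan starts with the
   count register at -1 (scratch4 = constm1_rtx) and decrements it once per
   byte examined, including the terminating NUL, which is what the rep string
   machinery does; the emitted one_cmpl + add(-1) then recovers the length.  */
static unsigned long
strlen_from_scas_counter_sketch (const char *s)
{
  unsigned long counter = (unsigned long) -1;

  do
    counter--;                  /* one decrement per byte scanned */
  while (*s++ != '\0');

  return ~counter - 1;          /* out = ~scratch1 - 1, as emitted above */
}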
14674
14675 /* Expand the appropriate insns for doing strlen if not just doing
14676 repnz; scasb
14677
14678 out = result, initialized with the start address
14679 align_rtx = alignment of the address.
14680 scratch = scratch register, initialized with the start address when
14681 not aligned, otherwise undefined
14682
14683 This is just the body. It needs the initializations mentioned above and
14684 some address computing at the end. These things are done in i386.md. */
14685
14686 static void
14687 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14688 {
14689 int align;
14690 rtx tmp;
14691 rtx align_2_label = NULL_RTX;
14692 rtx align_3_label = NULL_RTX;
14693 rtx align_4_label = gen_label_rtx ();
14694 rtx end_0_label = gen_label_rtx ();
14695 rtx mem;
14696 rtx tmpreg = gen_reg_rtx (SImode);
14697 rtx scratch = gen_reg_rtx (SImode);
14698 rtx cmp;
14699
14700 align = 0;
14701 if (CONST_INT_P (align_rtx))
14702 align = INTVAL (align_rtx);
14703
14704 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14705
14706 /* Is there a known alignment and is it less than 4? */
14707 if (align < 4)
14708 {
14709 rtx scratch1 = gen_reg_rtx (Pmode);
14710 emit_move_insn (scratch1, out);
14711 /* Is there a known alignment and is it not 2? */
14712 if (align != 2)
14713 {
14714 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14715 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14716
14717 /* Leave just the 3 lower bits. */
14718 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14719 NULL_RTX, 0, OPTAB_WIDEN);
14720
14721 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14722 Pmode, 1, align_4_label);
14723 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14724 Pmode, 1, align_2_label);
14725 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14726 Pmode, 1, align_3_label);
14727 }
14728 else
14729 {
14730 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14731 check whether it is aligned to a 4-byte boundary. */
14732
14733 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14734 NULL_RTX, 0, OPTAB_WIDEN);
14735
14736 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14737 Pmode, 1, align_4_label);
14738 }
14739
14740 mem = change_address (src, QImode, out);
14741
14742 /* Now compare the bytes. */
14743
14744 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14745 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14746 QImode, 1, end_0_label);
14747
14748 /* Increment the address. */
14749 if (TARGET_64BIT)
14750 emit_insn (gen_adddi3 (out, out, const1_rtx));
14751 else
14752 emit_insn (gen_addsi3 (out, out, const1_rtx));
14753
14754 /* Not needed with an alignment of 2 */
14755 if (align != 2)
14756 {
14757 emit_label (align_2_label);
14758
14759 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14760 end_0_label);
14761
14762 if (TARGET_64BIT)
14763 emit_insn (gen_adddi3 (out, out, const1_rtx));
14764 else
14765 emit_insn (gen_addsi3 (out, out, const1_rtx));
14766
14767 emit_label (align_3_label);
14768 }
14769
14770 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14771 end_0_label);
14772
14773 if (TARGET_64BIT)
14774 emit_insn (gen_adddi3 (out, out, const1_rtx));
14775 else
14776 emit_insn (gen_addsi3 (out, out, const1_rtx));
14777 }
14778
14779 /* Generate a loop to check 4 bytes at a time. It is not a good idea
14780 to align this loop; it only makes the program larger and does not
14781 speed it up. */
14782 emit_label (align_4_label);
14783
14784 mem = change_address (src, SImode, out);
14785 emit_move_insn (scratch, mem);
14786 if (TARGET_64BIT)
14787 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14788 else
14789 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14790
14791 /* This formula yields a nonzero result iff one of the bytes is zero.
14792 This saves three branches inside the loop and many cycles. */
14793
14794 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14795 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14796 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14797 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14798 gen_int_mode (0x80808080, SImode)));
14799 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14800 align_4_label);
14801
14802 if (TARGET_CMOVE)
14803 {
14804 rtx reg = gen_reg_rtx (SImode);
14805 rtx reg2 = gen_reg_rtx (Pmode);
14806 emit_move_insn (reg, tmpreg);
14807 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14808
14809 /* If zero is not in the first two bytes, move two bytes forward. */
14810 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14811 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14812 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14813 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14814 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14815 reg,
14816 tmpreg)));
14817 /* Emit lea manually to avoid clobbering of flags. */
14818 emit_insn (gen_rtx_SET (SImode, reg2,
14819 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14820
14821 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14822 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14823 emit_insn (gen_rtx_SET (VOIDmode, out,
14824 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14825 reg2,
14826 out)));
14827
14828 }
14829 else
14830 {
14831 rtx end_2_label = gen_label_rtx ();
14832 /* Is zero in the first two bytes? */
14833
14834 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14835 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14836 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14837 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14838 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14839 pc_rtx);
14840 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14841 JUMP_LABEL (tmp) = end_2_label;
14842
14843 /* Not in the first two. Move two bytes forward. */
14844 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14845 if (TARGET_64BIT)
14846 emit_insn (gen_adddi3 (out, out, const2_rtx));
14847 else
14848 emit_insn (gen_addsi3 (out, out, const2_rtx));
14849
14850 emit_label (end_2_label);
14851
14852 }
14853
14854 /* Avoid branch in fixing the byte. */
14855 tmpreg = gen_lowpart (QImode, tmpreg);
14856 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14857 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14858 if (TARGET_64BIT)
14859 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14860 else
14861 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14862
14863 emit_label (end_0_label);
14864 }
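
/* Illustrative only, not part of the compiler: the word-at-a-time zero-byte
   test used by the aligned loop above, written in plain C.  The expression is
   nonzero exactly when one of the four bytes of X is zero: the subtraction
   borrows into positions whose byte was zero, "& ~x" discards bytes that
   merely had their high bit set, and the final mask keeps one flag bit per
   byte (the same 0x80808080 constant used above).  */
static int
word_has_zero_byte_sketch (unsigned int x)
{
  return ((x - 0x01010101u) & ~x & 0x80808080u) != 0;
}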
14865
14866 void
14867 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14868 rtx callarg2 ATTRIBUTE_UNUSED,
14869 rtx pop, int sibcall)
14870 {
14871 rtx use = NULL, call;
14872
14873 if (pop == const0_rtx)
14874 pop = NULL;
14875 gcc_assert (!TARGET_64BIT || !pop);
14876
14877 if (TARGET_MACHO && !TARGET_64BIT)
14878 {
14879 #if TARGET_MACHO
14880 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14881 fnaddr = machopic_indirect_call_target (fnaddr);
14882 #endif
14883 }
14884 else
14885 {
14886 /* Static functions and indirect calls don't need the pic register. */
14887 if (! TARGET_64BIT && flag_pic
14888 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14889 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14890 use_reg (&use, pic_offset_table_rtx);
14891 }
14892
14893 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14894 {
14895 rtx al = gen_rtx_REG (QImode, 0);
14896 emit_move_insn (al, callarg2);
14897 use_reg (&use, al);
14898 }
14899
14900 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14901 {
14902 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14903 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14904 }
14905 if (sibcall && TARGET_64BIT
14906 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14907 {
14908 rtx addr;
14909 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14910 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14911 emit_move_insn (fnaddr, addr);
14912 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14913 }
14914
14915 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14916 if (retval)
14917 call = gen_rtx_SET (VOIDmode, retval, call);
14918 if (pop)
14919 {
14920 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14921 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14922 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14923 }
14924
14925 call = emit_call_insn (call);
14926 if (use)
14927 CALL_INSN_FUNCTION_USAGE (call) = use;
14928 }
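
/* Illustrative only: a hedged sketch of the RTL shape built above for a
   value-returning call that also pops its arguments (e.g. a 32-bit stdcall
   callee).  "foo", N and POP are placeholders, not values from this file.

     (parallel
       [(set (reg:SI ax)
             (call (mem:QI (symbol_ref "foo")) (const_int N)))
        (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int POP)))])

   Without a return value the SET around the CALL is omitted, and without a
   pop the PARALLEL is omitted.  */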
14929
14930 \f
14931 /* Clear stack slot assignments remembered from previous functions.
14932 This is called from INIT_EXPANDERS once before RTL is emitted for each
14933 function. */
14934
14935 static struct machine_function *
14936 ix86_init_machine_status (void)
14937 {
14938 struct machine_function *f;
14939
14940 f = ggc_alloc_cleared (sizeof (struct machine_function));
14941 f->use_fast_prologue_epilogue_nregs = -1;
14942 f->tls_descriptor_call_expanded_p = 0;
14943
14944 return f;
14945 }
14946
14947 /* Return a MEM corresponding to a stack slot with mode MODE.
14948 Allocate a new slot if necessary.
14949
14950 The RTL for a function can have several slots available: N is
14951 which slot to use. */
14952
14953 rtx
14954 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14955 {
14956 struct stack_local_entry *s;
14957
14958 gcc_assert (n < MAX_386_STACK_LOCALS);
14959
14960 for (s = ix86_stack_locals; s; s = s->next)
14961 if (s->mode == mode && s->n == n)
14962 return copy_rtx (s->rtl);
14963
14964 s = (struct stack_local_entry *)
14965 ggc_alloc (sizeof (struct stack_local_entry));
14966 s->n = n;
14967 s->mode = mode;
14968 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14969
14970 s->next = ix86_stack_locals;
14971 ix86_stack_locals = s;
14972 return s->rtl;
14973 }
14974
14975 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14976
14977 static GTY(()) rtx ix86_tls_symbol;
14978 rtx
14979 ix86_tls_get_addr (void)
14980 {
14981
14982 if (!ix86_tls_symbol)
14983 {
14984 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14985 (TARGET_ANY_GNU_TLS
14986 && !TARGET_64BIT)
14987 ? "___tls_get_addr"
14988 : "__tls_get_addr");
14989 }
14990
14991 return ix86_tls_symbol;
14992 }
14993
14994 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14995
14996 static GTY(()) rtx ix86_tls_module_base_symbol;
14997 rtx
14998 ix86_tls_module_base (void)
14999 {
15000
15001 if (!ix86_tls_module_base_symbol)
15002 {
15003 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15004 "_TLS_MODULE_BASE_");
15005 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15006 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15007 }
15008
15009 return ix86_tls_module_base_symbol;
15010 }
15011 \f
15012 /* Calculate the length of the memory address in the instruction
15013 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15014
15015 int
15016 memory_address_length (rtx addr)
15017 {
15018 struct ix86_address parts;
15019 rtx base, index, disp;
15020 int len;
15021 int ok;
15022
15023 if (GET_CODE (addr) == PRE_DEC
15024 || GET_CODE (addr) == POST_INC
15025 || GET_CODE (addr) == PRE_MODIFY
15026 || GET_CODE (addr) == POST_MODIFY)
15027 return 0;
15028
15029 ok = ix86_decompose_address (addr, &parts);
15030 gcc_assert (ok);
15031
15032 if (parts.base && GET_CODE (parts.base) == SUBREG)
15033 parts.base = SUBREG_REG (parts.base);
15034 if (parts.index && GET_CODE (parts.index) == SUBREG)
15035 parts.index = SUBREG_REG (parts.index);
15036
15037 base = parts.base;
15038 index = parts.index;
15039 disp = parts.disp;
15040 len = 0;
15041
15042 /* Rule of thumb:
15043 - esp as the base always wants an index,
15044 - ebp as the base always wants a displacement. */
15045
15046 /* Register Indirect. */
15047 if (base && !index && !disp)
15048 {
15049 /* esp (for its index) and ebp (for its displacement) need
15050 the two-byte modrm form. */
15051 if (addr == stack_pointer_rtx
15052 || addr == arg_pointer_rtx
15053 || addr == frame_pointer_rtx
15054 || addr == hard_frame_pointer_rtx)
15055 len = 1;
15056 }
15057
15058 /* Direct Addressing. */
15059 else if (disp && !base && !index)
15060 len = 4;
15061
15062 else
15063 {
15064 /* Find the length of the displacement constant. */
15065 if (disp)
15066 {
15067 if (base && satisfies_constraint_K (disp))
15068 len = 1;
15069 else
15070 len = 4;
15071 }
15072 /* ebp always wants a displacement. */
15073 else if (base == hard_frame_pointer_rtx)
15074 len = 1;
15075
15076 /* An index requires the two-byte modrm form.... */
15077 if (index
15078 /* ...like esp, which always wants an index. */
15079 || base == stack_pointer_rtx
15080 || base == arg_pointer_rtx
15081 || base == frame_pointer_rtx)
15082 len += 1;
15083 }
15084
15085 return len;
15086 }
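
/* Illustrative only: a few concrete values of the byte count computed above,
   assuming 32-bit addressing and ignoring prefixes.  The count covers only
   SIB and displacement bytes, never the modrm byte itself.

     (%eax)          -> 0  plain register indirect
     (%esp)          -> 1  needs a SIB byte
     (%ebp)          -> 1  needs a zero disp8
     4(%eax)         -> 1  disp8 (satisfies constraint K)
     1000(%eax)      -> 4  disp32
     symbol          -> 4  absolute disp32
     8(%eax,%ecx,4)  -> 2  SIB byte plus disp8  */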
15087
15088 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15089 is set, expect that the insn has an 8-bit immediate alternative. */
15090 int
15091 ix86_attr_length_immediate_default (rtx insn, int shortform)
15092 {
15093 int len = 0;
15094 int i;
15095 extract_insn_cached (insn);
15096 for (i = recog_data.n_operands - 1; i >= 0; --i)
15097 if (CONSTANT_P (recog_data.operand[i]))
15098 {
15099 gcc_assert (!len);
15100 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15101 len = 1;
15102 else
15103 {
15104 switch (get_attr_mode (insn))
15105 {
15106 case MODE_QI:
15107 len+=1;
15108 break;
15109 case MODE_HI:
15110 len+=2;
15111 break;
15112 case MODE_SI:
15113 len+=4;
15114 break;
15115 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15116 case MODE_DI:
15117 len+=4;
15118 break;
15119 default:
15120 fatal_insn ("unknown insn mode", insn);
15121 }
15122 }
15123 }
15124 return len;
15125 }
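
/* Illustrative only: a few concrete values of the immediate length computed
   above, assuming no prefixes.

     addb $3, %al               -> 1  (MODE_QI)
     addw $1000, %ax            -> 2  (MODE_HI)
     addl $100000, %eax         -> 4  (MODE_SI)
     addq $100000, %rax         -> 4  (MODE_DI, imm32 sign-extended)
     addl $4, %eax (shortform)  -> 1  (fits the signed 8-bit constraint K)  */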
15126 /* Compute default value for "length_address" attribute. */
15127 int
15128 ix86_attr_length_address_default (rtx insn)
15129 {
15130 int i;
15131
15132 if (get_attr_type (insn) == TYPE_LEA)
15133 {
15134 rtx set = PATTERN (insn);
15135
15136 if (GET_CODE (set) == PARALLEL)
15137 set = XVECEXP (set, 0, 0);
15138
15139 gcc_assert (GET_CODE (set) == SET);
15140
15141 return memory_address_length (SET_SRC (set));
15142 }
15143
15144 extract_insn_cached (insn);
15145 for (i = recog_data.n_operands - 1; i >= 0; --i)
15146 if (MEM_P (recog_data.operand[i]))
15147 {
15148 return memory_address_length (XEXP (recog_data.operand[i], 0));
15150 }
15151 return 0;
15152 }
15153 \f
15154 /* Return the maximum number of instructions a cpu can issue. */
15155
15156 static int
15157 ix86_issue_rate (void)
15158 {
15159 switch (ix86_tune)
15160 {
15161 case PROCESSOR_PENTIUM:
15162 case PROCESSOR_K6:
15163 return 2;
15164
15165 case PROCESSOR_PENTIUMPRO:
15166 case PROCESSOR_PENTIUM4:
15167 case PROCESSOR_ATHLON:
15168 case PROCESSOR_K8:
15169 case PROCESSOR_AMDFAM10:
15170 case PROCESSOR_NOCONA:
15171 case PROCESSOR_GENERIC32:
15172 case PROCESSOR_GENERIC64:
15173 return 3;
15174
15175 case PROCESSOR_CORE2:
15176 return 4;
15177
15178 default:
15179 return 1;
15180 }
15181 }
15182
15183 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15184 by DEP_INSN and nothing else set by DEP_INSN. */
15185
15186 static int
15187 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15188 {
15189 rtx set, set2;
15190
15191 /* Simplify the test for uninteresting insns. */
15192 if (insn_type != TYPE_SETCC
15193 && insn_type != TYPE_ICMOV
15194 && insn_type != TYPE_FCMOV
15195 && insn_type != TYPE_IBR)
15196 return 0;
15197
15198 if ((set = single_set (dep_insn)) != 0)
15199 {
15200 set = SET_DEST (set);
15201 set2 = NULL_RTX;
15202 }
15203 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15204 && XVECLEN (PATTERN (dep_insn), 0) == 2
15205 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15206 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15207 {
15208 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15209 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15210 }
15211 else
15212 return 0;
15213
15214 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15215 return 0;
15216
15217 /* This test is true if the dependent insn reads the flags but
15218 not any other potentially set register. */
15219 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15220 return 0;
15221
15222 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15223 return 0;
15224
15225 return 1;
15226 }
15227
15228 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15229 address with operands set by DEP_INSN. */
15230
15231 static int
15232 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15233 {
15234 rtx addr;
15235
15236 if (insn_type == TYPE_LEA
15237 && TARGET_PENTIUM)
15238 {
15239 addr = PATTERN (insn);
15240
15241 if (GET_CODE (addr) == PARALLEL)
15242 addr = XVECEXP (addr, 0, 0);
15243
15244 gcc_assert (GET_CODE (addr) == SET);
15245
15246 addr = SET_SRC (addr);
15247 }
15248 else
15249 {
15250 int i;
15251 extract_insn_cached (insn);
15252 for (i = recog_data.n_operands - 1; i >= 0; --i)
15253 if (MEM_P (recog_data.operand[i]))
15254 {
15255 addr = XEXP (recog_data.operand[i], 0);
15256 goto found;
15257 }
15258 return 0;
15259 found:;
15260 }
15261
15262 return modified_in_p (addr, dep_insn);
15263 }
15264
15265 static int
15266 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15267 {
15268 enum attr_type insn_type, dep_insn_type;
15269 enum attr_memory memory;
15270 rtx set, set2;
15271 int dep_insn_code_number;
15272
15273 /* Anti and output dependencies have zero cost on all CPUs. */
15274 if (REG_NOTE_KIND (link) != 0)
15275 return 0;
15276
15277 dep_insn_code_number = recog_memoized (dep_insn);
15278
15279 /* If we can't recognize the insns, we can't really do anything. */
15280 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15281 return cost;
15282
15283 insn_type = get_attr_type (insn);
15284 dep_insn_type = get_attr_type (dep_insn);
15285
15286 switch (ix86_tune)
15287 {
15288 case PROCESSOR_PENTIUM:
15289 /* Address Generation Interlock adds a cycle of latency. */
15290 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15291 cost += 1;
15292
15293 /* ??? Compares pair with jump/setcc. */
15294 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15295 cost = 0;
15296
15297 /* Floating point stores require value to be ready one cycle earlier. */
15298 if (insn_type == TYPE_FMOV
15299 && get_attr_memory (insn) == MEMORY_STORE
15300 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15301 cost += 1;
15302 break;
15303
15304 case PROCESSOR_PENTIUMPRO:
15305 memory = get_attr_memory (insn);
15306
15307 /* INT->FP conversion is expensive. */
15308 if (get_attr_fp_int_src (dep_insn))
15309 cost += 5;
15310
15311 /* There is one cycle extra latency between an FP op and a store. */
15312 if (insn_type == TYPE_FMOV
15313 && (set = single_set (dep_insn)) != NULL_RTX
15314 && (set2 = single_set (insn)) != NULL_RTX
15315 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15316 && MEM_P (SET_DEST (set2)))
15317 cost += 1;
15318
15319 /* Show the ability of the reorder buffer to hide the latency of a load
15320 by executing it in parallel with the previous instruction, in case the
15321 previous instruction is not needed to compute the address. */
15322 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15323 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15324 {
15325 /* Claim moves to take one cycle, as the core can issue one load
15326 at a time and the next load can start a cycle later. */
15327 if (dep_insn_type == TYPE_IMOV
15328 || dep_insn_type == TYPE_FMOV)
15329 cost = 1;
15330 else if (cost > 1)
15331 cost--;
15332 }
15333 break;
15334
15335 case PROCESSOR_K6:
15336 memory = get_attr_memory (insn);
15337
15338 /* The esp dependency is resolved before the instruction is really
15339 finished. */
15340 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15341 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15342 return 1;
15343
15344 /* INT->FP conversion is expensive. */
15345 if (get_attr_fp_int_src (dep_insn))
15346 cost += 5;
15347
15348 /* Show the ability of the reorder buffer to hide the latency of a load
15349 by executing it in parallel with the previous instruction, in case the
15350 previous instruction is not needed to compute the address. */
15351 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15352 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15353 {
15354 /* Claim moves to take one cycle, as the core can issue one load
15355 at a time and the next load can start a cycle later. */
15356 if (dep_insn_type == TYPE_IMOV
15357 || dep_insn_type == TYPE_FMOV)
15358 cost = 1;
15359 else if (cost > 2)
15360 cost -= 2;
15361 else
15362 cost = 1;
15363 }
15364 break;
15365
15366 case PROCESSOR_ATHLON:
15367 case PROCESSOR_K8:
15368 case PROCESSOR_AMDFAM10:
15369 case PROCESSOR_GENERIC32:
15370 case PROCESSOR_GENERIC64:
15371 memory = get_attr_memory (insn);
15372
15373 /* Show the ability of the reorder buffer to hide the latency of a load
15374 by executing it in parallel with the previous instruction, in case the
15375 previous instruction is not needed to compute the address. */
15376 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15377 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15378 {
15379 enum attr_unit unit = get_attr_unit (insn);
15380 int loadcost = 3;
15381
15382 /* Because of the difference between the length of integer and
15383 floating unit pipeline preparation stages, the memory operands
15384 for floating point are cheaper.
15385
15386 ??? For Athlon the difference is most probably 2. */
15387 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15388 loadcost = 3;
15389 else
15390 loadcost = TARGET_ATHLON ? 2 : 0;
15391
15392 if (cost >= loadcost)
15393 cost -= loadcost;
15394 else
15395 cost = 0;
15396 }
15397
15398 default:
15399 break;
15400 }
15401
15402 return cost;
15403 }
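
/* Illustrative only: two concrete effects of the adjustments above.  On
   PROCESSOR_PENTIUM, a load such as "movl 4(%eax), %ebx" scheduled right
   after "addl $4, %eax" hits the address generation interlock, so
   ix86_agi_dependent fires and the dependence cost is increased by one
   cycle.  On the out-of-order targets, a load whose address does not depend
   on the previous instruction has its cost reduced instead, since the
   reorder buffer can hide part of the load latency.  */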
15404
15405 /* How many alternative schedules to try. This should be as wide as the
15406 scheduling freedom in the DFA, but no wider. Making this value too
15407 large results in extra work for the scheduler. */
15408
15409 static int
15410 ia32_multipass_dfa_lookahead (void)
15411 {
15412 if (ix86_tune == PROCESSOR_PENTIUM)
15413 return 2;
15414
15415 if (ix86_tune == PROCESSOR_PENTIUMPRO
15416 || ix86_tune == PROCESSOR_K6)
15417 return 1;
15418
15419 else
15420 return 0;
15421 }
15422
15423 \f
15424 /* Compute the alignment given to a constant that is being placed in memory.
15425 EXP is the constant and ALIGN is the alignment that the object would
15426 ordinarily have.
15427 The value of this function is used instead of that alignment to align
15428 the object. */
15429
15430 int
15431 ix86_constant_alignment (tree exp, int align)
15432 {
15433 if (TREE_CODE (exp) == REAL_CST)
15434 {
15435 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15436 return 64;
15437 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15438 return 128;
15439 }
15440 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15441 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15442 return BITS_PER_WORD;
15443
15444 return align;
15445 }
15446
15447 /* Compute the alignment for a static variable.
15448 TYPE is the data type, and ALIGN is the alignment that
15449 the object would ordinarily have. The value of this function is used
15450 instead of that alignment to align the object. */
15451
15452 int
15453 ix86_data_alignment (tree type, int align)
15454 {
15455 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15456
15457 if (AGGREGATE_TYPE_P (type)
15458 && TYPE_SIZE (type)
15459 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15460 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15461 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15462 && align < max_align)
15463 align = max_align;
15464
15465 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15466 to a 16-byte boundary. */
15467 if (TARGET_64BIT)
15468 {
15469 if (AGGREGATE_TYPE_P (type)
15470 && TYPE_SIZE (type)
15471 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15472 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15473 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15474 return 128;
15475 }
15476
15477 if (TREE_CODE (type) == ARRAY_TYPE)
15478 {
15479 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15480 return 64;
15481 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15482 return 128;
15483 }
15484 else if (TREE_CODE (type) == COMPLEX_TYPE)
15485 {
15486
15487 if (TYPE_MODE (type) == DCmode && align < 64)
15488 return 64;
15489 if (TYPE_MODE (type) == XCmode && align < 128)
15490 return 128;
15491 }
15492 else if ((TREE_CODE (type) == RECORD_TYPE
15493 || TREE_CODE (type) == UNION_TYPE
15494 || TREE_CODE (type) == QUAL_UNION_TYPE)
15495 && TYPE_FIELDS (type))
15496 {
15497 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15498 return 64;
15499 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15500 return 128;
15501 }
15502 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15503 || TREE_CODE (type) == INTEGER_TYPE)
15504 {
15505 if (TYPE_MODE (type) == DFmode && align < 64)
15506 return 64;
15507 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15508 return 128;
15509 }
15510
15511 return align;
15512 }
15513
15514 /* Compute the alignment for a local variable.
15515 TYPE is the data type, and ALIGN is the alignment that
15516 the object would ordinarily have. The value of this macro is used
15517 instead of that alignment to align the object. */
15518
15519 int
15520 ix86_local_alignment (tree type, int align)
15521 {
15522 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15523 to a 16-byte boundary. */
15524 if (TARGET_64BIT)
15525 {
15526 if (AGGREGATE_TYPE_P (type)
15527 && TYPE_SIZE (type)
15528 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15529 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15530 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15531 return 128;
15532 }
15533 if (TREE_CODE (type) == ARRAY_TYPE)
15534 {
15535 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15536 return 64;
15537 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15538 return 128;
15539 }
15540 else if (TREE_CODE (type) == COMPLEX_TYPE)
15541 {
15542 if (TYPE_MODE (type) == DCmode && align < 64)
15543 return 64;
15544 if (TYPE_MODE (type) == XCmode && align < 128)
15545 return 128;
15546 }
15547 else if ((TREE_CODE (type) == RECORD_TYPE
15548 || TREE_CODE (type) == UNION_TYPE
15549 || TREE_CODE (type) == QUAL_UNION_TYPE)
15550 && TYPE_FIELDS (type))
15551 {
15552 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15553 return 64;
15554 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15555 return 128;
15556 }
15557 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15558 || TREE_CODE (type) == INTEGER_TYPE)
15559 {
15560
15561 if (TYPE_MODE (type) == DFmode && align < 64)
15562 return 64;
15563 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15564 return 128;
15565 }
15566 return align;
15567 }
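
/* Illustrative only: a few results of the two alignment hooks above, in bits,
   assuming the incoming ALIGN is the default ABI alignment.

     double                         -> at least 64
     _Complex double (DCmode)       -> at least 64
     _Complex long double (XCmode)  -> at least 128
     double a[4] (32 bytes, -m64)   -> 128 (x86-64 large-aggregate rule)
     struct { double d; ... }       -> at least 64 (first field is DFmode)  */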
15568 \f
15569 /* Emit RTL insns to initialize the variable parts of a trampoline.
15570 FNADDR is an RTX for the address of the function's pure code.
15571 CXT is an RTX for the static chain value for the function. */
15572 void
15573 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15574 {
15575 if (!TARGET_64BIT)
15576 {
15577 /* Compute offset from the end of the jmp to the target function. */
15578 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15579 plus_constant (tramp, 10),
15580 NULL_RTX, 1, OPTAB_DIRECT);
15581 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15582 gen_int_mode (0xb9, QImode));
15583 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15584 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15585 gen_int_mode (0xe9, QImode));
15586 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15587 }
15588 else
15589 {
15590 int offset = 0;
15591 /* Try to load the address using the shorter movl instead of movabs.
15592 We may want to support movq for kernel mode, but the kernel does not
15593 use trampolines at the moment. */
15594 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15595 {
15596 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15597 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15598 gen_int_mode (0xbb41, HImode));
15599 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15600 gen_lowpart (SImode, fnaddr));
15601 offset += 6;
15602 }
15603 else
15604 {
15605 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15606 gen_int_mode (0xbb49, HImode));
15607 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15608 fnaddr);
15609 offset += 10;
15610 }
15611 /* Load static chain using movabs to r10. */
15612 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15613 gen_int_mode (0xba49, HImode));
15614 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15615 cxt);
15616 offset += 10;
15617 /* Jump to r11. */
15618 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15619 gen_int_mode (0xff49, HImode));
15620 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15621 gen_int_mode (0xe3, QImode));
15622 offset += 3;
15623 gcc_assert (offset <= TRAMPOLINE_SIZE);
15624 }
15625
15626 #ifdef ENABLE_EXECUTE_STACK
15627 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15628 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15629 #endif
15630 }
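
/* Illustrative only: the byte layout produced above.  The HImode stores are
   little-endian, so the constants 0xbb41, 0xba49 and 0xff49 appear in memory
   low byte first.

   32-bit trampoline (10 bytes):
     b9 <cxt:4>         movl   $cxt, %ecx        static chain
     e9 <disp:4>        jmp    fnaddr            disp = fnaddr - (tramp + 10)

   64-bit trampoline, when FNADDR fits a zero-extended 32-bit immediate:
     41 bb <fnaddr:4>   movl   $fnaddr, %r11d
     49 ba <cxt:8>      movabs $cxt, %r10        static chain
     49 ff e3           jmp    *%r11  */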
15631 \f
15632 /* Codes for all the SSE/MMX builtins. */
15633 enum ix86_builtins
15634 {
15635 IX86_BUILTIN_ADDPS,
15636 IX86_BUILTIN_ADDSS,
15637 IX86_BUILTIN_DIVPS,
15638 IX86_BUILTIN_DIVSS,
15639 IX86_BUILTIN_MULPS,
15640 IX86_BUILTIN_MULSS,
15641 IX86_BUILTIN_SUBPS,
15642 IX86_BUILTIN_SUBSS,
15643
15644 IX86_BUILTIN_CMPEQPS,
15645 IX86_BUILTIN_CMPLTPS,
15646 IX86_BUILTIN_CMPLEPS,
15647 IX86_BUILTIN_CMPGTPS,
15648 IX86_BUILTIN_CMPGEPS,
15649 IX86_BUILTIN_CMPNEQPS,
15650 IX86_BUILTIN_CMPNLTPS,
15651 IX86_BUILTIN_CMPNLEPS,
15652 IX86_BUILTIN_CMPNGTPS,
15653 IX86_BUILTIN_CMPNGEPS,
15654 IX86_BUILTIN_CMPORDPS,
15655 IX86_BUILTIN_CMPUNORDPS,
15656 IX86_BUILTIN_CMPEQSS,
15657 IX86_BUILTIN_CMPLTSS,
15658 IX86_BUILTIN_CMPLESS,
15659 IX86_BUILTIN_CMPNEQSS,
15660 IX86_BUILTIN_CMPNLTSS,
15661 IX86_BUILTIN_CMPNLESS,
15662 IX86_BUILTIN_CMPNGTSS,
15663 IX86_BUILTIN_CMPNGESS,
15664 IX86_BUILTIN_CMPORDSS,
15665 IX86_BUILTIN_CMPUNORDSS,
15666
15667 IX86_BUILTIN_COMIEQSS,
15668 IX86_BUILTIN_COMILTSS,
15669 IX86_BUILTIN_COMILESS,
15670 IX86_BUILTIN_COMIGTSS,
15671 IX86_BUILTIN_COMIGESS,
15672 IX86_BUILTIN_COMINEQSS,
15673 IX86_BUILTIN_UCOMIEQSS,
15674 IX86_BUILTIN_UCOMILTSS,
15675 IX86_BUILTIN_UCOMILESS,
15676 IX86_BUILTIN_UCOMIGTSS,
15677 IX86_BUILTIN_UCOMIGESS,
15678 IX86_BUILTIN_UCOMINEQSS,
15679
15680 IX86_BUILTIN_CVTPI2PS,
15681 IX86_BUILTIN_CVTPS2PI,
15682 IX86_BUILTIN_CVTSI2SS,
15683 IX86_BUILTIN_CVTSI642SS,
15684 IX86_BUILTIN_CVTSS2SI,
15685 IX86_BUILTIN_CVTSS2SI64,
15686 IX86_BUILTIN_CVTTPS2PI,
15687 IX86_BUILTIN_CVTTSS2SI,
15688 IX86_BUILTIN_CVTTSS2SI64,
15689
15690 IX86_BUILTIN_MAXPS,
15691 IX86_BUILTIN_MAXSS,
15692 IX86_BUILTIN_MINPS,
15693 IX86_BUILTIN_MINSS,
15694
15695 IX86_BUILTIN_LOADUPS,
15696 IX86_BUILTIN_STOREUPS,
15697 IX86_BUILTIN_MOVSS,
15698
15699 IX86_BUILTIN_MOVHLPS,
15700 IX86_BUILTIN_MOVLHPS,
15701 IX86_BUILTIN_LOADHPS,
15702 IX86_BUILTIN_LOADLPS,
15703 IX86_BUILTIN_STOREHPS,
15704 IX86_BUILTIN_STORELPS,
15705
15706 IX86_BUILTIN_MASKMOVQ,
15707 IX86_BUILTIN_MOVMSKPS,
15708 IX86_BUILTIN_PMOVMSKB,
15709
15710 IX86_BUILTIN_MOVNTPS,
15711 IX86_BUILTIN_MOVNTQ,
15712
15713 IX86_BUILTIN_LOADDQU,
15714 IX86_BUILTIN_STOREDQU,
15715
15716 IX86_BUILTIN_PACKSSWB,
15717 IX86_BUILTIN_PACKSSDW,
15718 IX86_BUILTIN_PACKUSWB,
15719
15720 IX86_BUILTIN_PADDB,
15721 IX86_BUILTIN_PADDW,
15722 IX86_BUILTIN_PADDD,
15723 IX86_BUILTIN_PADDQ,
15724 IX86_BUILTIN_PADDSB,
15725 IX86_BUILTIN_PADDSW,
15726 IX86_BUILTIN_PADDUSB,
15727 IX86_BUILTIN_PADDUSW,
15728 IX86_BUILTIN_PSUBB,
15729 IX86_BUILTIN_PSUBW,
15730 IX86_BUILTIN_PSUBD,
15731 IX86_BUILTIN_PSUBQ,
15732 IX86_BUILTIN_PSUBSB,
15733 IX86_BUILTIN_PSUBSW,
15734 IX86_BUILTIN_PSUBUSB,
15735 IX86_BUILTIN_PSUBUSW,
15736
15737 IX86_BUILTIN_PAND,
15738 IX86_BUILTIN_PANDN,
15739 IX86_BUILTIN_POR,
15740 IX86_BUILTIN_PXOR,
15741
15742 IX86_BUILTIN_PAVGB,
15743 IX86_BUILTIN_PAVGW,
15744
15745 IX86_BUILTIN_PCMPEQB,
15746 IX86_BUILTIN_PCMPEQW,
15747 IX86_BUILTIN_PCMPEQD,
15748 IX86_BUILTIN_PCMPGTB,
15749 IX86_BUILTIN_PCMPGTW,
15750 IX86_BUILTIN_PCMPGTD,
15751
15752 IX86_BUILTIN_PMADDWD,
15753
15754 IX86_BUILTIN_PMAXSW,
15755 IX86_BUILTIN_PMAXUB,
15756 IX86_BUILTIN_PMINSW,
15757 IX86_BUILTIN_PMINUB,
15758
15759 IX86_BUILTIN_PMULHUW,
15760 IX86_BUILTIN_PMULHW,
15761 IX86_BUILTIN_PMULLW,
15762
15763 IX86_BUILTIN_PSADBW,
15764 IX86_BUILTIN_PSHUFW,
15765
15766 IX86_BUILTIN_PSLLW,
15767 IX86_BUILTIN_PSLLD,
15768 IX86_BUILTIN_PSLLQ,
15769 IX86_BUILTIN_PSRAW,
15770 IX86_BUILTIN_PSRAD,
15771 IX86_BUILTIN_PSRLW,
15772 IX86_BUILTIN_PSRLD,
15773 IX86_BUILTIN_PSRLQ,
15774 IX86_BUILTIN_PSLLWI,
15775 IX86_BUILTIN_PSLLDI,
15776 IX86_BUILTIN_PSLLQI,
15777 IX86_BUILTIN_PSRAWI,
15778 IX86_BUILTIN_PSRADI,
15779 IX86_BUILTIN_PSRLWI,
15780 IX86_BUILTIN_PSRLDI,
15781 IX86_BUILTIN_PSRLQI,
15782
15783 IX86_BUILTIN_PUNPCKHBW,
15784 IX86_BUILTIN_PUNPCKHWD,
15785 IX86_BUILTIN_PUNPCKHDQ,
15786 IX86_BUILTIN_PUNPCKLBW,
15787 IX86_BUILTIN_PUNPCKLWD,
15788 IX86_BUILTIN_PUNPCKLDQ,
15789
15790 IX86_BUILTIN_SHUFPS,
15791
15792 IX86_BUILTIN_RCPPS,
15793 IX86_BUILTIN_RCPSS,
15794 IX86_BUILTIN_RSQRTPS,
15795 IX86_BUILTIN_RSQRTSS,
15796 IX86_BUILTIN_SQRTPS,
15797 IX86_BUILTIN_SQRTSS,
15798
15799 IX86_BUILTIN_UNPCKHPS,
15800 IX86_BUILTIN_UNPCKLPS,
15801
15802 IX86_BUILTIN_ANDPS,
15803 IX86_BUILTIN_ANDNPS,
15804 IX86_BUILTIN_ORPS,
15805 IX86_BUILTIN_XORPS,
15806
15807 IX86_BUILTIN_EMMS,
15808 IX86_BUILTIN_LDMXCSR,
15809 IX86_BUILTIN_STMXCSR,
15810 IX86_BUILTIN_SFENCE,
15811
15812 /* 3DNow! Original */
15813 IX86_BUILTIN_FEMMS,
15814 IX86_BUILTIN_PAVGUSB,
15815 IX86_BUILTIN_PF2ID,
15816 IX86_BUILTIN_PFACC,
15817 IX86_BUILTIN_PFADD,
15818 IX86_BUILTIN_PFCMPEQ,
15819 IX86_BUILTIN_PFCMPGE,
15820 IX86_BUILTIN_PFCMPGT,
15821 IX86_BUILTIN_PFMAX,
15822 IX86_BUILTIN_PFMIN,
15823 IX86_BUILTIN_PFMUL,
15824 IX86_BUILTIN_PFRCP,
15825 IX86_BUILTIN_PFRCPIT1,
15826 IX86_BUILTIN_PFRCPIT2,
15827 IX86_BUILTIN_PFRSQIT1,
15828 IX86_BUILTIN_PFRSQRT,
15829 IX86_BUILTIN_PFSUB,
15830 IX86_BUILTIN_PFSUBR,
15831 IX86_BUILTIN_PI2FD,
15832 IX86_BUILTIN_PMULHRW,
15833
15834 /* 3DNow! Athlon Extensions */
15835 IX86_BUILTIN_PF2IW,
15836 IX86_BUILTIN_PFNACC,
15837 IX86_BUILTIN_PFPNACC,
15838 IX86_BUILTIN_PI2FW,
15839 IX86_BUILTIN_PSWAPDSI,
15840 IX86_BUILTIN_PSWAPDSF,
15841
15842 /* SSE2 */
15843 IX86_BUILTIN_ADDPD,
15844 IX86_BUILTIN_ADDSD,
15845 IX86_BUILTIN_DIVPD,
15846 IX86_BUILTIN_DIVSD,
15847 IX86_BUILTIN_MULPD,
15848 IX86_BUILTIN_MULSD,
15849 IX86_BUILTIN_SUBPD,
15850 IX86_BUILTIN_SUBSD,
15851
15852 IX86_BUILTIN_CMPEQPD,
15853 IX86_BUILTIN_CMPLTPD,
15854 IX86_BUILTIN_CMPLEPD,
15855 IX86_BUILTIN_CMPGTPD,
15856 IX86_BUILTIN_CMPGEPD,
15857 IX86_BUILTIN_CMPNEQPD,
15858 IX86_BUILTIN_CMPNLTPD,
15859 IX86_BUILTIN_CMPNLEPD,
15860 IX86_BUILTIN_CMPNGTPD,
15861 IX86_BUILTIN_CMPNGEPD,
15862 IX86_BUILTIN_CMPORDPD,
15863 IX86_BUILTIN_CMPUNORDPD,
15864 IX86_BUILTIN_CMPNEPD,
15865 IX86_BUILTIN_CMPEQSD,
15866 IX86_BUILTIN_CMPLTSD,
15867 IX86_BUILTIN_CMPLESD,
15868 IX86_BUILTIN_CMPNEQSD,
15869 IX86_BUILTIN_CMPNLTSD,
15870 IX86_BUILTIN_CMPNLESD,
15871 IX86_BUILTIN_CMPORDSD,
15872 IX86_BUILTIN_CMPUNORDSD,
15873 IX86_BUILTIN_CMPNESD,
15874
15875 IX86_BUILTIN_COMIEQSD,
15876 IX86_BUILTIN_COMILTSD,
15877 IX86_BUILTIN_COMILESD,
15878 IX86_BUILTIN_COMIGTSD,
15879 IX86_BUILTIN_COMIGESD,
15880 IX86_BUILTIN_COMINEQSD,
15881 IX86_BUILTIN_UCOMIEQSD,
15882 IX86_BUILTIN_UCOMILTSD,
15883 IX86_BUILTIN_UCOMILESD,
15884 IX86_BUILTIN_UCOMIGTSD,
15885 IX86_BUILTIN_UCOMIGESD,
15886 IX86_BUILTIN_UCOMINEQSD,
15887
15888 IX86_BUILTIN_MAXPD,
15889 IX86_BUILTIN_MAXSD,
15890 IX86_BUILTIN_MINPD,
15891 IX86_BUILTIN_MINSD,
15892
15893 IX86_BUILTIN_ANDPD,
15894 IX86_BUILTIN_ANDNPD,
15895 IX86_BUILTIN_ORPD,
15896 IX86_BUILTIN_XORPD,
15897
15898 IX86_BUILTIN_SQRTPD,
15899 IX86_BUILTIN_SQRTSD,
15900
15901 IX86_BUILTIN_UNPCKHPD,
15902 IX86_BUILTIN_UNPCKLPD,
15903
15904 IX86_BUILTIN_SHUFPD,
15905
15906 IX86_BUILTIN_LOADUPD,
15907 IX86_BUILTIN_STOREUPD,
15908 IX86_BUILTIN_MOVSD,
15909
15910 IX86_BUILTIN_LOADHPD,
15911 IX86_BUILTIN_LOADLPD,
15912
15913 IX86_BUILTIN_CVTDQ2PD,
15914 IX86_BUILTIN_CVTDQ2PS,
15915
15916 IX86_BUILTIN_CVTPD2DQ,
15917 IX86_BUILTIN_CVTPD2PI,
15918 IX86_BUILTIN_CVTPD2PS,
15919 IX86_BUILTIN_CVTTPD2DQ,
15920 IX86_BUILTIN_CVTTPD2PI,
15921
15922 IX86_BUILTIN_CVTPI2PD,
15923 IX86_BUILTIN_CVTSI2SD,
15924 IX86_BUILTIN_CVTSI642SD,
15925
15926 IX86_BUILTIN_CVTSD2SI,
15927 IX86_BUILTIN_CVTSD2SI64,
15928 IX86_BUILTIN_CVTSD2SS,
15929 IX86_BUILTIN_CVTSS2SD,
15930 IX86_BUILTIN_CVTTSD2SI,
15931 IX86_BUILTIN_CVTTSD2SI64,
15932
15933 IX86_BUILTIN_CVTPS2DQ,
15934 IX86_BUILTIN_CVTPS2PD,
15935 IX86_BUILTIN_CVTTPS2DQ,
15936
15937 IX86_BUILTIN_MOVNTI,
15938 IX86_BUILTIN_MOVNTPD,
15939 IX86_BUILTIN_MOVNTDQ,
15940
15941 /* SSE2 MMX */
15942 IX86_BUILTIN_MASKMOVDQU,
15943 IX86_BUILTIN_MOVMSKPD,
15944 IX86_BUILTIN_PMOVMSKB128,
15945
15946 IX86_BUILTIN_PACKSSWB128,
15947 IX86_BUILTIN_PACKSSDW128,
15948 IX86_BUILTIN_PACKUSWB128,
15949
15950 IX86_BUILTIN_PADDB128,
15951 IX86_BUILTIN_PADDW128,
15952 IX86_BUILTIN_PADDD128,
15953 IX86_BUILTIN_PADDQ128,
15954 IX86_BUILTIN_PADDSB128,
15955 IX86_BUILTIN_PADDSW128,
15956 IX86_BUILTIN_PADDUSB128,
15957 IX86_BUILTIN_PADDUSW128,
15958 IX86_BUILTIN_PSUBB128,
15959 IX86_BUILTIN_PSUBW128,
15960 IX86_BUILTIN_PSUBD128,
15961 IX86_BUILTIN_PSUBQ128,
15962 IX86_BUILTIN_PSUBSB128,
15963 IX86_BUILTIN_PSUBSW128,
15964 IX86_BUILTIN_PSUBUSB128,
15965 IX86_BUILTIN_PSUBUSW128,
15966
15967 IX86_BUILTIN_PAND128,
15968 IX86_BUILTIN_PANDN128,
15969 IX86_BUILTIN_POR128,
15970 IX86_BUILTIN_PXOR128,
15971
15972 IX86_BUILTIN_PAVGB128,
15973 IX86_BUILTIN_PAVGW128,
15974
15975 IX86_BUILTIN_PCMPEQB128,
15976 IX86_BUILTIN_PCMPEQW128,
15977 IX86_BUILTIN_PCMPEQD128,
15978 IX86_BUILTIN_PCMPGTB128,
15979 IX86_BUILTIN_PCMPGTW128,
15980 IX86_BUILTIN_PCMPGTD128,
15981
15982 IX86_BUILTIN_PMADDWD128,
15983
15984 IX86_BUILTIN_PMAXSW128,
15985 IX86_BUILTIN_PMAXUB128,
15986 IX86_BUILTIN_PMINSW128,
15987 IX86_BUILTIN_PMINUB128,
15988
15989 IX86_BUILTIN_PMULUDQ,
15990 IX86_BUILTIN_PMULUDQ128,
15991 IX86_BUILTIN_PMULHUW128,
15992 IX86_BUILTIN_PMULHW128,
15993 IX86_BUILTIN_PMULLW128,
15994
15995 IX86_BUILTIN_PSADBW128,
15996 IX86_BUILTIN_PSHUFHW,
15997 IX86_BUILTIN_PSHUFLW,
15998 IX86_BUILTIN_PSHUFD,
15999
16000 IX86_BUILTIN_PSLLW128,
16001 IX86_BUILTIN_PSLLD128,
16002 IX86_BUILTIN_PSLLQ128,
16003 IX86_BUILTIN_PSRAW128,
16004 IX86_BUILTIN_PSRAD128,
16005 IX86_BUILTIN_PSRLW128,
16006 IX86_BUILTIN_PSRLD128,
16007 IX86_BUILTIN_PSRLQ128,
16008 IX86_BUILTIN_PSLLDQI128,
16009 IX86_BUILTIN_PSLLWI128,
16010 IX86_BUILTIN_PSLLDI128,
16011 IX86_BUILTIN_PSLLQI128,
16012 IX86_BUILTIN_PSRAWI128,
16013 IX86_BUILTIN_PSRADI128,
16014 IX86_BUILTIN_PSRLDQI128,
16015 IX86_BUILTIN_PSRLWI128,
16016 IX86_BUILTIN_PSRLDI128,
16017 IX86_BUILTIN_PSRLQI128,
16018
16019 IX86_BUILTIN_PUNPCKHBW128,
16020 IX86_BUILTIN_PUNPCKHWD128,
16021 IX86_BUILTIN_PUNPCKHDQ128,
16022 IX86_BUILTIN_PUNPCKHQDQ128,
16023 IX86_BUILTIN_PUNPCKLBW128,
16024 IX86_BUILTIN_PUNPCKLWD128,
16025 IX86_BUILTIN_PUNPCKLDQ128,
16026 IX86_BUILTIN_PUNPCKLQDQ128,
16027
16028 IX86_BUILTIN_CLFLUSH,
16029 IX86_BUILTIN_MFENCE,
16030 IX86_BUILTIN_LFENCE,
16031
16032 /* Prescott New Instructions. */
16033 IX86_BUILTIN_ADDSUBPS,
16034 IX86_BUILTIN_HADDPS,
16035 IX86_BUILTIN_HSUBPS,
16036 IX86_BUILTIN_MOVSHDUP,
16037 IX86_BUILTIN_MOVSLDUP,
16038 IX86_BUILTIN_ADDSUBPD,
16039 IX86_BUILTIN_HADDPD,
16040 IX86_BUILTIN_HSUBPD,
16041 IX86_BUILTIN_LDDQU,
16042
16043 IX86_BUILTIN_MONITOR,
16044 IX86_BUILTIN_MWAIT,
16045
16046 /* SSSE3. */
16047 IX86_BUILTIN_PHADDW,
16048 IX86_BUILTIN_PHADDD,
16049 IX86_BUILTIN_PHADDSW,
16050 IX86_BUILTIN_PHSUBW,
16051 IX86_BUILTIN_PHSUBD,
16052 IX86_BUILTIN_PHSUBSW,
16053 IX86_BUILTIN_PMADDUBSW,
16054 IX86_BUILTIN_PMULHRSW,
16055 IX86_BUILTIN_PSHUFB,
16056 IX86_BUILTIN_PSIGNB,
16057 IX86_BUILTIN_PSIGNW,
16058 IX86_BUILTIN_PSIGND,
16059 IX86_BUILTIN_PALIGNR,
16060 IX86_BUILTIN_PABSB,
16061 IX86_BUILTIN_PABSW,
16062 IX86_BUILTIN_PABSD,
16063
16064 IX86_BUILTIN_PHADDW128,
16065 IX86_BUILTIN_PHADDD128,
16066 IX86_BUILTIN_PHADDSW128,
16067 IX86_BUILTIN_PHSUBW128,
16068 IX86_BUILTIN_PHSUBD128,
16069 IX86_BUILTIN_PHSUBSW128,
16070 IX86_BUILTIN_PMADDUBSW128,
16071 IX86_BUILTIN_PMULHRSW128,
16072 IX86_BUILTIN_PSHUFB128,
16073 IX86_BUILTIN_PSIGNB128,
16074 IX86_BUILTIN_PSIGNW128,
16075 IX86_BUILTIN_PSIGND128,
16076 IX86_BUILTIN_PALIGNR128,
16077 IX86_BUILTIN_PABSB128,
16078 IX86_BUILTIN_PABSW128,
16079 IX86_BUILTIN_PABSD128,
16080
16081 /* AMDFAM10 - SSE4A New Instructions. */
16082 IX86_BUILTIN_MOVNTSD,
16083 IX86_BUILTIN_MOVNTSS,
16084 IX86_BUILTIN_EXTRQI,
16085 IX86_BUILTIN_EXTRQ,
16086 IX86_BUILTIN_INSERTQI,
16087 IX86_BUILTIN_INSERTQ,
16088
16089 IX86_BUILTIN_VEC_INIT_V2SI,
16090 IX86_BUILTIN_VEC_INIT_V4HI,
16091 IX86_BUILTIN_VEC_INIT_V8QI,
16092 IX86_BUILTIN_VEC_EXT_V2DF,
16093 IX86_BUILTIN_VEC_EXT_V2DI,
16094 IX86_BUILTIN_VEC_EXT_V4SF,
16095 IX86_BUILTIN_VEC_EXT_V4SI,
16096 IX86_BUILTIN_VEC_EXT_V8HI,
16097 IX86_BUILTIN_VEC_EXT_V2SI,
16098 IX86_BUILTIN_VEC_EXT_V4HI,
16099 IX86_BUILTIN_VEC_SET_V8HI,
16100 IX86_BUILTIN_VEC_SET_V4HI,
16101
16102 IX86_BUILTIN_MAX
16103 };
16104
16105 /* Table for the ix86 builtin decls. */
16106 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16107
16108 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16109 * if the target_flags include one of MASK. Stores the function decl
16110 * in the ix86_builtins array.
16111 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16112
16113 static inline tree
16114 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16115 {
16116 tree decl = NULL_TREE;
16117
16118 if (mask & target_flags
16119 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16120 {
16121 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16122 NULL, NULL_TREE);
16123 ix86_builtins[(int) code] = decl;
16124 }
16125
16126 return decl;
16127 }
16128
16129 /* Like def_builtin, but also marks the function decl "const". */
16130
16131 static inline tree
16132 def_builtin_const (int mask, const char *name, tree type,
16133 enum ix86_builtins code)
16134 {
16135 tree decl = def_builtin (mask, name, type, code);
16136 if (decl)
16137 TREE_READONLY (decl) = 1;
16138 return decl;
16139 }
16140
16141 /* Bits for builtin_description.flag. */
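/* Illustrative only: a registration made with the helpers above would look
   like the (hypothetical) call

     def_builtin_const (MASK_SSE, "__builtin_ia32_addps",
                        v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   i.e. the decl is created only when MASK_SSE is present in target_flags
   (and, for MASK_64BIT builtins, only when TARGET_64BIT), and
   def_builtin_const additionally marks it TREE_READONLY.  */
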
16142
16143 /* Set when we don't support the comparison natively, and should
16144 swap_comparison in order to support it. */
16145 #define BUILTIN_DESC_SWAP_OPERANDS 1
16146
16147 struct builtin_description
16148 {
16149 const unsigned int mask;
16150 const enum insn_code icode;
16151 const char *const name;
16152 const enum ix86_builtins code;
16153 const enum rtx_code comparison;
16154 const unsigned int flag;
16155 };
16156
16157 static const struct builtin_description bdesc_comi[] =
16158 {
16159 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16160 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16161 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16162 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16163 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16164 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16165 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16166 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16167 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16168 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16169 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16170 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16171 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16172 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16173 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16174 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16175 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16176 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16177 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16178 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16179 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16180 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16181 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16182 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16183 };
16184
16185 static const struct builtin_description bdesc_2arg[] =
16186 {
16187 /* SSE */
16188 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16189 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16190 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16191 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16192 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16193 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16194 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16195 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16196
16197 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16198 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16199 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16200 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16201 BUILTIN_DESC_SWAP_OPERANDS },
16202 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16203 BUILTIN_DESC_SWAP_OPERANDS },
16204 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16205 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16206 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16207 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16208 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16209 BUILTIN_DESC_SWAP_OPERANDS },
16210 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16211 BUILTIN_DESC_SWAP_OPERANDS },
16212 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16213 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16214 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16215 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16216 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16217 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16218 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16219 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16220 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16221 BUILTIN_DESC_SWAP_OPERANDS },
16222 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16223 BUILTIN_DESC_SWAP_OPERANDS },
16224 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
16225
16226 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16227 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16228 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16229 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16230
16231 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16232 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16233 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16234 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16235
16236 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16237 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16238 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16239 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16240 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16241
16242 /* MMX */
16243 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16244 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16245 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16246 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16247 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16248 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16249 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16250 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16251
16252 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16253 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16254 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16255 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16256 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16257 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16258 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16259 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16260
16261 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16262 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16263 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16264
16265 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16266 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16267 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16268 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16269
16270 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16271 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16272
16273 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16274 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16275 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16276 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16277 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16278 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16279
16280 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16281 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16282 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16283 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16284
16285 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16286 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16287 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16288 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16289 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16290 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16291
16292 /* Special. */
16293 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16294 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16295 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16296
16297 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16298 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16299 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16300
16301 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16302 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16303 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16304 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16305 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16306 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16307
16308 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16309 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16310 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16311 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16312 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16313 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16314
16315 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16316 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16317 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16318 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16319
16320 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16321 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16322
16323 /* SSE2 */
16324 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16325 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16326 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16327 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16328 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16329 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16330 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16331 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16332
16333 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16334 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16335 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16336 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16337 BUILTIN_DESC_SWAP_OPERANDS },
16338 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16339 BUILTIN_DESC_SWAP_OPERANDS },
16340 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16341 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16342 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16343 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16344 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16345 BUILTIN_DESC_SWAP_OPERANDS },
16346 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16347 BUILTIN_DESC_SWAP_OPERANDS },
16348 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16349 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16350 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16351 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16352 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16353 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16354 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16355 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16356 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16357
16358 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16359 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16360 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16362
16363 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16364 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16365 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16366 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16367
16368 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16371
16372 /* SSE2 MMX */
16373 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16374 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16375 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16376 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16377 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16378 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16379 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16380 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16381
16382 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16384 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16385 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16386 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16387 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16388 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16389 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16390
16391 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16392 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16393
16394 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16395 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16396 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16397 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16398
16399 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16400 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16401
16402 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16403 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16404 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16405 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16406 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16407 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16408
16409 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16410 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16411 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16412 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16413
16414 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16415 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16416 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16417 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16418 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16419 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16420 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16421 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16422
16423 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16424 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16425 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16426
16427 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16428 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16429
16430 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16431 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16432
16433 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16434 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16435 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16436
16437 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16438 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16439 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16440
16441 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16442 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16443
16444 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16445
16446 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16447 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16448 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16449 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16450
16451 /* SSE3 MMX */
16452 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16453 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16454 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16455 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16456 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16457 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16458
16459 /* SSSE3 */
16460 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16461 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16462 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16463 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16464 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16465 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16466 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16467 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16468 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16469 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16470 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16471 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16472 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16473 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16474 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16475 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16476 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16477 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16478 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16479 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16480 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16481 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16482 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16483 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16484 };
16485
16486 static const struct builtin_description bdesc_1arg[] =
16487 {
16488 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16489 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16490
16491 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16492 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16493 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16494
16495 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16496 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16497 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16498 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16499 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16500 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16501
16502 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16503 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16504
16505 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16506
16507 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16508 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16509
16510 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16511 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16512 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16513 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16514 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16515
16516 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16517
16518 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16519 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16520 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16521 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16522
16523 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16524 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16525 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16526
16527 /* SSE3 */
16528 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16529 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16530
16531 /* SSSE3 */
16532 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16533 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16534 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16535 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16536 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16537 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16538 };
16539
16540 static void
16541 ix86_init_builtins (void)
16542 {
16543 if (TARGET_MMX)
16544 ix86_init_mmx_sse_builtins ();
16545 }
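
/* Illustrative note, not part of the original file: the builtin description
   tables above feed ix86_init_mmx_sse_builtins below, and the resulting
   __builtin_ia32_* functions are what the intrinsic headers wrap.  Assuming
   the usual xmmintrin.h wrappers (an assumption about the headers, not
   something this file defines), code built with -msse can reach the
   bdesc_comi table like so:

       #include <xmmintrin.h>

       int first_lanes_equal (__m128 a, __m128 b)
       {
         return _mm_comieq_ss (a, b);
       }

   where _mm_comieq_ss is typically implemented on top of
   __builtin_ia32_comieq.  */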
16546
16547 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
16548    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
16549    builtins are defined.  */
16550 static void
16551 ix86_init_mmx_sse_builtins (void)
16552 {
16553 const struct builtin_description * d;
16554 size_t i;
16555
16556 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16557 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16558 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16559 tree V2DI_type_node
16560 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16561 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16562 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16563 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16564 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16565 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16566 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16567
16568 tree pchar_type_node = build_pointer_type (char_type_node);
16569 tree pcchar_type_node = build_pointer_type (
16570 build_type_variant (char_type_node, 1, 0));
16571 tree pfloat_type_node = build_pointer_type (float_type_node);
16572 tree pcfloat_type_node = build_pointer_type (
16573 build_type_variant (float_type_node, 1, 0));
16574 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16575 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16576 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16577
16578 /* Comparisons. */
16579 tree int_ftype_v4sf_v4sf
16580 = build_function_type_list (integer_type_node,
16581 V4SF_type_node, V4SF_type_node, NULL_TREE);
16582 tree v4si_ftype_v4sf_v4sf
16583 = build_function_type_list (V4SI_type_node,
16584 V4SF_type_node, V4SF_type_node, NULL_TREE);
16585 /* MMX/SSE/integer conversions. */
16586 tree int_ftype_v4sf
16587 = build_function_type_list (integer_type_node,
16588 V4SF_type_node, NULL_TREE);
16589 tree int64_ftype_v4sf
16590 = build_function_type_list (long_long_integer_type_node,
16591 V4SF_type_node, NULL_TREE);
16592 tree int_ftype_v8qi
16593 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16594 tree v4sf_ftype_v4sf_int
16595 = build_function_type_list (V4SF_type_node,
16596 V4SF_type_node, integer_type_node, NULL_TREE);
16597 tree v4sf_ftype_v4sf_int64
16598 = build_function_type_list (V4SF_type_node,
16599 V4SF_type_node, long_long_integer_type_node,
16600 NULL_TREE);
16601 tree v4sf_ftype_v4sf_v2si
16602 = build_function_type_list (V4SF_type_node,
16603 V4SF_type_node, V2SI_type_node, NULL_TREE);
16604
16605 /* Miscellaneous. */
16606 tree v8qi_ftype_v4hi_v4hi
16607 = build_function_type_list (V8QI_type_node,
16608 V4HI_type_node, V4HI_type_node, NULL_TREE);
16609 tree v4hi_ftype_v2si_v2si
16610 = build_function_type_list (V4HI_type_node,
16611 V2SI_type_node, V2SI_type_node, NULL_TREE);
16612 tree v4sf_ftype_v4sf_v4sf_int
16613 = build_function_type_list (V4SF_type_node,
16614 V4SF_type_node, V4SF_type_node,
16615 integer_type_node, NULL_TREE);
16616 tree v2si_ftype_v4hi_v4hi
16617 = build_function_type_list (V2SI_type_node,
16618 V4HI_type_node, V4HI_type_node, NULL_TREE);
16619 tree v4hi_ftype_v4hi_int
16620 = build_function_type_list (V4HI_type_node,
16621 V4HI_type_node, integer_type_node, NULL_TREE);
16622 tree v4hi_ftype_v4hi_di
16623 = build_function_type_list (V4HI_type_node,
16624 V4HI_type_node, long_long_unsigned_type_node,
16625 NULL_TREE);
16626 tree v2si_ftype_v2si_di
16627 = build_function_type_list (V2SI_type_node,
16628 V2SI_type_node, long_long_unsigned_type_node,
16629 NULL_TREE);
16630 tree void_ftype_void
16631 = build_function_type (void_type_node, void_list_node);
16632 tree void_ftype_unsigned
16633 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16634 tree void_ftype_unsigned_unsigned
16635 = build_function_type_list (void_type_node, unsigned_type_node,
16636 unsigned_type_node, NULL_TREE);
16637 tree void_ftype_pcvoid_unsigned_unsigned
16638 = build_function_type_list (void_type_node, const_ptr_type_node,
16639 unsigned_type_node, unsigned_type_node,
16640 NULL_TREE);
16641 tree unsigned_ftype_void
16642 = build_function_type (unsigned_type_node, void_list_node);
16643 tree v2si_ftype_v4sf
16644 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16645 /* Loads/stores. */
16646 tree void_ftype_v8qi_v8qi_pchar
16647 = build_function_type_list (void_type_node,
16648 V8QI_type_node, V8QI_type_node,
16649 pchar_type_node, NULL_TREE);
16650 tree v4sf_ftype_pcfloat
16651 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16652 /* @@@ the type is bogus */
16653 tree v4sf_ftype_v4sf_pv2si
16654 = build_function_type_list (V4SF_type_node,
16655 V4SF_type_node, pv2si_type_node, NULL_TREE);
16656 tree void_ftype_pv2si_v4sf
16657 = build_function_type_list (void_type_node,
16658 pv2si_type_node, V4SF_type_node, NULL_TREE);
16659 tree void_ftype_pfloat_v4sf
16660 = build_function_type_list (void_type_node,
16661 pfloat_type_node, V4SF_type_node, NULL_TREE);
16662 tree void_ftype_pdi_di
16663 = build_function_type_list (void_type_node,
16664 pdi_type_node, long_long_unsigned_type_node,
16665 NULL_TREE);
16666 tree void_ftype_pv2di_v2di
16667 = build_function_type_list (void_type_node,
16668 pv2di_type_node, V2DI_type_node, NULL_TREE);
16669 /* Normal vector unops. */
16670 tree v4sf_ftype_v4sf
16671 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16672 tree v16qi_ftype_v16qi
16673 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16674 tree v8hi_ftype_v8hi
16675 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16676 tree v4si_ftype_v4si
16677 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16678 tree v8qi_ftype_v8qi
16679 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16680 tree v4hi_ftype_v4hi
16681 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16682
16683 /* Normal vector binops. */
16684 tree v4sf_ftype_v4sf_v4sf
16685 = build_function_type_list (V4SF_type_node,
16686 V4SF_type_node, V4SF_type_node, NULL_TREE);
16687 tree v8qi_ftype_v8qi_v8qi
16688 = build_function_type_list (V8QI_type_node,
16689 V8QI_type_node, V8QI_type_node, NULL_TREE);
16690 tree v4hi_ftype_v4hi_v4hi
16691 = build_function_type_list (V4HI_type_node,
16692 V4HI_type_node, V4HI_type_node, NULL_TREE);
16693 tree v2si_ftype_v2si_v2si
16694 = build_function_type_list (V2SI_type_node,
16695 V2SI_type_node, V2SI_type_node, NULL_TREE);
16696 tree di_ftype_di_di
16697 = build_function_type_list (long_long_unsigned_type_node,
16698 long_long_unsigned_type_node,
16699 long_long_unsigned_type_node, NULL_TREE);
16700
16701 tree di_ftype_di_di_int
16702 = build_function_type_list (long_long_unsigned_type_node,
16703 long_long_unsigned_type_node,
16704 long_long_unsigned_type_node,
16705 integer_type_node, NULL_TREE);
16706
16707 tree v2si_ftype_v2sf
16708 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16709 tree v2sf_ftype_v2si
16710 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16711 tree v2si_ftype_v2si
16712 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16713 tree v2sf_ftype_v2sf
16714 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16715 tree v2sf_ftype_v2sf_v2sf
16716 = build_function_type_list (V2SF_type_node,
16717 V2SF_type_node, V2SF_type_node, NULL_TREE);
16718 tree v2si_ftype_v2sf_v2sf
16719 = build_function_type_list (V2SI_type_node,
16720 V2SF_type_node, V2SF_type_node, NULL_TREE);
16721 tree pint_type_node = build_pointer_type (integer_type_node);
16722 tree pdouble_type_node = build_pointer_type (double_type_node);
16723 tree pcdouble_type_node = build_pointer_type (
16724 build_type_variant (double_type_node, 1, 0));
16725 tree int_ftype_v2df_v2df
16726 = build_function_type_list (integer_type_node,
16727 V2DF_type_node, V2DF_type_node, NULL_TREE);
16728
16729 tree void_ftype_pcvoid
16730 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16731 tree v4sf_ftype_v4si
16732 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16733 tree v4si_ftype_v4sf
16734 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16735 tree v2df_ftype_v4si
16736 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16737 tree v4si_ftype_v2df
16738 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16739 tree v2si_ftype_v2df
16740 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16741 tree v4sf_ftype_v2df
16742 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16743 tree v2df_ftype_v2si
16744 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16745 tree v2df_ftype_v4sf
16746 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16747 tree int_ftype_v2df
16748 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16749 tree int64_ftype_v2df
16750 = build_function_type_list (long_long_integer_type_node,
16751 V2DF_type_node, NULL_TREE);
16752 tree v2df_ftype_v2df_int
16753 = build_function_type_list (V2DF_type_node,
16754 V2DF_type_node, integer_type_node, NULL_TREE);
16755 tree v2df_ftype_v2df_int64
16756 = build_function_type_list (V2DF_type_node,
16757 V2DF_type_node, long_long_integer_type_node,
16758 NULL_TREE);
16759 tree v4sf_ftype_v4sf_v2df
16760 = build_function_type_list (V4SF_type_node,
16761 V4SF_type_node, V2DF_type_node, NULL_TREE);
16762 tree v2df_ftype_v2df_v4sf
16763 = build_function_type_list (V2DF_type_node,
16764 V2DF_type_node, V4SF_type_node, NULL_TREE);
16765 tree v2df_ftype_v2df_v2df_int
16766 = build_function_type_list (V2DF_type_node,
16767 V2DF_type_node, V2DF_type_node,
16768 integer_type_node,
16769 NULL_TREE);
16770 tree v2df_ftype_v2df_pcdouble
16771 = build_function_type_list (V2DF_type_node,
16772 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16773 tree void_ftype_pdouble_v2df
16774 = build_function_type_list (void_type_node,
16775 pdouble_type_node, V2DF_type_node, NULL_TREE);
16776 tree void_ftype_pint_int
16777 = build_function_type_list (void_type_node,
16778 pint_type_node, integer_type_node, NULL_TREE);
16779 tree void_ftype_v16qi_v16qi_pchar
16780 = build_function_type_list (void_type_node,
16781 V16QI_type_node, V16QI_type_node,
16782 pchar_type_node, NULL_TREE);
16783 tree v2df_ftype_pcdouble
16784 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16785 tree v2df_ftype_v2df_v2df
16786 = build_function_type_list (V2DF_type_node,
16787 V2DF_type_node, V2DF_type_node, NULL_TREE);
16788 tree v16qi_ftype_v16qi_v16qi
16789 = build_function_type_list (V16QI_type_node,
16790 V16QI_type_node, V16QI_type_node, NULL_TREE);
16791 tree v8hi_ftype_v8hi_v8hi
16792 = build_function_type_list (V8HI_type_node,
16793 V8HI_type_node, V8HI_type_node, NULL_TREE);
16794 tree v4si_ftype_v4si_v4si
16795 = build_function_type_list (V4SI_type_node,
16796 V4SI_type_node, V4SI_type_node, NULL_TREE);
16797 tree v2di_ftype_v2di_v2di
16798 = build_function_type_list (V2DI_type_node,
16799 V2DI_type_node, V2DI_type_node, NULL_TREE);
16800 tree v2di_ftype_v2df_v2df
16801 = build_function_type_list (V2DI_type_node,
16802 V2DF_type_node, V2DF_type_node, NULL_TREE);
16803 tree v2df_ftype_v2df
16804 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16805 tree v2di_ftype_v2di_int
16806 = build_function_type_list (V2DI_type_node,
16807 V2DI_type_node, integer_type_node, NULL_TREE);
16808 tree v2di_ftype_v2di_v2di_int
16809 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16810 V2DI_type_node, integer_type_node, NULL_TREE);
16811 tree v4si_ftype_v4si_int
16812 = build_function_type_list (V4SI_type_node,
16813 V4SI_type_node, integer_type_node, NULL_TREE);
16814 tree v8hi_ftype_v8hi_int
16815 = build_function_type_list (V8HI_type_node,
16816 V8HI_type_node, integer_type_node, NULL_TREE);
16817 tree v8hi_ftype_v8hi_v2di
16818 = build_function_type_list (V8HI_type_node,
16819 V8HI_type_node, V2DI_type_node, NULL_TREE);
16820 tree v4si_ftype_v4si_v2di
16821 = build_function_type_list (V4SI_type_node,
16822 V4SI_type_node, V2DI_type_node, NULL_TREE);
16823 tree v4si_ftype_v8hi_v8hi
16824 = build_function_type_list (V4SI_type_node,
16825 V8HI_type_node, V8HI_type_node, NULL_TREE);
16826 tree di_ftype_v8qi_v8qi
16827 = build_function_type_list (long_long_unsigned_type_node,
16828 V8QI_type_node, V8QI_type_node, NULL_TREE);
16829 tree di_ftype_v2si_v2si
16830 = build_function_type_list (long_long_unsigned_type_node,
16831 V2SI_type_node, V2SI_type_node, NULL_TREE);
16832 tree v2di_ftype_v16qi_v16qi
16833 = build_function_type_list (V2DI_type_node,
16834 V16QI_type_node, V16QI_type_node, NULL_TREE);
16835 tree v2di_ftype_v4si_v4si
16836 = build_function_type_list (V2DI_type_node,
16837 V4SI_type_node, V4SI_type_node, NULL_TREE);
16838 tree int_ftype_v16qi
16839 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16840 tree v16qi_ftype_pcchar
16841 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16842 tree void_ftype_pchar_v16qi
16843 = build_function_type_list (void_type_node,
16844 pchar_type_node, V16QI_type_node, NULL_TREE);
16845
16846 tree v2di_ftype_v2di_unsigned_unsigned
16847 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16848 unsigned_type_node, unsigned_type_node,
16849 NULL_TREE);
16850 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16851 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16852 unsigned_type_node, unsigned_type_node,
16853 NULL_TREE);
16854 tree v2di_ftype_v2di_v16qi
16855 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16856 NULL_TREE);
16857
16858 tree float80_type;
16859 tree float128_type;
16860 tree ftype;
16861
16862 /* The __float80 type. */
16863 if (TYPE_MODE (long_double_type_node) == XFmode)
16864 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16865 "__float80");
16866 else
16867 {
16868 /* long double is not XFmode here, so build a distinct 80-bit type for __float80.  */
16869 float80_type = make_node (REAL_TYPE);
16870 TYPE_PRECISION (float80_type) = 80;
16871 layout_type (float80_type);
16872 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16873 }
16874
16875 if (TARGET_64BIT)
16876 {
16877 float128_type = make_node (REAL_TYPE);
16878 TYPE_PRECISION (float128_type) = 128;
16879 layout_type (float128_type);
16880 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16881 }
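
/* Illustrative usage, not part of the original file: once registered, the
   type names are directly usable in user code, e.g.

       __float80 widen (double d)
       {
         return (__float80) d;
       }

   __float128 is only registered for TARGET_64BIT, per the block above, and
   whether __float80 is the same type as long double depends on the target's
   long double mode.  */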
16882
16883 /* Add all builtins that are more or less simple operations on two
16884 operands. */
16885 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16886 {
16887 /* Use one of the operands; the target can have a different mode for
16888 mask-generating compares. */
16889 enum machine_mode mode;
16890 tree type;
16891
16892 if (d->name == 0)
16893 continue;
16894 mode = insn_data[d->icode].operand[1].mode;
16895
16896 switch (mode)
16897 {
16898 case V16QImode:
16899 type = v16qi_ftype_v16qi_v16qi;
16900 break;
16901 case V8HImode:
16902 type = v8hi_ftype_v8hi_v8hi;
16903 break;
16904 case V4SImode:
16905 type = v4si_ftype_v4si_v4si;
16906 break;
16907 case V2DImode:
16908 type = v2di_ftype_v2di_v2di;
16909 break;
16910 case V2DFmode:
16911 type = v2df_ftype_v2df_v2df;
16912 break;
16913 case V4SFmode:
16914 type = v4sf_ftype_v4sf_v4sf;
16915 break;
16916 case V8QImode:
16917 type = v8qi_ftype_v8qi_v8qi;
16918 break;
16919 case V4HImode:
16920 type = v4hi_ftype_v4hi_v4hi;
16921 break;
16922 case V2SImode:
16923 type = v2si_ftype_v2si_v2si;
16924 break;
16925 case DImode:
16926 type = di_ftype_di_di;
16927 break;
16928
16929 default:
16930 gcc_unreachable ();
16931 }
16932
16933 /* Override for comparisons. */
16934 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16935 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16936 type = v4si_ftype_v4sf_v4sf;
16937
16938 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16939 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16940 type = v2di_ftype_v2df_v2df;
16941
16942 def_builtin (d->mask, d->name, type, d->code);
16943 }
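
/* Worked example, illustrative only: for the bdesc_2arg entry that names
   "__builtin_ia32_addps", operand 1 of CODE_FOR_addv4sf3 has mode V4SFmode,
   so the loop above is equivalent to writing

       def_builtin (MASK_SSE, "__builtin_ia32_addps",
                    v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   by hand, while the mask-generating compare entries such as
   "__builtin_ia32_cmpeqps" are overridden to the v4si_ftype_v4sf_v4sf
   signature instead.  */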
16944
16945 /* Add all builtins that are more or less simple operations on 1 operand. */
16946 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16947 {
16948 enum machine_mode mode;
16949 tree type;
16950
16951 if (d->name == 0)
16952 continue;
16953 mode = insn_data[d->icode].operand[1].mode;
16954
16955 switch (mode)
16956 {
16957 case V16QImode:
16958 type = v16qi_ftype_v16qi;
16959 break;
16960 case V8HImode:
16961 type = v8hi_ftype_v8hi;
16962 break;
16963 case V4SImode:
16964 type = v4si_ftype_v4si;
16965 break;
16966 case V2DFmode:
16967 type = v2df_ftype_v2df;
16968 break;
16969 case V4SFmode:
16970 type = v4sf_ftype_v4sf;
16971 break;
16972 case V8QImode:
16973 type = v8qi_ftype_v8qi;
16974 break;
16975 case V4HImode:
16976 type = v4hi_ftype_v4hi;
16977 break;
16978 case V2SImode:
16979 type = v2si_ftype_v2si;
16980 break;
16981
16982 default:
16983 gcc_unreachable ();
16984 }
16985
16986 def_builtin (d->mask, d->name, type, d->code);
16987 }
16988
16989 /* Add the remaining MMX insns with somewhat more complicated types. */
16990 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16991 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16992 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16993 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16994
16995 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16996 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16997 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16998
16999 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17000 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17001
17002 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17003 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17004
17005 /* comi/ucomi insns. */
17006 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17007 if (d->mask == MASK_SSE2)
17008 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17009 else
17010 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
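
/* Illustrative equivalence, not part of the original file: for the
   bdesc_comi entry naming "__builtin_ia32_comisdeq", whose mask is
   MASK_SSE2, the loop above amounts to

       def_builtin (MASK_SSE2, "__builtin_ia32_comisdeq",
                    int_ftype_v2df_v2df, IX86_BUILTIN_COMIEQSD);

   while the single-precision entries take int_ftype_v4sf_v4sf.  */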
17011
17012 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17013 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17014 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17015
17016 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17017 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17018 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17019 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17020 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17021 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17022 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17023 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17024 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17025 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17026 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17027
17028 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17029
17030 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17031 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17032
17033 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17034 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17035 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17036 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17037
17038 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17039 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17040 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17041 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17042
17043 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17044
17045 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17046
17047 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17048 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17049 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17050 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17051 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17052 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17053
17054 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17055
17056 /* Original 3DNow! */
17057 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17058 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17059 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17060 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17061 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17062 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17063 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17064 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17065 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17066 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17067 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17068 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17069 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17070 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17071 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17072 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17073 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17074 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17075 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17076 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17077
17078 /* 3DNow! extension as used in the Athlon CPU. */
17079 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17080 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17081 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17082 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17083 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17084 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17085
17086 /* SSE2 */
17087 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17088
17089 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17090 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17091
17092 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17093 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17094
17095 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17096 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17097 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17098 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17099 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17100
17101 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17102 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17103 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17104 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17105
17106 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17107 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17108
17109 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17110
17111 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17112 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17113
17114 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17115 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17116 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17117 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17118 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17119
17120 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17121
17122 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17123 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17124 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17125 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17126
17127 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17128 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17129 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17130
17131 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17132 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17133 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17134 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17135
17136 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17137 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17138 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17139
17140 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17141 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17142
17143 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17144 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17145
17146 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17147 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17148 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17149
17150 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17151 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17152 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17153
17154 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17155 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17156
17157 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17158 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17159 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17160 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17161
17162 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17163 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17164 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17165 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17166
17167 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17168 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17169
17170 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17171
17172 /* Prescott New Instructions. */
17173 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17174 void_ftype_pcvoid_unsigned_unsigned,
17175 IX86_BUILTIN_MONITOR);
17176 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17177 void_ftype_unsigned_unsigned,
17178 IX86_BUILTIN_MWAIT);
17179 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17180 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17181
17182 /* SSSE3. */
17183 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17184 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17185 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17186 IX86_BUILTIN_PALIGNR);
17187
17188 /* AMDFAM10 SSE4A new built-ins. */
17189 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17190 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17191 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17192 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17193 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17194 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17195 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17196 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17197 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17198 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17199 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17200 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17201
17202 /* Access to the vec_init patterns. */
17203 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17204 integer_type_node, NULL_TREE);
17205 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17206 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17207
17208 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17209 short_integer_type_node,
17210 short_integer_type_node,
17211 short_integer_type_node, NULL_TREE);
17212 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17213 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17214
17215 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17216 char_type_node, char_type_node,
17217 char_type_node, char_type_node,
17218 char_type_node, char_type_node,
17219 char_type_node, NULL_TREE);
17220 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17221 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17222
17223 /* Access to the vec_extract patterns. */
17224 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17225 integer_type_node, NULL_TREE);
17226 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17227 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17228
17229 ftype = build_function_type_list (long_long_integer_type_node,
17230 V2DI_type_node, integer_type_node,
17231 NULL_TREE);
17232 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17233 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17234
17235 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17236 integer_type_node, NULL_TREE);
17237 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17238 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17239
17240 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17241 integer_type_node, NULL_TREE);
17242 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17243 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17244
17245 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17246 integer_type_node, NULL_TREE);
17247 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17248 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17249
17250 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17251 integer_type_node, NULL_TREE);
17252 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17253 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17254
17255 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17256 integer_type_node, NULL_TREE);
17257 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17258 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17259
17260 /* Access to the vec_set patterns. */
17261 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17262 intHI_type_node,
17263 integer_type_node, NULL_TREE);
17264 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17265 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17266
17267 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17268 intHI_type_node,
17269 integer_type_node, NULL_TREE);
17270 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17271 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17272 }
17273
17274 /* Errors in the source file can cause expand_expr to return const0_rtx
17275 where we expect a vector. To avoid crashing, use one of the vector
17276 clear instructions. */
17277 static rtx
17278 safe_vector_operand (rtx x, enum machine_mode mode)
17279 {
17280 if (x == const0_rtx)
17281 x = CONST0_RTX (mode);
17282 return x;
17283 }
17284
17285 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17286
17287 static rtx
17288 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17289 {
17290 rtx pat, xops[3];
17291 tree arg0 = CALL_EXPR_ARG (exp, 0);
17292 tree arg1 = CALL_EXPR_ARG (exp, 1);
17293 rtx op0 = expand_normal (arg0);
17294 rtx op1 = expand_normal (arg1);
17295 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17296 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17297 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17298
17299 if (VECTOR_MODE_P (mode0))
17300 op0 = safe_vector_operand (op0, mode0);
17301 if (VECTOR_MODE_P (mode1))
17302 op1 = safe_vector_operand (op1, mode1);
17303
17304 if (optimize || !target
17305 || GET_MODE (target) != tmode
17306 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17307 target = gen_reg_rtx (tmode);
17308
17309 if (GET_MODE (op1) == SImode && mode1 == TImode)
17310 {
17311 rtx x = gen_reg_rtx (V4SImode);
17312 emit_insn (gen_sse2_loadd (x, op1));
17313 op1 = gen_lowpart (TImode, x);
17314 }
17315
17316 /* The insn must want input operands in the same modes as the
17317 result. */
17318 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17319 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17320
17321 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17322 op0 = copy_to_mode_reg (mode0, op0);
17323 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17324 op1 = copy_to_mode_reg (mode1, op1);
17325
17326 /* ??? Using ix86_fixup_binary_operands is problematic when
17327 we've got mismatched modes. Fake it. */
17328
17329 xops[0] = target;
17330 xops[1] = op0;
17331 xops[2] = op1;
17332
17333 if (tmode == mode0 && tmode == mode1)
17334 {
17335 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17336 op0 = xops[1];
17337 op1 = xops[2];
17338 }
17339 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17340 {
17341 op0 = force_reg (mode0, op0);
17342 op1 = force_reg (mode1, op1);
17343 target = gen_reg_rtx (tmode);
17344 }
17345
17346 pat = GEN_FCN (icode) (target, op0, op1);
17347 if (! pat)
17348 return 0;
17349 emit_insn (pat);
17350 return target;
17351 }
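
/* Illustrative sketch (not part of GCC): a typical user-level path into
   ix86_expand_binop_builtin.  The intrinsic below comes from <xmmintrin.h>
   and, in this era of GCC, is believed to wrap the two-operand builtin
   __builtin_ia32_addps, which is expanded through this helper; treat the
   exact builtin name as an assumption, since the mapping can differ
   between GCC versions.  Compile with -msse.

     #include <xmmintrin.h>

     __m128 add4 (__m128 a, __m128 b)
     {
       return _mm_add_ps (a, b);
     }

   Both operands are copied into registers that satisfy the insn predicates,
   and a fresh target register is normally allocated, so the expander does
   not have to worry about overlapping input and output RTL.  */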
17352
17353 /* Subroutine of ix86_expand_builtin to take care of stores. */
17354
17355 static rtx
17356 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17357 {
17358 rtx pat;
17359 tree arg0 = CALL_EXPR_ARG (exp, 0);
17360 tree arg1 = CALL_EXPR_ARG (exp, 1);
17361 rtx op0 = expand_normal (arg0);
17362 rtx op1 = expand_normal (arg1);
17363 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17364 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17365
17366 if (VECTOR_MODE_P (mode1))
17367 op1 = safe_vector_operand (op1, mode1);
17368
17369 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17370 op1 = copy_to_mode_reg (mode1, op1);
17371
17372 pat = GEN_FCN (icode) (op0, op1);
17373 if (pat)
17374 emit_insn (pat);
17375 return 0;
17376 }
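
/* Illustrative sketch (not part of GCC): store builtins return nothing
   useful; the first argument is a pointer that becomes the MEM destination
   built above.  For example, the <xmmintrin.h> streaming store below is
   believed to wrap __builtin_ia32_movntps and reach IX86_BUILTIN_MOVNTPS
   (the exact wrapper is an assumption).  Compile with -msse.

     #include <xmmintrin.h>

     void stream4 (float *p, __m128 v)
     {
       _mm_stream_ps (p, v);
     }
*/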
17377
17378 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17379
17380 static rtx
17381 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17382 rtx target, int do_load)
17383 {
17384 rtx pat;
17385 tree arg0 = CALL_EXPR_ARG (exp, 0);
17386 rtx op0 = expand_normal (arg0);
17387 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17388 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17389
17390 if (optimize || !target
17391 || GET_MODE (target) != tmode
17392 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17393 target = gen_reg_rtx (tmode);
17394 if (do_load)
17395 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17396 else
17397 {
17398 if (VECTOR_MODE_P (mode0))
17399 op0 = safe_vector_operand (op0, mode0);
17400
17401 if ((optimize && !register_operand (op0, mode0))
17402 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17403 op0 = copy_to_mode_reg (mode0, op0);
17404 }
17405
17406 pat = GEN_FCN (icode) (target, op0);
17407 if (! pat)
17408 return 0;
17409 emit_insn (pat);
17410 return target;
17411 }
17412
17413 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17414 sqrtss, rsqrtss, rcpss. */
17415
17416 static rtx
17417 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17418 {
17419 rtx pat;
17420 tree arg0 = CALL_EXPR_ARG (exp, 0);
17421 rtx op1, op0 = expand_normal (arg0);
17422 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17423 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17424
17425 if (optimize || !target
17426 || GET_MODE (target) != tmode
17427 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17428 target = gen_reg_rtx (tmode);
17429
17430 if (VECTOR_MODE_P (mode0))
17431 op0 = safe_vector_operand (op0, mode0);
17432
17433 if ((optimize && !register_operand (op0, mode0))
17434 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17435 op0 = copy_to_mode_reg (mode0, op0);
17436
17437 op1 = op0;
17438 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17439 op1 = copy_to_mode_reg (mode0, op1);
17440
17441 pat = GEN_FCN (icode) (target, op0, op1);
17442 if (! pat)
17443 return 0;
17444 emit_insn (pat);
17445 return target;
17446 }
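
/* Illustrative sketch (not part of GCC): the three scalar insns handled
   here (sqrtss, rsqrtss, rcpss) operate on the low element only and copy
   the upper three elements from their vector input, which is why OP0 is
   also passed as the second operand above.  A user-level example from
   <xmmintrin.h>, believed to reach IX86_BUILTIN_SQRTSS (the wrapper name
   is an assumption).  Compile with -msse.

     #include <xmmintrin.h>

     __m128 low_sqrt (__m128 v)
     {
       return _mm_sqrt_ss (v);
     }
*/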
17447
17448 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17449
17450 static rtx
17451 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17452 rtx target)
17453 {
17454 rtx pat;
17455 tree arg0 = CALL_EXPR_ARG (exp, 0);
17456 tree arg1 = CALL_EXPR_ARG (exp, 1);
17457 rtx op0 = expand_normal (arg0);
17458 rtx op1 = expand_normal (arg1);
17459 rtx op2;
17460 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17461 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17462 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17463 enum rtx_code comparison = d->comparison;
17464
17465 if (VECTOR_MODE_P (mode0))
17466 op0 = safe_vector_operand (op0, mode0);
17467 if (VECTOR_MODE_P (mode1))
17468 op1 = safe_vector_operand (op1, mode1);
17469
17470 /* Swap operands if we have a comparison that isn't available in
17471 hardware. */
17472 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17473 {
17474 rtx tmp = gen_reg_rtx (mode1);
17475 emit_move_insn (tmp, op1);
17476 op1 = op0;
17477 op0 = tmp;
17478 }
17479
17480 if (optimize || !target
17481 || GET_MODE (target) != tmode
17482 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17483 target = gen_reg_rtx (tmode);
17484
17485 if ((optimize && !register_operand (op0, mode0))
17486 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17487 op0 = copy_to_mode_reg (mode0, op0);
17488 if ((optimize && !register_operand (op1, mode1))
17489 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17490 op1 = copy_to_mode_reg (mode1, op1);
17491
17492 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17493 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17494 if (! pat)
17495 return 0;
17496 emit_insn (pat);
17497 return target;
17498 }
17499
17500 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17501
17502 static rtx
17503 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17504 rtx target)
17505 {
17506 rtx pat;
17507 tree arg0 = CALL_EXPR_ARG (exp, 0);
17508 tree arg1 = CALL_EXPR_ARG (exp, 1);
17509 rtx op0 = expand_normal (arg0);
17510 rtx op1 = expand_normal (arg1);
17511 rtx op2;
17512 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17513 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17514 enum rtx_code comparison = d->comparison;
17515
17516 if (VECTOR_MODE_P (mode0))
17517 op0 = safe_vector_operand (op0, mode0);
17518 if (VECTOR_MODE_P (mode1))
17519 op1 = safe_vector_operand (op1, mode1);
17520
17521 /* Swap operands if we have a comparison that isn't available in
17522 hardware. */
17523 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17524 {
17525 rtx tmp = op1;
17526 op1 = op0;
17527 op0 = tmp;
17528 }
17529
17530 target = gen_reg_rtx (SImode);
17531 emit_move_insn (target, const0_rtx);
17532 target = gen_rtx_SUBREG (QImode, target, 0);
17533
17534 if ((optimize && !register_operand (op0, mode0))
17535 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17536 op0 = copy_to_mode_reg (mode0, op0);
17537 if ((optimize && !register_operand (op1, mode1))
17538 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17539 op1 = copy_to_mode_reg (mode1, op1);
17540
17541 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17542 pat = GEN_FCN (d->icode) (op0, op1);
17543 if (! pat)
17544 return 0;
17545 emit_insn (pat);
17546 emit_insn (gen_rtx_SET (VOIDmode,
17547 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17548 gen_rtx_fmt_ee (comparison, QImode,
17549 SET_DEST (pat),
17550 const0_rtx)));
17551
17552 return SUBREG_REG (target);
17553 }
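
/* Illustrative sketch (not part of GCC): the comi builtins compare the low
   elements of two vectors with a (u)comiss/(u)comisd insn and then turn the
   resulting flag into an integer via the STRICT_LOW_PART store above.  A
   user-level example from <xmmintrin.h> (the exact builtin mapping is an
   assumption).  Compile with -msse.

     #include <xmmintrin.h>

     int low_less (__m128 a, __m128 b)
     {
       return _mm_comilt_ss (a, b);
     }
*/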
17554
17555 /* Return the integer constant in ARG. Constrain it to be in the range
17556 of the subparts of VEC_TYPE; issue an error if not. */
17557
17558 static int
17559 get_element_number (tree vec_type, tree arg)
17560 {
17561 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17562
17563 if (!host_integerp (arg, 1)
17564 || (elt = tree_low_cst (arg, 1), elt > max))
17565 {
17566 error ("selector must be an integer constant in the range 0..%wi", max);
17567 return 0;
17568 }
17569
17570 return elt;
17571 }
17572
17573 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17574 ix86_expand_vector_init. We DO have language-level syntax for this, in
17575 the form of (type){ init-list }. Except that since we can't place emms
17576 instructions from inside the compiler, we can't allow the use of MMX
17577 registers unless the user explicitly asks for it. So we do *not* define
17578 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17579 we have builtins invoked by mmintrin.h that give us license to emit
17580 these sorts of instructions. */
17581
17582 static rtx
17583 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17584 {
17585 enum machine_mode tmode = TYPE_MODE (type);
17586 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17587 int i, n_elt = GET_MODE_NUNITS (tmode);
17588 rtvec v = rtvec_alloc (n_elt);
17589
17590 gcc_assert (VECTOR_MODE_P (tmode));
17591 gcc_assert (call_expr_nargs (exp) == n_elt);
17592
17593 for (i = 0; i < n_elt; ++i)
17594 {
17595 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17596 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17597 }
17598
17599 if (!target || !register_operand (target, tmode))
17600 target = gen_reg_rtx (tmode);
17601
17602 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17603 return target;
17604 }
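
/* Illustrative sketch (not part of GCC): the vec_init builtins are reached
   from <mmintrin.h>.  For instance, _mm_set_pi32 is believed to expand to
   __builtin_ia32_vec_init_v2si (the exact wrapper is an assumption), so the
   function below funnels two scalars through the expander above.  Compile
   with -mmmx.

     #include <mmintrin.h>

     __m64 pair (int lo, int hi)
     {
       return _mm_set_pi32 (hi, lo);
     }

   Routing this through a builtin rather than (type){ init-list } syntax
   keeps MMX register usage under the control of the intrinsic headers, as
   explained in the comment before ix86_expand_vec_init_builtin.  */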
17605
17606 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17607 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17608 had a language-level syntax for referencing vector elements. */
17609
17610 static rtx
17611 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17612 {
17613 enum machine_mode tmode, mode0;
17614 tree arg0, arg1;
17615 int elt;
17616 rtx op0;
17617
17618 arg0 = CALL_EXPR_ARG (exp, 0);
17619 arg1 = CALL_EXPR_ARG (exp, 1);
17620
17621 op0 = expand_normal (arg0);
17622 elt = get_element_number (TREE_TYPE (arg0), arg1);
17623
17624 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17625 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17626 gcc_assert (VECTOR_MODE_P (mode0));
17627
17628 op0 = force_reg (mode0, op0);
17629
17630 if (optimize || !target || !register_operand (target, tmode))
17631 target = gen_reg_rtx (tmode);
17632
17633 ix86_expand_vector_extract (true, target, op0, elt);
17634
17635 return target;
17636 }
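
/* Illustrative sketch (not part of GCC): the vec_ext builtins take a vector
   and a constant selector, which get_element_number range-checks.  A direct
   call, compiled with -msse:

     typedef float v4sf __attribute__ ((vector_size (16)));

     float first (v4sf v)
     {
       return __builtin_ia32_vec_ext_v4sf (v, 0);
     }

   Passing a non-constant or out-of-range selector triggers the
   "selector must be an integer constant" error above.  */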
17637
17638 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17639 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17640 a language-level syntax for referencing vector elements. */
17641
17642 static rtx
17643 ix86_expand_vec_set_builtin (tree exp)
17644 {
17645 enum machine_mode tmode, mode1;
17646 tree arg0, arg1, arg2;
17647 int elt;
17648 rtx op0, op1;
17649
17650 arg0 = CALL_EXPR_ARG (exp, 0);
17651 arg1 = CALL_EXPR_ARG (exp, 1);
17652 arg2 = CALL_EXPR_ARG (exp, 2);
17653
17654 tmode = TYPE_MODE (TREE_TYPE (arg0));
17655 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17656 gcc_assert (VECTOR_MODE_P (tmode));
17657
17658 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17659 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17660 elt = get_element_number (TREE_TYPE (arg0), arg2);
17661
17662 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17663 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17664
17665 op0 = force_reg (tmode, op0);
17666 op1 = force_reg (mode1, op1);
17667
17668 ix86_expand_vector_set (true, op0, op1, elt);
17669
17670 return op0;
17671 }
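
/* Illustrative sketch (not part of GCC): the vec_set builtins replace one
   element and return the modified vector.  _mm_insert_epi16 from
   <emmintrin.h> is believed to wrap __builtin_ia32_vec_set_v8hi (the exact
   wrapper is an assumption).  Compile with -msse2.

     #include <emmintrin.h>

     __m128i put3 (__m128i v, int x)
     {
       return _mm_insert_epi16 (v, x, 3);
     }
*/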
17672
17673 /* Expand an expression EXP that calls a built-in function,
17674 with result going to TARGET if that's convenient
17675 (and in mode MODE if that's convenient).
17676 SUBTARGET may be used as the target for computing one of EXP's operands.
17677 IGNORE is nonzero if the value is to be ignored. */
17678
17679 static rtx
17680 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17681 enum machine_mode mode ATTRIBUTE_UNUSED,
17682 int ignore ATTRIBUTE_UNUSED)
17683 {
17684 const struct builtin_description *d;
17685 size_t i;
17686 enum insn_code icode;
17687 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17688 tree arg0, arg1, arg2, arg3;
17689 rtx op0, op1, op2, op3, pat;
17690 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17691 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17692
17693 switch (fcode)
17694 {
17695 case IX86_BUILTIN_EMMS:
17696 emit_insn (gen_mmx_emms ());
17697 return 0;
17698
17699 case IX86_BUILTIN_SFENCE:
17700 emit_insn (gen_sse_sfence ());
17701 return 0;
17702
17703 case IX86_BUILTIN_MASKMOVQ:
17704 case IX86_BUILTIN_MASKMOVDQU:
17705 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17706 ? CODE_FOR_mmx_maskmovq
17707 : CODE_FOR_sse2_maskmovdqu);
17708 /* Note the arg order is different from the operand order. */
17709 arg1 = CALL_EXPR_ARG (exp, 0);
17710 arg2 = CALL_EXPR_ARG (exp, 1);
17711 arg0 = CALL_EXPR_ARG (exp, 2);
17712 op0 = expand_normal (arg0);
17713 op1 = expand_normal (arg1);
17714 op2 = expand_normal (arg2);
17715 mode0 = insn_data[icode].operand[0].mode;
17716 mode1 = insn_data[icode].operand[1].mode;
17717 mode2 = insn_data[icode].operand[2].mode;
17718
17719 op0 = force_reg (Pmode, op0);
17720 op0 = gen_rtx_MEM (mode1, op0);
17721
17722 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17723 op0 = copy_to_mode_reg (mode0, op0);
17724 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17725 op1 = copy_to_mode_reg (mode1, op1);
17726 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17727 op2 = copy_to_mode_reg (mode2, op2);
17728 pat = GEN_FCN (icode) (op0, op1, op2);
17729 if (! pat)
17730 return 0;
17731 emit_insn (pat);
17732 return 0;
17733
17734 case IX86_BUILTIN_SQRTSS:
17735 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17736 case IX86_BUILTIN_RSQRTSS:
17737 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17738 case IX86_BUILTIN_RCPSS:
17739 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17740
17741 case IX86_BUILTIN_LOADUPS:
17742 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17743
17744 case IX86_BUILTIN_STOREUPS:
17745 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17746
17747 case IX86_BUILTIN_LOADHPS:
17748 case IX86_BUILTIN_LOADLPS:
17749 case IX86_BUILTIN_LOADHPD:
17750 case IX86_BUILTIN_LOADLPD:
17751 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17752 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17753 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17754 : CODE_FOR_sse2_loadlpd);
17755 arg0 = CALL_EXPR_ARG (exp, 0);
17756 arg1 = CALL_EXPR_ARG (exp, 1);
17757 op0 = expand_normal (arg0);
17758 op1 = expand_normal (arg1);
17759 tmode = insn_data[icode].operand[0].mode;
17760 mode0 = insn_data[icode].operand[1].mode;
17761 mode1 = insn_data[icode].operand[2].mode;
17762
17763 op0 = force_reg (mode0, op0);
17764 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17765 if (optimize || target == 0
17766 || GET_MODE (target) != tmode
17767 || !register_operand (target, tmode))
17768 target = gen_reg_rtx (tmode);
17769 pat = GEN_FCN (icode) (target, op0, op1);
17770 if (! pat)
17771 return 0;
17772 emit_insn (pat);
17773 return target;
17774
17775 case IX86_BUILTIN_STOREHPS:
17776 case IX86_BUILTIN_STORELPS:
17777 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17778 : CODE_FOR_sse_storelps);
17779 arg0 = CALL_EXPR_ARG (exp, 0);
17780 arg1 = CALL_EXPR_ARG (exp, 1);
17781 op0 = expand_normal (arg0);
17782 op1 = expand_normal (arg1);
17783 mode0 = insn_data[icode].operand[0].mode;
17784 mode1 = insn_data[icode].operand[1].mode;
17785
17786 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17787 op1 = force_reg (mode1, op1);
17788
17789 pat = GEN_FCN (icode) (op0, op1);
17790 if (! pat)
17791 return 0;
17792 emit_insn (pat);
17793 return const0_rtx;
17794
17795 case IX86_BUILTIN_MOVNTPS:
17796 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17797 case IX86_BUILTIN_MOVNTQ:
17798 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17799
17800 case IX86_BUILTIN_LDMXCSR:
17801 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17802 target = assign_386_stack_local (SImode, SLOT_TEMP);
17803 emit_move_insn (target, op0);
17804 emit_insn (gen_sse_ldmxcsr (target));
17805 return 0;
17806
17807 case IX86_BUILTIN_STMXCSR:
17808 target = assign_386_stack_local (SImode, SLOT_TEMP);
17809 emit_insn (gen_sse_stmxcsr (target));
17810 return copy_to_mode_reg (SImode, target);
17811
17812 case IX86_BUILTIN_SHUFPS:
17813 case IX86_BUILTIN_SHUFPD:
17814 icode = (fcode == IX86_BUILTIN_SHUFPS
17815 ? CODE_FOR_sse_shufps
17816 : CODE_FOR_sse2_shufpd);
17817 arg0 = CALL_EXPR_ARG (exp, 0);
17818 arg1 = CALL_EXPR_ARG (exp, 1);
17819 arg2 = CALL_EXPR_ARG (exp, 2);
17820 op0 = expand_normal (arg0);
17821 op1 = expand_normal (arg1);
17822 op2 = expand_normal (arg2);
17823 tmode = insn_data[icode].operand[0].mode;
17824 mode0 = insn_data[icode].operand[1].mode;
17825 mode1 = insn_data[icode].operand[2].mode;
17826 mode2 = insn_data[icode].operand[3].mode;
17827
17828 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17829 op0 = copy_to_mode_reg (mode0, op0);
17830 if ((optimize && !register_operand (op1, mode1))
17831 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17832 op1 = copy_to_mode_reg (mode1, op1);
17833 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17834 {
17835 /* @@@ better error message */
17836 error ("mask must be an immediate");
17837 return gen_reg_rtx (tmode);
17838 }
17839 if (optimize || target == 0
17840 || GET_MODE (target) != tmode
17841 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17842 target = gen_reg_rtx (tmode);
17843 pat = GEN_FCN (icode) (target, op0, op1, op2);
17844 if (! pat)
17845 return 0;
17846 emit_insn (pat);
17847 return target;
17848
17849 case IX86_BUILTIN_PSHUFW:
17850 case IX86_BUILTIN_PSHUFD:
17851 case IX86_BUILTIN_PSHUFHW:
17852 case IX86_BUILTIN_PSHUFLW:
17853 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17854 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17855 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17856 : CODE_FOR_mmx_pshufw);
17857 arg0 = CALL_EXPR_ARG (exp, 0);
17858 arg1 = CALL_EXPR_ARG (exp, 1);
17859 op0 = expand_normal (arg0);
17860 op1 = expand_normal (arg1);
17861 tmode = insn_data[icode].operand[0].mode;
17862 mode1 = insn_data[icode].operand[1].mode;
17863 mode2 = insn_data[icode].operand[2].mode;
17864
17865 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17866 op0 = copy_to_mode_reg (mode1, op0);
17867 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17868 {
17869 /* @@@ better error message */
17870 error ("mask must be an immediate");
17871 return const0_rtx;
17872 }
17873 if (target == 0
17874 || GET_MODE (target) != tmode
17875 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17876 target = gen_reg_rtx (tmode);
17877 pat = GEN_FCN (icode) (target, op0, op1);
17878 if (! pat)
17879 return 0;
17880 emit_insn (pat);
17881 return target;
17882
17883 case IX86_BUILTIN_PSLLDQI128:
17884 case IX86_BUILTIN_PSRLDQI128:
17885 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17886 : CODE_FOR_sse2_lshrti3);
17887 arg0 = CALL_EXPR_ARG (exp, 0);
17888 arg1 = CALL_EXPR_ARG (exp, 1);
17889 op0 = expand_normal (arg0);
17890 op1 = expand_normal (arg1);
17891 tmode = insn_data[icode].operand[0].mode;
17892 mode1 = insn_data[icode].operand[1].mode;
17893 mode2 = insn_data[icode].operand[2].mode;
17894
17895 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17896 {
17897 op0 = copy_to_reg (op0);
17898 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17899 }
17900 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17901 {
17902 error ("shift must be an immediate");
17903 return const0_rtx;
17904 }
17905 target = gen_reg_rtx (V2DImode);
17906 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17907 if (! pat)
17908 return 0;
17909 emit_insn (pat);
17910 return target;
17911
17912 case IX86_BUILTIN_FEMMS:
17913 emit_insn (gen_mmx_femms ());
17914 return NULL_RTX;
17915
17916 case IX86_BUILTIN_PAVGUSB:
17917 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
17918
17919 case IX86_BUILTIN_PF2ID:
17920 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
17921
17922 case IX86_BUILTIN_PFACC:
17923 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
17924
17925 case IX86_BUILTIN_PFADD:
17926 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
17927
17928 case IX86_BUILTIN_PFCMPEQ:
17929 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
17930
17931 case IX86_BUILTIN_PFCMPGE:
17932 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
17933
17934 case IX86_BUILTIN_PFCMPGT:
17935 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
17936
17937 case IX86_BUILTIN_PFMAX:
17938 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
17939
17940 case IX86_BUILTIN_PFMIN:
17941 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
17942
17943 case IX86_BUILTIN_PFMUL:
17944 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
17945
17946 case IX86_BUILTIN_PFRCP:
17947 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
17948
17949 case IX86_BUILTIN_PFRCPIT1:
17950 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
17951
17952 case IX86_BUILTIN_PFRCPIT2:
17953 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
17954
17955 case IX86_BUILTIN_PFRSQIT1:
17956 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
17957
17958 case IX86_BUILTIN_PFRSQRT:
17959 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
17960
17961 case IX86_BUILTIN_PFSUB:
17962 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
17963
17964 case IX86_BUILTIN_PFSUBR:
17965 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
17966
17967 case IX86_BUILTIN_PI2FD:
17968 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
17969
17970 case IX86_BUILTIN_PMULHRW:
17971 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
17972
17973 case IX86_BUILTIN_PF2IW:
17974 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
17975
17976 case IX86_BUILTIN_PFNACC:
17977 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
17978
17979 case IX86_BUILTIN_PFPNACC:
17980 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
17981
17982 case IX86_BUILTIN_PI2FW:
17983 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
17984
17985 case IX86_BUILTIN_PSWAPDSI:
17986 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
17987
17988 case IX86_BUILTIN_PSWAPDSF:
17989 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
17990
17991 case IX86_BUILTIN_SQRTSD:
17992 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
17993 case IX86_BUILTIN_LOADUPD:
17994 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
17995 case IX86_BUILTIN_STOREUPD:
17996 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
17997
17998 case IX86_BUILTIN_MFENCE:
17999 emit_insn (gen_sse2_mfence ());
18000 return 0;
18001 case IX86_BUILTIN_LFENCE:
18002 emit_insn (gen_sse2_lfence ());
18003 return 0;
18004
18005 case IX86_BUILTIN_CLFLUSH:
18006 arg0 = CALL_EXPR_ARG (exp, 0);
18007 op0 = expand_normal (arg0);
18008 icode = CODE_FOR_sse2_clflush;
18009 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18010 op0 = copy_to_mode_reg (Pmode, op0);
18011
18012 emit_insn (gen_sse2_clflush (op0));
18013 return 0;
18014
18015 case IX86_BUILTIN_MOVNTPD:
18016 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18017 case IX86_BUILTIN_MOVNTDQ:
18018 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18019 case IX86_BUILTIN_MOVNTI:
18020 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18021
18022 case IX86_BUILTIN_LOADDQU:
18023 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18024 case IX86_BUILTIN_STOREDQU:
18025 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18026
18027 case IX86_BUILTIN_MONITOR:
18028 arg0 = CALL_EXPR_ARG (exp, 0);
18029 arg1 = CALL_EXPR_ARG (exp, 1);
18030 arg2 = CALL_EXPR_ARG (exp, 2);
18031 op0 = expand_normal (arg0);
18032 op1 = expand_normal (arg1);
18033 op2 = expand_normal (arg2);
18034 if (!REG_P (op0))
18035 op0 = copy_to_mode_reg (Pmode, op0);
18036 if (!REG_P (op1))
18037 op1 = copy_to_mode_reg (SImode, op1);
18038 if (!REG_P (op2))
18039 op2 = copy_to_mode_reg (SImode, op2);
18040 if (!TARGET_64BIT)
18041 emit_insn (gen_sse3_monitor (op0, op1, op2));
18042 else
18043 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18044 return 0;
18045
18046 case IX86_BUILTIN_MWAIT:
18047 arg0 = CALL_EXPR_ARG (exp, 0);
18048 arg1 = CALL_EXPR_ARG (exp, 1);
18049 op0 = expand_normal (arg0);
18050 op1 = expand_normal (arg1);
18051 if (!REG_P (op0))
18052 op0 = copy_to_mode_reg (SImode, op0);
18053 if (!REG_P (op1))
18054 op1 = copy_to_mode_reg (SImode, op1);
18055 emit_insn (gen_sse3_mwait (op0, op1));
18056 return 0;
18057
18058 case IX86_BUILTIN_LDDQU:
18059 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18060 target, 1);
18061
18062 case IX86_BUILTIN_PALIGNR:
18063 case IX86_BUILTIN_PALIGNR128:
18064 if (fcode == IX86_BUILTIN_PALIGNR)
18065 {
18066 icode = CODE_FOR_ssse3_palignrdi;
18067 mode = DImode;
18068 }
18069 else
18070 {
18071 icode = CODE_FOR_ssse3_palignrti;
18072 mode = V2DImode;
18073 }
18074 arg0 = CALL_EXPR_ARG (exp, 0);
18075 arg1 = CALL_EXPR_ARG (exp, 1);
18076 arg2 = CALL_EXPR_ARG (exp, 2);
18077 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18078 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18079 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18080 tmode = insn_data[icode].operand[0].mode;
18081 mode1 = insn_data[icode].operand[1].mode;
18082 mode2 = insn_data[icode].operand[2].mode;
18083 mode3 = insn_data[icode].operand[3].mode;
18084
18085 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18086 {
18087 op0 = copy_to_reg (op0);
18088 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18089 }
18090 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18091 {
18092 op1 = copy_to_reg (op1);
18093 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18094 }
18095 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18096 {
18097 error ("shift must be an immediate");
18098 return const0_rtx;
18099 }
18100 target = gen_reg_rtx (mode);
18101 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18102 op0, op1, op2);
18103 if (! pat)
18104 return 0;
18105 emit_insn (pat);
18106 return target;
18107
18108 case IX86_BUILTIN_MOVNTSD:
18109 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18110
18111 case IX86_BUILTIN_MOVNTSS:
18112 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18113
18114 case IX86_BUILTIN_INSERTQ:
18115 case IX86_BUILTIN_EXTRQ:
18116 icode = (fcode == IX86_BUILTIN_EXTRQ
18117 ? CODE_FOR_sse4a_extrq
18118 : CODE_FOR_sse4a_insertq);
18119 arg0 = CALL_EXPR_ARG (exp, 0);
18120 arg1 = CALL_EXPR_ARG (exp, 1);
18121 op0 = expand_normal (arg0);
18122 op1 = expand_normal (arg1);
18123 tmode = insn_data[icode].operand[0].mode;
18124 mode1 = insn_data[icode].operand[1].mode;
18125 mode2 = insn_data[icode].operand[2].mode;
18126 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18127 op0 = copy_to_mode_reg (mode1, op0);
18128 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18129 op1 = copy_to_mode_reg (mode2, op1);
18130 if (optimize || target == 0
18131 || GET_MODE (target) != tmode
18132 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18133 target = gen_reg_rtx (tmode);
18134 pat = GEN_FCN (icode) (target, op0, op1);
18135 if (! pat)
18136 return NULL_RTX;
18137 emit_insn (pat);
18138 return target;
18139
18140 case IX86_BUILTIN_EXTRQI:
18141 icode = CODE_FOR_sse4a_extrqi;
18142 arg0 = CALL_EXPR_ARG (exp, 0);
18143 arg1 = CALL_EXPR_ARG (exp, 1);
18144 arg2 = CALL_EXPR_ARG (exp, 2);
18145 op0 = expand_normal (arg0);
18146 op1 = expand_normal (arg1);
18147 op2 = expand_normal (arg2);
18148 tmode = insn_data[icode].operand[0].mode;
18149 mode1 = insn_data[icode].operand[1].mode;
18150 mode2 = insn_data[icode].operand[2].mode;
18151 mode3 = insn_data[icode].operand[3].mode;
18152 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18153 op0 = copy_to_mode_reg (mode1, op0);
18154 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18155 {
18156 error ("index mask must be an immediate");
18157 return gen_reg_rtx (tmode);
18158 }
18159 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18160 {
18161 error ("length mask must be an immediate");
18162 return gen_reg_rtx (tmode);
18163 }
18164 if (optimize || target == 0
18165 || GET_MODE (target) != tmode
18166 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18167 target = gen_reg_rtx (tmode);
18168 pat = GEN_FCN (icode) (target, op0, op1, op2);
18169 if (! pat)
18170 return NULL_RTX;
18171 emit_insn (pat);
18172 return target;
18173
18174 case IX86_BUILTIN_INSERTQI:
18175 icode = CODE_FOR_sse4a_insertqi;
18176 arg0 = CALL_EXPR_ARG (exp, 0);
18177 arg1 = CALL_EXPR_ARG (exp, 1);
18178 arg2 = CALL_EXPR_ARG (exp, 2);
18179 arg3 = CALL_EXPR_ARG (exp, 3);
18180 op0 = expand_normal (arg0);
18181 op1 = expand_normal (arg1);
18182 op2 = expand_normal (arg2);
18183 op3 = expand_normal (arg3);
18184 tmode = insn_data[icode].operand[0].mode;
18185 mode1 = insn_data[icode].operand[1].mode;
18186 mode2 = insn_data[icode].operand[2].mode;
18187 mode3 = insn_data[icode].operand[3].mode;
18188 mode4 = insn_data[icode].operand[4].mode;
18189
18190 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18191 op0 = copy_to_mode_reg (mode1, op0);
18192
18193 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18194 op1 = copy_to_mode_reg (mode2, op1);
18195
18196 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18197 {
18198 error ("index mask must be an immediate");
18199 return gen_reg_rtx (tmode);
18200 }
18201 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18202 {
18203 error ("length mask must be an immediate");
18204 return gen_reg_rtx (tmode);
18205 }
18206 if (optimize || target == 0
18207 || GET_MODE (target) != tmode
18208 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18209 target = gen_reg_rtx (tmode);
18210 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18211 if (! pat)
18212 return NULL_RTX;
18213 emit_insn (pat);
18214 return target;
18215
18216 case IX86_BUILTIN_VEC_INIT_V2SI:
18217 case IX86_BUILTIN_VEC_INIT_V4HI:
18218 case IX86_BUILTIN_VEC_INIT_V8QI:
18219 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18220
18221 case IX86_BUILTIN_VEC_EXT_V2DF:
18222 case IX86_BUILTIN_VEC_EXT_V2DI:
18223 case IX86_BUILTIN_VEC_EXT_V4SF:
18224 case IX86_BUILTIN_VEC_EXT_V4SI:
18225 case IX86_BUILTIN_VEC_EXT_V8HI:
18226 case IX86_BUILTIN_VEC_EXT_V2SI:
18227 case IX86_BUILTIN_VEC_EXT_V4HI:
18228 return ix86_expand_vec_ext_builtin (exp, target);
18229
18230 case IX86_BUILTIN_VEC_SET_V8HI:
18231 case IX86_BUILTIN_VEC_SET_V4HI:
18232 return ix86_expand_vec_set_builtin (exp);
18233
18234 default:
18235 break;
18236 }
18237
18238 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18239 if (d->code == fcode)
18240 {
18241 /* Compares are treated specially. */
18242 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18243 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18244 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18245 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18246 return ix86_expand_sse_compare (d, exp, target);
18247
18248 return ix86_expand_binop_builtin (d->icode, exp, target);
18249 }
18250
18251 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18252 if (d->code == fcode)
18253 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18254
18255 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18256 if (d->code == fcode)
18257 return ix86_expand_sse_comi (d, exp, target);
18258
18259 gcc_unreachable ();
18260 }
18261
18262 /* Returns a function decl for a vectorized version of the builtin function
18263 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18264 if it is not available. */
18265
18266 static tree
18267 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18268 tree type_in)
18269 {
18270 enum machine_mode in_mode, out_mode;
18271 int in_n, out_n;
18272
18273 if (TREE_CODE (type_out) != VECTOR_TYPE
18274 || TREE_CODE (type_in) != VECTOR_TYPE)
18275 return NULL_TREE;
18276
18277 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18278 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18279 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18280 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18281
18282 switch (fn)
18283 {
18284 case BUILT_IN_SQRT:
18285 if (out_mode == DFmode && out_n == 2
18286 && in_mode == DFmode && in_n == 2)
18287 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18288 return NULL_TREE;
18289
18290 case BUILT_IN_SQRTF:
18291 if (out_mode == SFmode && out_n == 4
18292 && in_mode == SFmode && in_n == 4)
18293 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18294 return NULL_TREE;
18295
18296 case BUILT_IN_LRINTF:
18297 if (out_mode == SImode && out_n == 4
18298 && in_mode == SFmode && in_n == 4)
18299 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18300 return NULL_TREE;
18301
18302 default:
18303 ;
18304 }
18305
18306 return NULL_TREE;
18307 }
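
/* Illustrative sketch (not part of GCC): with options such as
   -O2 -ftree-vectorize -msse2 -ffast-math (the exact flag set that makes
   the transformation legal is an assumption), the vectorizer may query the
   hook above for BUILT_IN_SQRT and replace the scalar calls in a loop like
   the one below with the SQRTPD builtin, processing two doubles per
   iteration, subject to its usual legality and cost checks:

     #include <math.h>

     void root_all (double *a, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         a[i] = sqrt (a[i]);
     }
*/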
18308
18309 /* Returns a decl of a function that implements conversion of the
18310 input vector of type TYPE, or NULL_TREE if it is not available. */
18311
18312 static tree
18313 ix86_builtin_conversion (enum tree_code code, tree type)
18314 {
18315 if (TREE_CODE (type) != VECTOR_TYPE)
18316 return NULL_TREE;
18317
18318 switch (code)
18319 {
18320 case FLOAT_EXPR:
18321 switch (TYPE_MODE (type))
18322 {
18323 case V4SImode:
18324 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18325 default:
18326 return NULL_TREE;
18327 }
18328
18329 case FIX_TRUNC_EXPR:
18330 switch (TYPE_MODE (type))
18331 {
18332 case V4SFmode:
18333 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18334 default:
18335 return NULL_TREE;
18336 }
18337 default:
18338 return NULL_TREE;
18339
18340 }
18341 }
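
/* Illustrative sketch (not part of GCC): the conversion hook lets the
   vectorizer turn an int-to-float loop like the one below into CVTDQ2PS
   operations, four elements at a time, subject to its normal profitability
   checks:

     void widen (float *dst, const int *src, int n)
     {
       int i;
       for (i = 0; i < n; i++)
         dst[i] = (float) src[i];
     }
*/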
18342
18343 /* Store OPERAND to the memory after reload is completed. This means
18344 that we can't easily use assign_stack_local. */
18345 rtx
18346 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18347 {
18348 rtx result;
18349
18350 gcc_assert (reload_completed);
18351 if (TARGET_RED_ZONE)
18352 {
18353 result = gen_rtx_MEM (mode,
18354 gen_rtx_PLUS (Pmode,
18355 stack_pointer_rtx,
18356 GEN_INT (-RED_ZONE_SIZE)));
18357 emit_move_insn (result, operand);
18358 }
18359 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18360 {
18361 switch (mode)
18362 {
18363 case HImode:
18364 case SImode:
18365 operand = gen_lowpart (DImode, operand);
18366 /* FALLTHRU */
18367 case DImode:
18368 emit_insn (
18369 gen_rtx_SET (VOIDmode,
18370 gen_rtx_MEM (DImode,
18371 gen_rtx_PRE_DEC (DImode,
18372 stack_pointer_rtx)),
18373 operand));
18374 break;
18375 default:
18376 gcc_unreachable ();
18377 }
18378 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18379 }
18380 else
18381 {
18382 switch (mode)
18383 {
18384 case DImode:
18385 {
18386 rtx operands[2];
18387 split_di (&operand, 1, operands, operands + 1);
18388 emit_insn (
18389 gen_rtx_SET (VOIDmode,
18390 gen_rtx_MEM (SImode,
18391 gen_rtx_PRE_DEC (Pmode,
18392 stack_pointer_rtx)),
18393 operands[1]));
18394 emit_insn (
18395 gen_rtx_SET (VOIDmode,
18396 gen_rtx_MEM (SImode,
18397 gen_rtx_PRE_DEC (Pmode,
18398 stack_pointer_rtx)),
18399 operands[0]));
18400 }
18401 break;
18402 case HImode:
18403 /* Store HImode values as SImode. */
18404 operand = gen_lowpart (SImode, operand);
18405 /* FALLTHRU */
18406 case SImode:
18407 emit_insn (
18408 gen_rtx_SET (VOIDmode,
18409 gen_rtx_MEM (GET_MODE (operand),
18410 gen_rtx_PRE_DEC (SImode,
18411 stack_pointer_rtx)),
18412 operand));
18413 break;
18414 default:
18415 gcc_unreachable ();
18416 }
18417 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18418 }
18419 return result;
18420 }
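
/* Illustrative sketch (not part of GCC): depending on the target, the
   function above spills OPERAND in one of three ways; the assembly below
   only conveys the shape, not exact output:

     64-bit with red zone:     movq  %reg, -128(%rsp)
     64-bit without red zone:  pushq %reg
     32-bit:                   pushl %reg   (twice for the DImode halves)

   In every case the returned MEM addresses the saved value so the caller
   can use it as a memory operand; ix86_free_from_memory releases the
   pushed slot afterwards when no red zone is available.  */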
18421
18422 /* Free operand from the memory. */
18423 void
18424 ix86_free_from_memory (enum machine_mode mode)
18425 {
18426 if (!TARGET_RED_ZONE)
18427 {
18428 int size;
18429
18430 if (mode == DImode || TARGET_64BIT)
18431 size = 8;
18432 else
18433 size = 4;
18434 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18435 to a pop or add instruction if registers are available. */
18436 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18437 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18438 GEN_INT (size))));
18439 }
18440 }
18441
18442 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18443 QImode must go into class Q_REGS.
18444 Narrow ALL_REGS to GENERAL_REGS. This lets movsf and movdf
18445 do mem-to-mem moves through integer regs. */
18446 enum reg_class
18447 ix86_preferred_reload_class (rtx x, enum reg_class class)
18448 {
18449 enum machine_mode mode = GET_MODE (x);
18450
18451 /* We're only allowed to return a subclass of CLASS. Many of the
18452 following checks fail for NO_REGS, so eliminate that early. */
18453 if (class == NO_REGS)
18454 return NO_REGS;
18455
18456 /* All classes can load zeros. */
18457 if (x == CONST0_RTX (mode))
18458 return class;
18459
18460 /* Force constants into memory if we are loading a (nonzero) constant into
18461 an MMX or SSE register. This is because there are no MMX/SSE instructions
18462 to load from a constant. */
18463 if (CONSTANT_P (x)
18464 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18465 return NO_REGS;
18466
18467 /* Prefer SSE regs only, if we can use them for math. */
18468 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18469 return SSE_CLASS_P (class) ? class : NO_REGS;
18470
18471 /* Floating-point constants need more complex checks. */
18472 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18473 {
18474 /* General regs can load everything. */
18475 if (reg_class_subset_p (class, GENERAL_REGS))
18476 return class;
18477
18478 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18479 zero above. We only want to wind up preferring 80387 registers if
18480 we plan on doing computation with them. */
18481 if (TARGET_80387
18482 && standard_80387_constant_p (x))
18483 {
18484 /* Limit class to non-sse. */
18485 if (class == FLOAT_SSE_REGS)
18486 return FLOAT_REGS;
18487 if (class == FP_TOP_SSE_REGS)
18488 return FP_TOP_REG;
18489 if (class == FP_SECOND_SSE_REGS)
18490 return FP_SECOND_REG;
18491 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18492 return class;
18493 }
18494
18495 return NO_REGS;
18496 }
18497
18498 /* Generally when we see PLUS here, it's the function invariant
18499 (plus soft-fp const_int), which can only be computed into general
18500 regs. */
18501 if (GET_CODE (x) == PLUS)
18502 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18503
18504 /* QImode constants are easy to load, but non-constant QImode data
18505 must go into Q_REGS. */
18506 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18507 {
18508 if (reg_class_subset_p (class, Q_REGS))
18509 return class;
18510 if (reg_class_subset_p (Q_REGS, class))
18511 return Q_REGS;
18512 return NO_REGS;
18513 }
18514
18515 return class;
18516 }
18517
18518 /* Discourage putting floating-point values in SSE registers unless
18519 SSE math is being used, and likewise for the 387 registers. */
18520 enum reg_class
18521 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18522 {
18523 enum machine_mode mode = GET_MODE (x);
18524
18525 /* Restrict the output reload class to the register bank that we are doing
18526 math on. If we would like not to return a subset of CLASS, reject this
18527 alternative: if reload cannot do this, it will still use its choice. */
18529 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18530 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18531
18532 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18533 {
18534 if (class == FP_TOP_SSE_REGS)
18535 return FP_TOP_REG;
18536 else if (class == FP_SECOND_SSE_REGS)
18537 return FP_SECOND_REG;
18538 else
18539 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18540 }
18541
18542 return class;
18543 }
18544
18545 /* If we are copying between general and FP registers, we need a memory
18546 location. The same is true for SSE and MMX registers.
18547
18548 The macro can't work reliably when one of the CLASSES is a class containing
18549 registers from multiple units (SSE, MMX, integer). We avoid this by never
18550 combining those units in a single alternative in the machine description.
18551 Ensure that this constraint holds to avoid unexpected surprises.
18552
18553 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18554 enforce these sanity checks. */
18555
18556 int
18557 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18558 enum machine_mode mode, int strict)
18559 {
18560 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18561 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18562 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18563 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18564 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18565 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18566 {
18567 gcc_assert (!strict);
18568 return true;
18569 }
18570
18571 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18572 return true;
18573
18574 /* ??? This is a lie. We do have moves between mmx/general, and for
18575 mmx/sse2. But by saying we need secondary memory we discourage the
18576 register allocator from using the mmx registers unless needed. */
18577 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18578 return true;
18579
18580 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18581 {
18582 /* SSE1 doesn't have any direct moves from other classes. */
18583 if (!TARGET_SSE2)
18584 return true;
18585
18586 /* If the target says that inter-unit moves are more expensive
18587 than moving through memory, then don't generate them. */
18588 if (!TARGET_INTER_UNIT_MOVES)
18589 return true;
18590
18591 /* Between SSE and general, we have moves no larger than word size. */
18592 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18593 return true;
18594 }
18595
18596 return false;
18597 }
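
/* Illustrative worked case (not part of GCC): on a 32-bit target, a DImode
   move between SSE_REGS and GENERAL_REGS answers "yes" above, because
   GET_MODE_SIZE (DImode) exceeds UNITS_PER_WORD there; reload then routes
   the value through a stack slot instead of attempting a direct
   register-to-register move.  */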
18598
18599 /* Return true if the registers in CLASS cannot represent the change from
18600 modes FROM to TO. */
18601
18602 bool
18603 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18604 enum reg_class class)
18605 {
18606 if (from == to)
18607 return false;
18608
18609 /* x87 registers can't do subreg at all, as all values are reformatted
18610 to extended precision. */
18611 if (MAYBE_FLOAT_CLASS_P (class))
18612 return true;
18613
18614 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18615 {
18616 /* Vector registers do not support QI or HImode loads. If we don't
18617 disallow a change to these modes, reload will assume it's ok to
18618 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18619 the vec_dupv4hi pattern. */
18620 if (GET_MODE_SIZE (from) < 4)
18621 return true;
18622
18623 /* Vector registers do not support subreg with nonzero offsets, which
18624 are otherwise valid for integer registers. Since we can't see
18625 whether we have a nonzero offset from here, prohibit all
18626 nonparadoxical subregs changing size. */
18627 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18628 return true;
18629 }
18630
18631 return false;
18632 }
18633
18634 /* Return the cost of moving data from a register in class CLASS1 to
18635 one in class CLASS2.
18636
18637 It is not required that the cost always equal 2 when FROM is the same as TO;
18638 on some machines it is expensive to move between registers if they are not
18639 general registers. */
18640
18641 int
18642 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18643 enum reg_class class2)
18644 {
18645 /* In case we require secondary memory, compute the cost of the store followed
18646 by the load. In order to avoid bad register allocation choices, we need
18647 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18648
18649 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18650 {
18651 int cost = 1;
18652
18653 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18654 MEMORY_MOVE_COST (mode, class1, 1));
18655 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18656 MEMORY_MOVE_COST (mode, class2, 1));
18657
18658 /* When copying from a general purpose register we may emit multiple
18659 stores followed by a single load, causing a memory size mismatch stall.
18660 Count this as an arbitrarily high cost of 20. */
18661 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18662 cost += 20;
18663
18664 /* In the case of FP/MMX moves, the registers actually overlap, and we
18665 have to switch modes in order to treat them differently. */
18666 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18667 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18668 cost += 20;
18669
18670 return cost;
18671 }
18672
18673 /* Moves between the SSE/MMX and integer units are expensive. */
18674 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18675 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18676 return ix86_cost->mmxsse_to_integer;
18677 if (MAYBE_FLOAT_CLASS_P (class1))
18678 return ix86_cost->fp_move;
18679 if (MAYBE_SSE_CLASS_P (class1))
18680 return ix86_cost->sse_move;
18681 if (MAYBE_MMX_CLASS_P (class1))
18682 return ix86_cost->mmx_move;
18683 return 2;
18684 }
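
/* Illustrative worked example (not part of GCC; the numbers are made up and
   depend on the active cost table): if MEMORY_MOVE_COST for CLASS1 is 4 for
   both load and store, 6 for CLASS2, and CLASS1 needs two hard registers
   where CLASS2 needs one, the secondary-memory path above yields

     cost = 1 + 4 + 6 + 20 = 31

   which is well above any direct register-register move and is exactly what
   steers the allocator away from such cross-unit copies.  */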
18685
18686 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18687
18688 bool
18689 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18690 {
18691 /* Flags, and only flags, can hold CCmode values - and nothing else. */
18692 if (CC_REGNO_P (regno))
18693 return GET_MODE_CLASS (mode) == MODE_CC;
18694 if (GET_MODE_CLASS (mode) == MODE_CC
18695 || GET_MODE_CLASS (mode) == MODE_RANDOM
18696 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18697 return 0;
18698 if (FP_REGNO_P (regno))
18699 return VALID_FP_MODE_P (mode);
18700 if (SSE_REGNO_P (regno))
18701 {
18702 /* We implement the move patterns for all vector modes into and
18703 out of SSE registers, even when no operation instructions
18704 are available. */
18705 return (VALID_SSE_REG_MODE (mode)
18706 || VALID_SSE2_REG_MODE (mode)
18707 || VALID_MMX_REG_MODE (mode)
18708 || VALID_MMX_REG_MODE_3DNOW (mode));
18709 }
18710 if (MMX_REGNO_P (regno))
18711 {
18712 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18713 so if the register is available at all, then we can move data of
18714 the given mode into or out of it. */
18715 return (VALID_MMX_REG_MODE (mode)
18716 || VALID_MMX_REG_MODE_3DNOW (mode));
18717 }
18718
18719 if (mode == QImode)
18720 {
18721 /* Take care with QImode values - they can live in non-QI regs,
18722 but then they cause partial register stalls. */
18723 if (regno < 4 || TARGET_64BIT)
18724 return 1;
18725 if (!TARGET_PARTIAL_REG_STALL)
18726 return 1;
18727 return reload_in_progress || reload_completed;
18728 }
18729 /* We handle both integers and floats in the general purpose registers. */
18730 else if (VALID_INT_MODE_P (mode))
18731 return 1;
18732 else if (VALID_FP_MODE_P (mode))
18733 return 1;
18734 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18735 on to use that value in smaller contexts, this can easily force a
18736 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18737 supporting DImode, allow it. */
18738 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18739 return 1;
18740
18741 return 0;
18742 }
18743
18744 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18745 tieable integer mode. */
18746
18747 static bool
18748 ix86_tieable_integer_mode_p (enum machine_mode mode)
18749 {
18750 switch (mode)
18751 {
18752 case HImode:
18753 case SImode:
18754 return true;
18755
18756 case QImode:
18757 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18758
18759 case DImode:
18760 return TARGET_64BIT;
18761
18762 default:
18763 return false;
18764 }
18765 }
18766
18767 /* Return true if MODE1 is accessible in a register that can hold MODE2
18768 without copying. That is, all register classes that can hold MODE2
18769 can also hold MODE1. */
18770
18771 bool
18772 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18773 {
18774 if (mode1 == mode2)
18775 return true;
18776
18777 if (ix86_tieable_integer_mode_p (mode1)
18778 && ix86_tieable_integer_mode_p (mode2))
18779 return true;
18780
18781 /* MODE2 being XFmode implies fp stack or general regs, which means we
18782 can tie any smaller floating point modes to it. Note that we do not
18783 tie this with TFmode. */
18784 if (mode2 == XFmode)
18785 return mode1 == SFmode || mode1 == DFmode;
18786
18787 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18788 that we can tie it with SFmode. */
18789 if (mode2 == DFmode)
18790 return mode1 == SFmode;
18791
18792 /* If MODE2 is only appropriate for an SSE register, then tie with
18793 any other mode acceptable to SSE registers. */
18794 if (GET_MODE_SIZE (mode2) == 16
18795 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18796 return (GET_MODE_SIZE (mode1) == 16
18797 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18798
18799 /* If MODE2 is appropriate for an MMX register, then tie
18800 with any other mode acceptable to MMX registers. */
18801 if (GET_MODE_SIZE (mode2) == 8
18802 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18803 return (GET_MODE_SIZE (mode1) == 8
18804 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18805
18806 return false;
18807 }
18808
18809 /* Return the cost of moving data of mode M between a
18810 register and memory. A value of 2 is the default; this cost is
18811 relative to those in `REGISTER_MOVE_COST'.
18812
18813 If moving between registers and memory is more expensive than
18814 between two registers, you should define this macro to express the
18815 relative cost.
18816
18817 Also model the increased cost of moving QImode values in
18818 non-Q_REGS classes.
18819 */
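/* As an illustration (the exact numbers depend on the active cost table):
   a DFmode load into FLOAT_REGS is charged fp_load[1], a 16 byte vector
   store from SSE_REGS is charged sse_store[2], and a QImode load into a
   non-Q_REGS class is charged movzbl_load, reflecting the movzbl used
   for such loads.  */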
18820 int
18821 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18822 {
18823 if (FLOAT_CLASS_P (class))
18824 {
18825 int index;
18826 switch (mode)
18827 {
18828 case SFmode:
18829 index = 0;
18830 break;
18831 case DFmode:
18832 index = 1;
18833 break;
18834 case XFmode:
18835 index = 2;
18836 break;
18837 default:
18838 return 100;
18839 }
18840 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18841 }
18842 if (SSE_CLASS_P (class))
18843 {
18844 int index;
18845 switch (GET_MODE_SIZE (mode))
18846 {
18847 case 4:
18848 index = 0;
18849 break;
18850 case 8:
18851 index = 1;
18852 break;
18853 case 16:
18854 index = 2;
18855 break;
18856 default:
18857 return 100;
18858 }
18859 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18860 }
18861 if (MMX_CLASS_P (class))
18862 {
18863 int index;
18864 switch (GET_MODE_SIZE (mode))
18865 {
18866 case 4:
18867 index = 0;
18868 break;
18869 case 8:
18870 index = 1;
18871 break;
18872 default:
18873 return 100;
18874 }
18875 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18876 }
18877 switch (GET_MODE_SIZE (mode))
18878 {
18879 case 1:
18880 if (in)
18881 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18882 : ix86_cost->movzbl_load);
18883 else
18884 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18885 : ix86_cost->int_store[0] + 4);
18886 break;
18887 case 2:
18888 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18889 default:
18890 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18891 if (mode == TFmode)
18892 mode = XFmode;
18893 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18894 * (((int) GET_MODE_SIZE (mode)
18895 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18896 }
18897 }
18898
18899 /* Compute a (partial) cost for rtx X. Return true if the complete
18900 cost has been computed, and false if subexpressions should be
18901 scanned. In either case, *TOTAL contains the cost result. */
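/* Two illustrative cases handled below: (plus (mult reg 4) reg) is costed
   as a single lea, while a DImode add, logical operation or shift on a
   32-bit target is costed as a multi-instruction sequence.  */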
18902
18903 static bool
18904 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18905 {
18906 enum machine_mode mode = GET_MODE (x);
18907
18908 switch (code)
18909 {
18910 case CONST_INT:
18911 case CONST:
18912 case LABEL_REF:
18913 case SYMBOL_REF:
18914 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18915 *total = 3;
18916 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18917 *total = 2;
18918 else if (flag_pic && SYMBOLIC_CONST (x)
18919 && (!TARGET_64BIT
18920 || (GET_CODE (x) != LABEL_REF
18921 && (GET_CODE (x) != SYMBOL_REF
18922 || !SYMBOL_REF_LOCAL_P (x)))))
18923 *total = 1;
18924 else
18925 *total = 0;
18926 return true;
18927
18928 case CONST_DOUBLE:
18929 if (mode == VOIDmode)
18930 *total = 0;
18931 else
18932 switch (standard_80387_constant_p (x))
18933 {
18934 case 1: /* 0.0 */
18935 *total = 1;
18936 break;
18937 default: /* Other constants */
18938 *total = 2;
18939 break;
18940 case 0:
18941 case -1:
18942 /* Start with (MEM (SYMBOL_REF)), since that's where
18943 it'll probably end up. Add a penalty for size. */
18944 *total = (COSTS_N_INSNS (1)
18945 + (flag_pic != 0 && !TARGET_64BIT)
18946 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18947 break;
18948 }
18949 return true;
18950
18951 case ZERO_EXTEND:
18952 /* The zero extension is often completely free on x86_64, so make
18953 it as cheap as possible. */
18954 if (TARGET_64BIT && mode == DImode
18955 && GET_MODE (XEXP (x, 0)) == SImode)
18956 *total = 1;
18957 else if (TARGET_ZERO_EXTEND_WITH_AND)
18958 *total = ix86_cost->add;
18959 else
18960 *total = ix86_cost->movzx;
18961 return false;
18962
18963 case SIGN_EXTEND:
18964 *total = ix86_cost->movsx;
18965 return false;
18966
18967 case ASHIFT:
18968 if (CONST_INT_P (XEXP (x, 1))
18969 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18970 {
18971 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18972 if (value == 1)
18973 {
18974 *total = ix86_cost->add;
18975 return false;
18976 }
18977 if ((value == 2 || value == 3)
18978 && ix86_cost->lea <= ix86_cost->shift_const)
18979 {
18980 *total = ix86_cost->lea;
18981 return false;
18982 }
18983 }
18984 /* FALLTHRU */
18985
18986 case ROTATE:
18987 case ASHIFTRT:
18988 case LSHIFTRT:
18989 case ROTATERT:
18990 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18991 {
18992 if (CONST_INT_P (XEXP (x, 1)))
18993 {
18994 if (INTVAL (XEXP (x, 1)) > 32)
18995 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18996 else
18997 *total = ix86_cost->shift_const * 2;
18998 }
18999 else
19000 {
19001 if (GET_CODE (XEXP (x, 1)) == AND)
19002 *total = ix86_cost->shift_var * 2;
19003 else
19004 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19005 }
19006 }
19007 else
19008 {
19009 if (CONST_INT_P (XEXP (x, 1)))
19010 *total = ix86_cost->shift_const;
19011 else
19012 *total = ix86_cost->shift_var;
19013 }
19014 return false;
19015
19016 case MULT:
19017 if (FLOAT_MODE_P (mode))
19018 {
19019 *total = ix86_cost->fmul;
19020 return false;
19021 }
19022 else
19023 {
19024 rtx op0 = XEXP (x, 0);
19025 rtx op1 = XEXP (x, 1);
19026 int nbits;
19027 if (CONST_INT_P (XEXP (x, 1)))
19028 {
19029 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19030 for (nbits = 0; value != 0; value &= value - 1)
19031 nbits++;
19032 }
19033 else
19034 /* This is arbitrary. */
19035 nbits = 7;
19036
19037 /* Compute costs correctly for widening multiplication. */
19038 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19039 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19040 == GET_MODE_SIZE (mode))
19041 {
19042 int is_mulwiden = 0;
19043 enum machine_mode inner_mode = GET_MODE (op0);
19044
19045 if (GET_CODE (op0) == GET_CODE (op1))
19046 is_mulwiden = 1, op1 = XEXP (op1, 0);
19047 else if (CONST_INT_P (op1))
19048 {
19049 if (GET_CODE (op0) == SIGN_EXTEND)
19050 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19051 == INTVAL (op1);
19052 else
19053 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19054 }
19055
19056 if (is_mulwiden)
19057 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19058 }
19059
19060 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19061 + nbits * ix86_cost->mult_bit
19062 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19063
19064 return true;
19065 }
19066
19067 case DIV:
19068 case UDIV:
19069 case MOD:
19070 case UMOD:
19071 if (FLOAT_MODE_P (mode))
19072 *total = ix86_cost->fdiv;
19073 else
19074 *total = ix86_cost->divide[MODE_INDEX (mode)];
19075 return false;
19076
19077 case PLUS:
19078 if (FLOAT_MODE_P (mode))
19079 *total = ix86_cost->fadd;
19080 else if (GET_MODE_CLASS (mode) == MODE_INT
19081 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19082 {
19083 if (GET_CODE (XEXP (x, 0)) == PLUS
19084 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19085 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19086 && CONSTANT_P (XEXP (x, 1)))
19087 {
19088 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19089 if (val == 2 || val == 4 || val == 8)
19090 {
19091 *total = ix86_cost->lea;
19092 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19093 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19094 outer_code);
19095 *total += rtx_cost (XEXP (x, 1), outer_code);
19096 return true;
19097 }
19098 }
19099 else if (GET_CODE (XEXP (x, 0)) == MULT
19100 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19101 {
19102 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19103 if (val == 2 || val == 4 || val == 8)
19104 {
19105 *total = ix86_cost->lea;
19106 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19107 *total += rtx_cost (XEXP (x, 1), outer_code);
19108 return true;
19109 }
19110 }
19111 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19112 {
19113 *total = ix86_cost->lea;
19114 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19115 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19116 *total += rtx_cost (XEXP (x, 1), outer_code);
19117 return true;
19118 }
19119 }
19120 /* FALLTHRU */
19121
19122 case MINUS:
19123 if (FLOAT_MODE_P (mode))
19124 {
19125 *total = ix86_cost->fadd;
19126 return false;
19127 }
19128 /* FALLTHRU */
19129
19130 case AND:
19131 case IOR:
19132 case XOR:
19133 if (!TARGET_64BIT && mode == DImode)
19134 {
19135 *total = (ix86_cost->add * 2
19136 + (rtx_cost (XEXP (x, 0), outer_code)
19137 << (GET_MODE (XEXP (x, 0)) != DImode))
19138 + (rtx_cost (XEXP (x, 1), outer_code)
19139 << (GET_MODE (XEXP (x, 1)) != DImode)));
19140 return true;
19141 }
19142 /* FALLTHRU */
19143
19144 case NEG:
19145 if (FLOAT_MODE_P (mode))
19146 {
19147 *total = ix86_cost->fchs;
19148 return false;
19149 }
19150 /* FALLTHRU */
19151
19152 case NOT:
19153 if (!TARGET_64BIT && mode == DImode)
19154 *total = ix86_cost->add * 2;
19155 else
19156 *total = ix86_cost->add;
19157 return false;
19158
19159 case COMPARE:
19160 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19161 && XEXP (XEXP (x, 0), 1) == const1_rtx
19162 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19163 && XEXP (x, 1) == const0_rtx)
19164 {
19165 /* This kind of construct is implemented using test[bwl].
19166 Treat it as if we had an AND. */
19167 *total = (ix86_cost->add
19168 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19169 + rtx_cost (const1_rtx, outer_code));
19170 return true;
19171 }
19172 return false;
19173
19174 case FLOAT_EXTEND:
19175 if (!TARGET_SSE_MATH
19176 || mode == XFmode
19177 || (mode == DFmode && !TARGET_SSE2))
19178 *total = 0;
19179 return false;
19180
19181 case ABS:
19182 if (FLOAT_MODE_P (mode))
19183 *total = ix86_cost->fabs;
19184 return false;
19185
19186 case SQRT:
19187 if (FLOAT_MODE_P (mode))
19188 *total = ix86_cost->fsqrt;
19189 return false;
19190
19191 case UNSPEC:
19192 if (XINT (x, 1) == UNSPEC_TP)
19193 *total = 0;
19194 return false;
19195
19196 default:
19197 return false;
19198 }
19199 }
19200
19201 #if TARGET_MACHO
19202
19203 static int current_machopic_label_num;
19204
19205 /* Given a symbol name and its associated stub, write out the
19206 definition of the stub. */
19207
19208 void
19209 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19210 {
19211 unsigned int length;
19212 char *binder_name, *symbol_name, lazy_ptr_name[32];
19213 int label = ++current_machopic_label_num;
19214
19215 /* For 64-bit we shouldn't get here. */
19216 gcc_assert (!TARGET_64BIT);
19217
19218 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19219 symb = (*targetm.strip_name_encoding) (symb);
19220
19221 length = strlen (stub);
19222 binder_name = alloca (length + 32);
19223 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19224
19225 length = strlen (symb);
19226 symbol_name = alloca (length + 32);
19227 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19228
19229 sprintf (lazy_ptr_name, "L%d$lz", label);
19230
19231 if (MACHOPIC_PURE)
19232 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19233 else
19234 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19235
19236 fprintf (file, "%s:\n", stub);
19237 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19238
19239 if (MACHOPIC_PURE)
19240 {
19241 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19242 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19243 fprintf (file, "\tjmp\t*%%edx\n");
19244 }
19245 else
19246 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19247
19248 fprintf (file, "%s:\n", binder_name);
19249
19250 if (MACHOPIC_PURE)
19251 {
19252 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19253 fprintf (file, "\tpushl\t%%eax\n");
19254 }
19255 else
19256 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19257
19258 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19259
19260 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19261 fprintf (file, "%s:\n", lazy_ptr_name);
19262 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19263 fprintf (file, "\t.long %s\n", binder_name);
19264 }
19265
19266 void
19267 darwin_x86_file_end (void)
19268 {
19269 darwin_file_end ();
19270 ix86_file_end ();
19271 }
19272 #endif /* TARGET_MACHO */
19273
19274 /* Order the registers for register allocator. */
19275
19276 void
19277 x86_order_regs_for_local_alloc (void)
19278 {
19279 int pos = 0;
19280 int i;
19281
19282 /* First allocate the local general purpose registers. */
19283 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19284 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19285 reg_alloc_order [pos++] = i;
19286
19287 /* Global general purpose registers. */
19288 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19289 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19290 reg_alloc_order [pos++] = i;
19291
19292 /* x87 registers come first in case we are doing FP math
19293 using them. */
19294 if (!TARGET_SSE_MATH)
19295 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19296 reg_alloc_order [pos++] = i;
19297
19298 /* SSE registers. */
19299 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19300 reg_alloc_order [pos++] = i;
19301 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19302 reg_alloc_order [pos++] = i;
19303
19304 /* x87 registers. */
19305 if (TARGET_SSE_MATH)
19306 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19307 reg_alloc_order [pos++] = i;
19308
19309 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19310 reg_alloc_order [pos++] = i;
19311
19312 /* Initialize the rest of the array, as we do not allocate some registers
19313 at all. */
19314 while (pos < FIRST_PSEUDO_REGISTER)
19315 reg_alloc_order [pos++] = 0;
19316 }
19317
19318 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19319 struct attribute_spec.handler. */
19320 static tree
19321 ix86_handle_struct_attribute (tree *node, tree name,
19322 tree args ATTRIBUTE_UNUSED,
19323 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19324 {
19325 tree *type = NULL;
19326 if (DECL_P (*node))
19327 {
19328 if (TREE_CODE (*node) == TYPE_DECL)
19329 type = &TREE_TYPE (*node);
19330 }
19331 else
19332 type = node;
19333
19334 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19335 || TREE_CODE (*type) == UNION_TYPE)))
19336 {
19337 warning (OPT_Wattributes, "%qs attribute ignored",
19338 IDENTIFIER_POINTER (name));
19339 *no_add_attrs = true;
19340 }
19341
19342 else if ((is_attribute_p ("ms_struct", name)
19343 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19344 || ((is_attribute_p ("gcc_struct", name)
19345 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19346 {
19347 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19348 IDENTIFIER_POINTER (name));
19349 *no_add_attrs = true;
19350 }
19351
19352 return NULL_TREE;
19353 }
19354
19355 static bool
19356 ix86_ms_bitfield_layout_p (tree record_type)
19357 {
19358 return (TARGET_MS_BITFIELD_LAYOUT &&
19359 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19360 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19361 }
19362
19363 /* Returns an expression indicating where the this parameter is
19364 located on entry to the FUNCTION. */
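/* Roughly: on 64-bit targets THIS arrives in the first integer argument
   register (the second one when the callee returns an aggregate in memory);
   on 32-bit targets it is normally at 4(%esp) (8(%esp) with a hidden
   aggregate-return pointer), or in %eax/%ecx for regparm/fastcall
   functions.  */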
19365
19366 static rtx
19367 x86_this_parameter (tree function)
19368 {
19369 tree type = TREE_TYPE (function);
19370
19371 if (TARGET_64BIT)
19372 {
19373 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19374 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19375 }
19376
19377 if (ix86_function_regparm (type, function) > 0)
19378 {
19379 tree parm;
19380
19381 parm = TYPE_ARG_TYPES (type);
19382 /* Figure out whether or not the function has a variable number of
19383 arguments. */
19384 for (; parm; parm = TREE_CHAIN (parm))
19385 if (TREE_VALUE (parm) == void_type_node)
19386 break;
19387 /* If not, the this parameter is in the first argument. */
19388 if (parm)
19389 {
19390 int regno = 0;
19391 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19392 regno = 2;
19393 return gen_rtx_REG (SImode, regno);
19394 }
19395 }
19396
19397 if (aggregate_value_p (TREE_TYPE (type), type))
19398 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19399 else
19400 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19401 }
19402
19403 /* Determine whether x86_output_mi_thunk can succeed. */
19404
19405 static bool
19406 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19407 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19408 HOST_WIDE_INT vcall_offset, tree function)
19409 {
19410 /* 64-bit can handle anything. */
19411 if (TARGET_64BIT)
19412 return true;
19413
19414 /* For 32-bit, everything's fine if we have one free register. */
19415 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19416 return true;
19417
19418 /* Need a free register for vcall_offset. */
19419 if (vcall_offset)
19420 return false;
19421
19422 /* Need a free register for GOT references. */
19423 if (flag_pic && !(*targetm.binds_local_p) (function))
19424 return false;
19425
19426 /* Otherwise ok. */
19427 return true;
19428 }
19429
19430 /* Output the assembler code for a thunk function. THUNK_DECL is the
19431 declaration for the thunk function itself, FUNCTION is the decl for
19432 the target function. DELTA is an immediate constant offset to be
19433 added to THIS. If VCALL_OFFSET is nonzero, the word at
19434 *(*this + vcall_offset) should be added to THIS. */
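/* A small illustration (32-bit, non-PIC, no vcall offset, THIS on the
   stack): the code below then emits approximately
	addl	$DELTA, 4(%esp)
	jmp	function
   i.e. it adjusts THIS in place and tail-calls the real function.  */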
19435
19436 static void
19437 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19438 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19439 HOST_WIDE_INT vcall_offset, tree function)
19440 {
19441 rtx xops[3];
19442 rtx this = x86_this_parameter (function);
19443 rtx this_reg, tmp;
19444
19445 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19446 pull it in now and let DELTA benefit. */
19447 if (REG_P (this))
19448 this_reg = this;
19449 else if (vcall_offset)
19450 {
19451 /* Put the this parameter into %eax. */
19452 xops[0] = this;
19453 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19454 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19455 }
19456 else
19457 this_reg = NULL_RTX;
19458
19459 /* Adjust the this parameter by a fixed constant. */
19460 if (delta)
19461 {
19462 xops[0] = GEN_INT (delta);
19463 xops[1] = this_reg ? this_reg : this;
19464 if (TARGET_64BIT)
19465 {
19466 if (!x86_64_general_operand (xops[0], DImode))
19467 {
19468 tmp = gen_rtx_REG (DImode, R10_REG);
19469 xops[1] = tmp;
19470 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19471 xops[0] = tmp;
19472 xops[1] = this;
19473 }
19474 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19475 }
19476 else
19477 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19478 }
19479
19480 /* Adjust the this parameter by a value stored in the vtable. */
19481 if (vcall_offset)
19482 {
19483 if (TARGET_64BIT)
19484 tmp = gen_rtx_REG (DImode, R10_REG);
19485 else
19486 {
19487 int tmp_regno = 2 /* ECX */;
19488 if (lookup_attribute ("fastcall",
19489 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19490 tmp_regno = 0 /* EAX */;
19491 tmp = gen_rtx_REG (SImode, tmp_regno);
19492 }
19493
19494 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19495 xops[1] = tmp;
19496 if (TARGET_64BIT)
19497 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19498 else
19499 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19500
19501 /* Adjust the this parameter. */
19502 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19503 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19504 {
19505 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19506 xops[0] = GEN_INT (vcall_offset);
19507 xops[1] = tmp2;
19508 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19509 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19510 }
19511 xops[1] = this_reg;
19512 if (TARGET_64BIT)
19513 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19514 else
19515 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19516 }
19517
19518 /* If necessary, drop THIS back to its stack slot. */
19519 if (this_reg && this_reg != this)
19520 {
19521 xops[0] = this_reg;
19522 xops[1] = this;
19523 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19524 }
19525
19526 xops[0] = XEXP (DECL_RTL (function), 0);
19527 if (TARGET_64BIT)
19528 {
19529 if (!flag_pic || (*targetm.binds_local_p) (function))
19530 output_asm_insn ("jmp\t%P0", xops);
19531 else
19532 {
19533 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19534 tmp = gen_rtx_CONST (Pmode, tmp);
19535 tmp = gen_rtx_MEM (QImode, tmp);
19536 xops[0] = tmp;
19537 output_asm_insn ("jmp\t%A0", xops);
19538 }
19539 }
19540 else
19541 {
19542 if (!flag_pic || (*targetm.binds_local_p) (function))
19543 output_asm_insn ("jmp\t%P0", xops);
19544 else
19545 #if TARGET_MACHO
19546 if (TARGET_MACHO)
19547 {
19548 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19549 tmp = (gen_rtx_SYMBOL_REF
19550 (Pmode,
19551 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19552 tmp = gen_rtx_MEM (QImode, tmp);
19553 xops[0] = tmp;
19554 output_asm_insn ("jmp\t%0", xops);
19555 }
19556 else
19557 #endif /* TARGET_MACHO */
19558 {
19559 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19560 output_set_got (tmp, NULL_RTX);
19561
19562 xops[1] = tmp;
19563 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19564 output_asm_insn ("jmp\t{*}%1", xops);
19565 }
19566 }
19567 }
19568
19569 static void
19570 x86_file_start (void)
19571 {
19572 default_file_start ();
19573 #if TARGET_MACHO
19574 darwin_file_start ();
19575 #endif
19576 if (X86_FILE_START_VERSION_DIRECTIVE)
19577 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19578 if (X86_FILE_START_FLTUSED)
19579 fputs ("\t.global\t__fltused\n", asm_out_file);
19580 if (ix86_asm_dialect == ASM_INTEL)
19581 fputs ("\t.intel_syntax\n", asm_out_file);
19582 }
19583
19584 int
19585 x86_field_alignment (tree field, int computed)
19586 {
19587 enum machine_mode mode;
19588 tree type = TREE_TYPE (field);
19589
19590 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19591 return computed;
19592 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19593 ? get_inner_array_type (type) : type);
19594 if (mode == DFmode || mode == DCmode
19595 || GET_MODE_CLASS (mode) == MODE_INT
19596 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19597 return MIN (32, computed);
19598 return computed;
19599 }
19600
19601 /* Output assembler code to FILE to increment profiler label # LABELNO
19602 for profiling a function entry. */
19603 void
19604 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19605 {
19606 if (TARGET_64BIT)
19607 if (flag_pic)
19608 {
19609 #ifndef NO_PROFILE_COUNTERS
19610 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
19611 #endif
19612 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19613 }
19614 else
19615 {
19616 #ifndef NO_PROFILE_COUNTERS
19617 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19618 #endif
19619 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19620 }
19621 else if (flag_pic)
19622 {
19623 #ifndef NO_PROFILE_COUNTERS
19624 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19625 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19626 #endif
19627 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19628 }
19629 else
19630 {
19631 #ifndef NO_PROFILE_COUNTERS
19632 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19633 PROFILE_COUNT_REGISTER);
19634 #endif
19635 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19636 }
19637 }
19638
19639 /* We don't have exact information about the insn sizes, but we may assume
19640 quite safely that we are informed about all 1 byte insns and memory
19641 address sizes. This is enough to eliminate unnecessary padding in
19642 99% of cases. */
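/* The estimates below are deliberately conservative: alignment unspecs and
   jump tables count as 0 bytes, direct calls as 5, known one byte insns as
   1, other non-jump insns as 1 plus their address size (at least 4 when a
   symbol is referenced), and anything else as 2.  */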
19643
19644 static int
19645 min_insn_size (rtx insn)
19646 {
19647 int l = 0;
19648
19649 if (!INSN_P (insn) || !active_insn_p (insn))
19650 return 0;
19651
19652 /* Discard alignments we've emitted, and jump instructions. */
19653 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19654 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19655 return 0;
19656 if (JUMP_P (insn)
19657 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19658 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19659 return 0;
19660
19661 /* Important case - calls are always 5 bytes.
19662 It is common to have many calls in a row. */
19663 if (CALL_P (insn)
19664 && symbolic_reference_mentioned_p (PATTERN (insn))
19665 && !SIBLING_CALL_P (insn))
19666 return 5;
19667 if (get_attr_length (insn) <= 1)
19668 return 1;
19669
19670 /* For normal instructions we may rely on the sizes of addresses
19671 and the presence of a symbol to require 4 bytes of encoding.
19672 This is not the case for jumps, whose references are PC relative. */
19673 if (!JUMP_P (insn))
19674 {
19675 l = get_attr_length_address (insn);
19676 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19677 l = 4;
19678 }
19679 if (l)
19680 return 1+l;
19681 else
19682 return 2;
19683 }
19684
19685 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19686 window. */
19687
19688 static void
19689 ix86_avoid_jump_misspredicts (void)
19690 {
19691 rtx insn, start = get_insns ();
19692 int nbytes = 0, njumps = 0;
19693 int isjump = 0;
19694
19695 /* Look for all minimal intervals of instructions containing 4 jumps.
19696 The intervals are bounded by START and INSN. NBYTES is the total
19697 size of instructions in the interval including INSN and not including
19698 START. When NBYTES is smaller than 16 bytes, it is possible
19699 that the end of START and INSN end up in the same 16 byte page.
19700
19701 The smallest offset in the page at which INSN can start occurs when START
19702 ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19703 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19704 */
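/* Worked example of the padding computed below: with NBYTES == 12 and INSN
   estimated at 2 bytes, a p2align allowing up to 15 - 12 + 2 = 5 bytes of
   padding is emitted before INSN, enough to push it out of the 16 byte
   window already holding three jumps.  */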
19705 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19706 {
19707
19708 nbytes += min_insn_size (insn);
19709 if (dump_file)
19710 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19711 INSN_UID (insn), min_insn_size (insn));
19712 if ((JUMP_P (insn)
19713 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19714 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19715 || CALL_P (insn))
19716 njumps++;
19717 else
19718 continue;
19719
19720 while (njumps > 3)
19721 {
19722 start = NEXT_INSN (start);
19723 if ((JUMP_P (start)
19724 && GET_CODE (PATTERN (start)) != ADDR_VEC
19725 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19726 || CALL_P (start))
19727 njumps--, isjump = 1;
19728 else
19729 isjump = 0;
19730 nbytes -= min_insn_size (start);
19731 }
19732 gcc_assert (njumps >= 0);
19733 if (dump_file)
19734 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19735 INSN_UID (start), INSN_UID (insn), nbytes);
19736
19737 if (njumps == 3 && isjump && nbytes < 16)
19738 {
19739 int padsize = 15 - nbytes + min_insn_size (insn);
19740
19741 if (dump_file)
19742 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19743 INSN_UID (insn), padsize);
19744 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19745 }
19746 }
19747 }
19748
19749 /* AMD Athlon works faster
19750 when RET is not the destination of a conditional jump or directly preceded
19751 by another jump instruction. We avoid the penalty by replacing such a RET
19752 with the longer form of the return instruction (return_internal_long). */
19753 static void
19754 ix86_pad_returns (void)
19755 {
19756 edge e;
19757 edge_iterator ei;
19758
19759 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19760 {
19761 basic_block bb = e->src;
19762 rtx ret = BB_END (bb);
19763 rtx prev;
19764 bool replace = false;
19765
19766 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19767 || !maybe_hot_bb_p (bb))
19768 continue;
19769 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19770 if (active_insn_p (prev) || LABEL_P (prev))
19771 break;
19772 if (prev && LABEL_P (prev))
19773 {
19774 edge e;
19775 edge_iterator ei;
19776
19777 FOR_EACH_EDGE (e, ei, bb->preds)
19778 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19779 && !(e->flags & EDGE_FALLTHRU))
19780 replace = true;
19781 }
19782 if (!replace)
19783 {
19784 prev = prev_active_insn (ret);
19785 if (prev
19786 && ((JUMP_P (prev) && any_condjump_p (prev))
19787 || CALL_P (prev)))
19788 replace = true;
19789 /* Empty functions get a branch mispredict even when the jump destination
19790 is not visible to us. */
19791 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19792 replace = true;
19793 }
19794 if (replace)
19795 {
19796 emit_insn_before (gen_return_internal_long (), ret);
19797 delete_insn (ret);
19798 }
19799 }
19800 }
19801
19802 /* Implement machine specific optimizations. We implement padding of returns
19803 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
19804 static void
19805 ix86_reorg (void)
19806 {
19807 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19808 ix86_pad_returns ();
19809 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19810 ix86_avoid_jump_misspredicts ();
19811 }
19812
19813 /* Return nonzero when a QImode register that must be represented via a REX
19814 prefix is used. */
19815 bool
19816 x86_extended_QIreg_mentioned_p (rtx insn)
19817 {
19818 int i;
19819 extract_insn_cached (insn);
19820 for (i = 0; i < recog_data.n_operands; i++)
19821 if (REG_P (recog_data.operand[i])
19822 && REGNO (recog_data.operand[i]) >= 4)
19823 return true;
19824 return false;
19825 }
19826
19827 /* Return nonzero when P points to a register encoded via a REX prefix.
19828 Called via for_each_rtx. */
19829 static int
19830 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19831 {
19832 unsigned int regno;
19833 if (!REG_P (*p))
19834 return 0;
19835 regno = REGNO (*p);
19836 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19837 }
19838
19839 /* Return true when INSN mentions register that must be encoded using REX
19840 prefix. */
19841 bool
19842 x86_extended_reg_mentioned_p (rtx insn)
19843 {
19844 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19845 }
19846
19847 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19848 optabs would emit if we didn't have TFmode patterns. */
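/* A plain signed conversion would be wrong for inputs with the sign bit
   set, so the code below branches: nonnegative values are converted
   directly, while for "negative" (large unsigned) values it computes
   (in >> 1) | (in & 1), converts that, and doubles the result; keeping
   the low bit preserves correct rounding.  */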
19849
19850 void
19851 x86_emit_floatuns (rtx operands[2])
19852 {
19853 rtx neglab, donelab, i0, i1, f0, in, out;
19854 enum machine_mode mode, inmode;
19855
19856 inmode = GET_MODE (operands[1]);
19857 gcc_assert (inmode == SImode || inmode == DImode);
19858
19859 out = operands[0];
19860 in = force_reg (inmode, operands[1]);
19861 mode = GET_MODE (out);
19862 neglab = gen_label_rtx ();
19863 donelab = gen_label_rtx ();
19864 f0 = gen_reg_rtx (mode);
19865
19866 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19867
19868 expand_float (out, in, 0);
19869
19870 emit_jump_insn (gen_jump (donelab));
19871 emit_barrier ();
19872
19873 emit_label (neglab);
19874
19875 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19876 1, OPTAB_DIRECT);
19877 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19878 1, OPTAB_DIRECT);
19879 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19880
19881 expand_float (f0, i0, 0);
19882
19883 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19884
19885 emit_label (donelab);
19886 }
19887 \f
19888 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19889 with all elements equal to VAR. Return true if successful. */
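/* When no direct broadcast exists the code below widens: the scalar is
   replicated into the next wider integer mode with a shift and an IOR
   (e.g. QImode pairs become HImode elements) and the routine recurses on
   the corresponding wider vector mode until a mode with a usable
   duplicate pattern is reached.  */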
19890
19891 static bool
19892 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19893 rtx target, rtx val)
19894 {
19895 enum machine_mode smode, wsmode, wvmode;
19896 rtx x;
19897
19898 switch (mode)
19899 {
19900 case V2SImode:
19901 case V2SFmode:
19902 if (!mmx_ok)
19903 return false;
19904 /* FALLTHRU */
19905
19906 case V2DFmode:
19907 case V2DImode:
19908 case V4SFmode:
19909 case V4SImode:
19910 val = force_reg (GET_MODE_INNER (mode), val);
19911 x = gen_rtx_VEC_DUPLICATE (mode, val);
19912 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19913 return true;
19914
19915 case V4HImode:
19916 if (!mmx_ok)
19917 return false;
19918 if (TARGET_SSE || TARGET_3DNOW_A)
19919 {
19920 val = gen_lowpart (SImode, val);
19921 x = gen_rtx_TRUNCATE (HImode, val);
19922 x = gen_rtx_VEC_DUPLICATE (mode, x);
19923 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19924 return true;
19925 }
19926 else
19927 {
19928 smode = HImode;
19929 wsmode = SImode;
19930 wvmode = V2SImode;
19931 goto widen;
19932 }
19933
19934 case V8QImode:
19935 if (!mmx_ok)
19936 return false;
19937 smode = QImode;
19938 wsmode = HImode;
19939 wvmode = V4HImode;
19940 goto widen;
19941 case V8HImode:
19942 if (TARGET_SSE2)
19943 {
19944 rtx tmp1, tmp2;
19945 /* Extend HImode to SImode using a paradoxical SUBREG. */
19946 tmp1 = gen_reg_rtx (SImode);
19947 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19948 /* Insert the SImode value as low element of V4SImode vector. */
19949 tmp2 = gen_reg_rtx (V4SImode);
19950 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19951 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19952 CONST0_RTX (V4SImode),
19953 const1_rtx);
19954 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19955 /* Cast the V4SImode vector back to a V8HImode vector. */
19956 tmp1 = gen_reg_rtx (V8HImode);
19957 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19958 /* Duplicate the low short through the whole low SImode word. */
19959 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19960 /* Cast the V8HImode vector back to a V4SImode vector. */
19961 tmp2 = gen_reg_rtx (V4SImode);
19962 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19963 /* Replicate the low element of the V4SImode vector. */
19964 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19965 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19966 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19967 return true;
19968 }
19969 smode = HImode;
19970 wsmode = SImode;
19971 wvmode = V4SImode;
19972 goto widen;
19973 case V16QImode:
19974 if (TARGET_SSE2)
19975 {
19976 rtx tmp1, tmp2;
19977 /* Extend QImode to SImode using a paradoxical SUBREG. */
19978 tmp1 = gen_reg_rtx (SImode);
19979 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19980 /* Insert the SImode value as low element of V4SImode vector. */
19981 tmp2 = gen_reg_rtx (V4SImode);
19982 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19983 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19984 CONST0_RTX (V4SImode),
19985 const1_rtx);
19986 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19987 /* Cast the V4SImode vector back to a V16QImode vector. */
19988 tmp1 = gen_reg_rtx (V16QImode);
19989 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19990 /* Duplicate the low byte through the whole low SImode word. */
19991 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19992 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19993 /* Cast the V16QImode vector back to a V4SImode vector. */
19994 tmp2 = gen_reg_rtx (V4SImode);
19995 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19996 /* Replicate the low element of the V4SImode vector. */
19997 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19998 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19999 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20000 return true;
20001 }
20002 smode = QImode;
20003 wsmode = HImode;
20004 wvmode = V8HImode;
20005 goto widen;
20006 widen:
20007 /* Replicate the value once into the next wider mode and recurse. */
20008 val = convert_modes (wsmode, smode, val, true);
20009 x = expand_simple_binop (wsmode, ASHIFT, val,
20010 GEN_INT (GET_MODE_BITSIZE (smode)),
20011 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20012 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20013
20014 x = gen_reg_rtx (wvmode);
20015 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20016 gcc_unreachable ();
20017 emit_move_insn (target, gen_lowpart (mode, x));
20018 return true;
20019
20020 default:
20021 return false;
20022 }
20023 }
20024
20025 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20026 whose ONE_VAR element is VAR, and other elements are zero. Return true
20027 if successful. */
20028
20029 static bool
20030 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20031 rtx target, rtx var, int one_var)
20032 {
20033 enum machine_mode vsimode;
20034 rtx new_target;
20035 rtx x, tmp;
20036
20037 switch (mode)
20038 {
20039 case V2SFmode:
20040 case V2SImode:
20041 if (!mmx_ok)
20042 return false;
20043 /* FALLTHRU */
20044
20045 case V2DFmode:
20046 case V2DImode:
20047 if (one_var != 0)
20048 return false;
20049 var = force_reg (GET_MODE_INNER (mode), var);
20050 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20051 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20052 return true;
20053
20054 case V4SFmode:
20055 case V4SImode:
20056 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20057 new_target = gen_reg_rtx (mode);
20058 else
20059 new_target = target;
20060 var = force_reg (GET_MODE_INNER (mode), var);
20061 x = gen_rtx_VEC_DUPLICATE (mode, var);
20062 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20063 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20064 if (one_var != 0)
20065 {
20066 /* We need to shuffle the value to the correct position, so
20067 create a new pseudo to store the intermediate result. */
20068
20069 /* With SSE2, we can use the integer shuffle insns. */
20070 if (mode != V4SFmode && TARGET_SSE2)
20071 {
20072 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20073 GEN_INT (1),
20074 GEN_INT (one_var == 1 ? 0 : 1),
20075 GEN_INT (one_var == 2 ? 0 : 1),
20076 GEN_INT (one_var == 3 ? 0 : 1)));
20077 if (target != new_target)
20078 emit_move_insn (target, new_target);
20079 return true;
20080 }
20081
20082 /* Otherwise convert the intermediate result to V4SFmode and
20083 use the SSE1 shuffle instructions. */
20084 if (mode != V4SFmode)
20085 {
20086 tmp = gen_reg_rtx (V4SFmode);
20087 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20088 }
20089 else
20090 tmp = new_target;
20091
20092 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20093 GEN_INT (1),
20094 GEN_INT (one_var == 1 ? 0 : 1),
20095 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20096 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20097
20098 if (mode != V4SFmode)
20099 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20100 else if (tmp != target)
20101 emit_move_insn (target, tmp);
20102 }
20103 else if (target != new_target)
20104 emit_move_insn (target, new_target);
20105 return true;
20106
20107 case V8HImode:
20108 case V16QImode:
20109 vsimode = V4SImode;
20110 goto widen;
20111 case V4HImode:
20112 case V8QImode:
20113 if (!mmx_ok)
20114 return false;
20115 vsimode = V2SImode;
20116 goto widen;
20117 widen:
20118 if (one_var != 0)
20119 return false;
20120
20121 /* Zero extend the variable element to SImode and recurse. */
20122 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20123
20124 x = gen_reg_rtx (vsimode);
20125 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20126 var, one_var))
20127 gcc_unreachable ();
20128
20129 emit_move_insn (target, gen_lowpart (mode, x));
20130 return true;
20131
20132 default:
20133 return false;
20134 }
20135 }
20136
20137 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20138 consisting of the values in VALS. It is known that all elements
20139 except ONE_VAR are constants. Return true if successful. */
20140
20141 static bool
20142 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20143 rtx target, rtx vals, int one_var)
20144 {
20145 rtx var = XVECEXP (vals, 0, one_var);
20146 enum machine_mode wmode;
20147 rtx const_vec, x;
20148
20149 const_vec = copy_rtx (vals);
20150 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20151 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20152
20153 switch (mode)
20154 {
20155 case V2DFmode:
20156 case V2DImode:
20157 case V2SFmode:
20158 case V2SImode:
20159 /* For the two element vectors, it's just as easy to use
20160 the general case. */
20161 return false;
20162
20163 case V4SFmode:
20164 case V4SImode:
20165 case V8HImode:
20166 case V4HImode:
20167 break;
20168
20169 case V16QImode:
20170 wmode = V8HImode;
20171 goto widen;
20172 case V8QImode:
20173 wmode = V4HImode;
20174 goto widen;
20175 widen:
20176 /* There's no way to set one QImode entry easily. Combine
20177 the variable value with its adjacent constant value, and
20178 promote to an HImode set. */
20179 x = XVECEXP (vals, 0, one_var ^ 1);
20180 if (one_var & 1)
20181 {
20182 var = convert_modes (HImode, QImode, var, true);
20183 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20184 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20185 x = GEN_INT (INTVAL (x) & 0xff);
20186 }
20187 else
20188 {
20189 var = convert_modes (HImode, QImode, var, true);
20190 x = gen_int_mode (INTVAL (x) << 8, HImode);
20191 }
20192 if (x != const0_rtx)
20193 var = expand_simple_binop (HImode, IOR, var, x, var,
20194 1, OPTAB_LIB_WIDEN);
20195
20196 x = gen_reg_rtx (wmode);
20197 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20198 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20199
20200 emit_move_insn (target, gen_lowpart (mode, x));
20201 return true;
20202
20203 default:
20204 return false;
20205 }
20206
20207 emit_move_insn (target, const_vec);
20208 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20209 return true;
20210 }
20211
20212 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20213 all values variable, and none identical. */
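/* The strategy implemented below: two element vectors are built with a
   single VEC_CONCAT; V4SFmode and V4SImode are built by concatenating two
   recursively initialized halves; the remaining integer vectors are
   assembled word by word in general registers with shifts and IORs and
   then moved into the vector register.  */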
20214
20215 static void
20216 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20217 rtx target, rtx vals)
20218 {
20219 enum machine_mode half_mode = GET_MODE_INNER (mode);
20220 rtx op0 = NULL, op1 = NULL;
20221 bool use_vec_concat = false;
20222
20223 switch (mode)
20224 {
20225 case V2SFmode:
20226 case V2SImode:
20227 if (!mmx_ok && !TARGET_SSE)
20228 break;
20229 /* FALLTHRU */
20230
20231 case V2DFmode:
20232 case V2DImode:
20233 /* For the two element vectors, we always implement VEC_CONCAT. */
20234 op0 = XVECEXP (vals, 0, 0);
20235 op1 = XVECEXP (vals, 0, 1);
20236 use_vec_concat = true;
20237 break;
20238
20239 case V4SFmode:
20240 half_mode = V2SFmode;
20241 goto half;
20242 case V4SImode:
20243 half_mode = V2SImode;
20244 goto half;
20245 half:
20246 {
20247 rtvec v;
20248
20249 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20250 Recurse to load the two halves. */
20251
20252 op0 = gen_reg_rtx (half_mode);
20253 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20254 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20255
20256 op1 = gen_reg_rtx (half_mode);
20257 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20258 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20259
20260 use_vec_concat = true;
20261 }
20262 break;
20263
20264 case V8HImode:
20265 case V16QImode:
20266 case V4HImode:
20267 case V8QImode:
20268 break;
20269
20270 default:
20271 gcc_unreachable ();
20272 }
20273
20274 if (use_vec_concat)
20275 {
20276 if (!register_operand (op0, half_mode))
20277 op0 = force_reg (half_mode, op0);
20278 if (!register_operand (op1, half_mode))
20279 op1 = force_reg (half_mode, op1);
20280
20281 emit_insn (gen_rtx_SET (VOIDmode, target,
20282 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20283 }
20284 else
20285 {
20286 int i, j, n_elts, n_words, n_elt_per_word;
20287 enum machine_mode inner_mode;
20288 rtx words[4], shift;
20289
20290 inner_mode = GET_MODE_INNER (mode);
20291 n_elts = GET_MODE_NUNITS (mode);
20292 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20293 n_elt_per_word = n_elts / n_words;
20294 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20295
20296 for (i = 0; i < n_words; ++i)
20297 {
20298 rtx word = NULL_RTX;
20299
20300 for (j = 0; j < n_elt_per_word; ++j)
20301 {
20302 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20303 elt = convert_modes (word_mode, inner_mode, elt, true);
20304
20305 if (j == 0)
20306 word = elt;
20307 else
20308 {
20309 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20310 word, 1, OPTAB_LIB_WIDEN);
20311 word = expand_simple_binop (word_mode, IOR, word, elt,
20312 word, 1, OPTAB_LIB_WIDEN);
20313 }
20314 }
20315
20316 words[i] = word;
20317 }
20318
20319 if (n_words == 1)
20320 emit_move_insn (target, gen_lowpart (mode, words[0]));
20321 else if (n_words == 2)
20322 {
20323 rtx tmp = gen_reg_rtx (mode);
20324 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20325 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20326 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20327 emit_move_insn (target, tmp);
20328 }
20329 else if (n_words == 4)
20330 {
20331 rtx tmp = gen_reg_rtx (V4SImode);
20332 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20333 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20334 emit_move_insn (target, gen_lowpart (mode, tmp));
20335 }
20336 else
20337 gcc_unreachable ();
20338 }
20339 }
20340
20341 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20342 instructions unless MMX_OK is true. */
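/* The helper routines are tried in order of increasing generality: an
   all-constant vector is loaded from the constant pool, an all-equal
   vector is broadcast, a vector with a single variable element is loaded
   from the pool and then patched, and only then is the fully general
   expansion used.  */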
20343
20344 void
20345 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20346 {
20347 enum machine_mode mode = GET_MODE (target);
20348 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20349 int n_elts = GET_MODE_NUNITS (mode);
20350 int n_var = 0, one_var = -1;
20351 bool all_same = true, all_const_zero = true;
20352 int i;
20353 rtx x;
20354
20355 for (i = 0; i < n_elts; ++i)
20356 {
20357 x = XVECEXP (vals, 0, i);
20358 if (!CONSTANT_P (x))
20359 n_var++, one_var = i;
20360 else if (x != CONST0_RTX (inner_mode))
20361 all_const_zero = false;
20362 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20363 all_same = false;
20364 }
20365
20366 /* Constants are best loaded from the constant pool. */
20367 if (n_var == 0)
20368 {
20369 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20370 return;
20371 }
20372
20373 /* If all values are identical, broadcast the value. */
20374 if (all_same
20375 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20376 XVECEXP (vals, 0, 0)))
20377 return;
20378
20379 /* Values where only one field is non-constant are best loaded from
20380 the pool and overwritten via move later. */
20381 if (n_var == 1)
20382 {
20383 if (all_const_zero
20384 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20385 XVECEXP (vals, 0, one_var),
20386 one_var))
20387 return;
20388
20389 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20390 return;
20391 }
20392
20393 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20394 }
20395
20396 void
20397 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20398 {
20399 enum machine_mode mode = GET_MODE (target);
20400 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20401 bool use_vec_merge = false;
20402 rtx tmp;
20403
20404 switch (mode)
20405 {
20406 case V2SFmode:
20407 case V2SImode:
20408 if (mmx_ok)
20409 {
20410 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20411 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20412 if (elt == 0)
20413 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20414 else
20415 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20416 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20417 return;
20418 }
20419 break;
20420
20421 case V2DFmode:
20422 case V2DImode:
20423 {
20424 rtx op0, op1;
20425
20426 /* For the two element vectors, we implement a VEC_CONCAT with
20427 the extraction of the other element. */
20428
20429 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20430 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20431
20432 if (elt == 0)
20433 op0 = val, op1 = tmp;
20434 else
20435 op0 = tmp, op1 = val;
20436
20437 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20438 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20439 }
20440 return;
20441
20442 case V4SFmode:
20443 switch (elt)
20444 {
20445 case 0:
20446 use_vec_merge = true;
20447 break;
20448
20449 case 1:
20450 /* tmp = target = A B C D */
20451 tmp = copy_to_reg (target);
20452 /* target = A A B B */
20453 emit_insn (gen_sse_unpcklps (target, target, target));
20454 /* target = X A B B */
20455 ix86_expand_vector_set (false, target, val, 0);
20456 /* target = A X C D */
20457 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20458 GEN_INT (1), GEN_INT (0),
20459 GEN_INT (2+4), GEN_INT (3+4)));
20460 return;
20461
20462 case 2:
20463 /* tmp = target = A B C D */
20464 tmp = copy_to_reg (target);
20465 /* tmp = X B C D */
20466 ix86_expand_vector_set (false, tmp, val, 0);
20467 /* target = A B X D */
20468 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20469 GEN_INT (0), GEN_INT (1),
20470 GEN_INT (0+4), GEN_INT (3+4)));
20471 return;
20472
20473 case 3:
20474 /* tmp = target = A B C D */
20475 tmp = copy_to_reg (target);
20476 /* tmp = X B C D */
20477 ix86_expand_vector_set (false, tmp, val, 0);
20478 /* target = A B C X */
20479 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20480 GEN_INT (0), GEN_INT (1),
20481 GEN_INT (2+4), GEN_INT (0+4)));
20482 return;
20483
20484 default:
20485 gcc_unreachable ();
20486 }
20487 break;
20488
20489 case V4SImode:
20490 /* Element 0 handled by vec_merge below. */
20491 if (elt == 0)
20492 {
20493 use_vec_merge = true;
20494 break;
20495 }
20496
20497 if (TARGET_SSE2)
20498 {
20499 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20500 store into element 0, then shuffle them back. */
20501
20502 rtx order[4];
20503
20504 order[0] = GEN_INT (elt);
20505 order[1] = const1_rtx;
20506 order[2] = const2_rtx;
20507 order[3] = GEN_INT (3);
20508 order[elt] = const0_rtx;
20509
20510 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20511 order[1], order[2], order[3]));
20512
20513 ix86_expand_vector_set (false, target, val, 0);
20514
20515 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20516 order[1], order[2], order[3]));
20517 }
20518 else
20519 {
20520 /* For SSE1, we have to reuse the V4SF code. */
20521 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20522 gen_lowpart (SFmode, val), elt);
20523 }
20524 return;
20525
20526 case V8HImode:
20527 use_vec_merge = TARGET_SSE2;
20528 break;
20529 case V4HImode:
20530 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20531 break;
20532
20533 case V16QImode:
20534 case V8QImode:
20535 default:
20536 break;
20537 }
20538
20539 if (use_vec_merge)
20540 {
20541 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20542 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20543 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20544 }
20545 else
20546 {
20547 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20548
20549 emit_move_insn (mem, target);
20550
20551 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20552 emit_move_insn (tmp, val);
20553
20554 emit_move_insn (target, mem);
20555 }
20556 }
20557
20558 void
20559 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20560 {
20561 enum machine_mode mode = GET_MODE (vec);
20562 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20563 bool use_vec_extr = false;
20564 rtx tmp;
20565
20566 switch (mode)
20567 {
20568 case V2SImode:
20569 case V2SFmode:
20570 if (!mmx_ok)
20571 break;
20572 /* FALLTHRU */
20573
20574 case V2DFmode:
20575 case V2DImode:
20576 use_vec_extr = true;
20577 break;
20578
20579 case V4SFmode:
20580 switch (elt)
20581 {
20582 case 0:
20583 tmp = vec;
20584 break;
20585
20586 case 1:
20587 case 3:
20588 tmp = gen_reg_rtx (mode);
20589 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20590 GEN_INT (elt), GEN_INT (elt),
20591 GEN_INT (elt+4), GEN_INT (elt+4)));
20592 break;
20593
20594 case 2:
20595 tmp = gen_reg_rtx (mode);
20596 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20597 break;
20598
20599 default:
20600 gcc_unreachable ();
20601 }
20602 vec = tmp;
20603 use_vec_extr = true;
20604 elt = 0;
20605 break;
20606
20607 case V4SImode:
20608 if (TARGET_SSE2)
20609 {
20610 switch (elt)
20611 {
20612 case 0:
20613 tmp = vec;
20614 break;
20615
20616 case 1:
20617 case 3:
20618 tmp = gen_reg_rtx (mode);
20619 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20620 GEN_INT (elt), GEN_INT (elt),
20621 GEN_INT (elt), GEN_INT (elt)));
20622 break;
20623
20624 case 2:
20625 tmp = gen_reg_rtx (mode);
20626 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20627 break;
20628
20629 default:
20630 gcc_unreachable ();
20631 }
20632 vec = tmp;
20633 use_vec_extr = true;
20634 elt = 0;
20635 }
20636 else
20637 {
20638 /* For SSE1, we have to reuse the V4SF code. */
20639 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20640 gen_lowpart (V4SFmode, vec), elt);
20641 return;
20642 }
20643 break;
20644
20645 case V8HImode:
20646 use_vec_extr = TARGET_SSE2;
20647 break;
20648 case V4HImode:
20649 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20650 break;
20651
20652 case V16QImode:
20653 case V8QImode:
20654 /* ??? Could extract the appropriate HImode element and shift. */
20655 default:
20656 break;
20657 }
20658
20659 if (use_vec_extr)
20660 {
20661 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20662 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20663
20664 /* Let the rtl optimizers know about the zero extension performed. */
20665 if (inner_mode == HImode)
20666 {
20667 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20668 target = gen_lowpart (SImode, target);
20669 }
20670
20671 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20672 }
20673 else
20674 {
20675 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20676
20677 emit_move_insn (mem, vec);
20678
20679 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20680 emit_move_insn (target, tmp);
20681 }
20682 }
20683
20684 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20685 pattern to reduce; DEST is the destination; IN is the input vector. */
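/* For example, with FN an addition pattern this computes a horizontal sum:
   movhlps brings elements 2 and 3 down, FN adds them to elements 0 and 1,
   the shufps replicates element 1 of that partial result, and the final FN
   leaves the complete reduction in element 0 of DEST.  */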
20686
20687 void
20688 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20689 {
20690 rtx tmp1, tmp2, tmp3;
20691
20692 tmp1 = gen_reg_rtx (V4SFmode);
20693 tmp2 = gen_reg_rtx (V4SFmode);
20694 tmp3 = gen_reg_rtx (V4SFmode);
20695
20696 emit_insn (gen_sse_movhlps (tmp1, in, in));
20697 emit_insn (fn (tmp2, tmp1, in));
20698
20699 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20700 GEN_INT (1), GEN_INT (1),
20701 GEN_INT (1+4), GEN_INT (1+4)));
20702 emit_insn (fn (dest, tmp2, tmp3));
20703 }
20704 \f
20705 /* Target hook for scalar_mode_supported_p. */
20706 static bool
20707 ix86_scalar_mode_supported_p (enum machine_mode mode)
20708 {
20709 if (DECIMAL_FLOAT_MODE_P (mode))
20710 return true;
20711 else
20712 return default_scalar_mode_supported_p (mode);
20713 }
20714
20715 /* Implements target hook vector_mode_supported_p. */
20716 static bool
20717 ix86_vector_mode_supported_p (enum machine_mode mode)
20718 {
20719 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20720 return true;
20721 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20722 return true;
20723 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20724 return true;
20725 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20726 return true;
20727 return false;
20728 }
20729
20730 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20731
20732 We do this in the new i386 backend to maintain source compatibility
20733 with the old cc0-based compiler. */
20734
20735 static tree
20736 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20737 tree inputs ATTRIBUTE_UNUSED,
20738 tree clobbers)
20739 {
20740 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20741 clobbers);
20742 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20743 clobbers);
20744 return clobbers;
20745 }
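
/* For example, with the clobbers added above a (purely illustrative)
   user asm such as

     asm ("bsf %1, %0" : "=r" (i) : "rm" (x));

   is treated as if it also clobbered the condition codes and the x87
   status word, matching what the old cc0-based compiler assumed. */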
20746
20747 /* Return true if this goes in large data/bss. */
20748
20749 static bool
20750 ix86_in_large_data_p (tree exp)
20751 {
20752 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20753 return false;
20754
20755 /* Functions are never large data. */
20756 if (TREE_CODE (exp) == FUNCTION_DECL)
20757 return false;
20758
20759 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20760 {
20761 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20762 if (strcmp (section, ".ldata") == 0
20763 || strcmp (section, ".lbss") == 0)
20764 return true;
20765 return false;
20766 }
20767 else
20768 {
20769 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20770
20771 /* If this is an incomplete type with size 0, then we can't put it
20772 in data because it might be too big when completed. */
20773 if (!size || size > ix86_section_threshold)
20774 return true;
20775 }
20776
20777 return false;
20778 }
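
/* For example, with -mcmodel=medium ix86_in_large_data_p treats a
   definition such as

     static char big_buffer[1 << 20];

   as large data once its size exceeds ix86_section_threshold (set by
   -mlarge-data-threshold), while small objects keep going into the
   ordinary .data/.bss sections. */
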
20779 static void
20780 ix86_encode_section_info (tree decl, rtx rtl, int first)
20781 {
20782 default_encode_section_info (decl, rtl, first);
20783
20784 if (TREE_CODE (decl) == VAR_DECL
20785 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20786 && ix86_in_large_data_p (decl))
20787 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20788 }
20789
20790 /* Worker function for REVERSE_CONDITION. */
20791
20792 enum rtx_code
20793 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20794 {
20795 return (mode != CCFPmode && mode != CCFPUmode
20796 ? reverse_condition (code)
20797 : reverse_condition_maybe_unordered (code));
20798 }
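
/* For example, ix86_reverse_condition turns GT into LE for the integer
   flag modes, but into UNLE for CCFPmode/CCFPUmode, so that an unordered
   (NaN) result still transfers control to the reversed branch. */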
20799
20800 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20801 to OPERANDS[0]. */
20802
20803 const char *
20804 output_387_reg_move (rtx insn, rtx *operands)
20805 {
20806 if (REG_P (operands[1])
20807 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20808 {
20809 if (REGNO (operands[0]) == FIRST_STACK_REG)
20810 return output_387_ffreep (operands, 0);
20811 return "fstp\t%y0";
20812 }
20813 if (STACK_TOP_P (operands[0]))
20814 return "fld%z1\t%y1";
20815 return "fst\t%y0";
20816 }
20817
20818 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20819 FP status register is set. */
20820
20821 void
20822 ix86_emit_fp_unordered_jump (rtx label)
20823 {
20824 rtx reg = gen_reg_rtx (HImode);
20825 rtx temp;
20826
20827 emit_insn (gen_x86_fnstsw_1 (reg));
20828
20829 if (TARGET_USE_SAHF)
20830 {
20831 emit_insn (gen_x86_sahf_1 (reg));
20832
20833 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20834 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20835 }
20836 else
20837 {
20838 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20839
20840 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20841 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20842 }
20843
20844 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20845 gen_rtx_LABEL_REF (VOIDmode, label),
20846 pc_rtx);
20847 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20848 emit_jump_insn (temp);
20849 }
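
/* Roughly, ix86_emit_fp_unordered_jump emits either

     fnstsw %ax ; sahf          ; jp  label     (TARGET_USE_SAHF)
   or
     fnstsw %ax ; testb $4, %ah ; jne label

   since the C2 flag is bit 2 of the high byte of the FP status word. */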
20850
20851 /* Output code to perform a log1p XFmode calculation. */
20852
20853 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20854 {
20855 rtx label1 = gen_label_rtx ();
20856 rtx label2 = gen_label_rtx ();
20857
20858 rtx tmp = gen_reg_rtx (XFmode);
20859 rtx tmp2 = gen_reg_rtx (XFmode);
20860
20861 emit_insn (gen_absxf2 (tmp, op1));
20862 emit_insn (gen_cmpxf (tmp,
20863 CONST_DOUBLE_FROM_REAL_VALUE (
20864 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20865 XFmode)));
20866 emit_jump_insn (gen_bge (label1));
20867
20868 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20869 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20870 emit_jump (label2);
20871
20872 emit_label (label1);
20873 emit_move_insn (tmp, CONST1_RTX (XFmode));
20874 emit_insn (gen_addxf3 (tmp, op1, tmp));
20875 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20876 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20877
20878 emit_label (label2);
20879 }
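
/* In C terms, ix86_emit_i387_log1p computes (with ln2 loaded by fldln2):

     if (fabs (op1) < 0.29289321881345247561810596348408353)   1 - sqrt(2)/2
       op0 = ln2 * log2 (op1 + 1.0);     fyl2xp1, accurate for small op1
     else
       op0 = ln2 * log2 (1.0 + op1);     fyl2x on the explicit 1.0 + op1

   Both branches equal log1p (op1); fyl2xp1 is only usable below the
   threshold, hence the range check. */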
20880
20881 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20882
20883 static void
20884 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20885 tree decl)
20886 {
20887 /* With Binutils 2.15, the "@unwind" marker must be specified on
20888 every occurrence of the ".eh_frame" section, not just the first
20889 one. */
20890 if (TARGET_64BIT
20891 && strcmp (name, ".eh_frame") == 0)
20892 {
20893 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20894 flags & SECTION_WRITE ? "aw" : "a");
20895 return;
20896 }
20897 default_elf_asm_named_section (name, flags, decl);
20898 }
20899
20900 /* Return the mangling of TYPE if it is an extended fundamental type. */
20901
20902 static const char *
20903 ix86_mangle_fundamental_type (tree type)
20904 {
20905 switch (TYPE_MODE (type))
20906 {
20907 case TFmode:
20908 /* __float128 is "g". */
20909 return "g";
20910 case XFmode:
20911 /* "long double" or __float80 is "e". */
20912 return "e";
20913 default:
20914 return NULL;
20915 }
20916 }
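
/* For example, using the codes returned by ix86_mangle_fundamental_type,
   the C++ declaration "void f (__float128)" mangles as _Z1fg and
   "void f (long double)" mangles as _Z1fe, as required by the
   Itanium C++ ABI. */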
20917
20918 /* For 32-bit code we can save PIC register setup by using the
20919 __stack_chk_fail_local hidden function instead of calling
20920 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20921 register, so it is better to call __stack_chk_fail directly. */
20922
20923 static tree
20924 ix86_stack_protect_fail (void)
20925 {
20926 return TARGET_64BIT
20927 ? default_external_stack_protect_fail ()
20928 : default_hidden_stack_protect_fail ();
20929 }
20930
20931 /* Select a format to encode pointers in exception handling data. CODE
20932 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20933 true if the symbol may be affected by dynamic relocations.
20934
20935 ??? All x86 object file formats are capable of representing this.
20936 After all, the relocation needed is the same as for the call insn.
20937 Whether or not a particular assembler allows us to enter such, I
20938 guess we'll have to see. */
20939 int
20940 asm_preferred_eh_data_format (int code, int global)
20941 {
20942 if (flag_pic)
20943 {
20944 int type = DW_EH_PE_sdata8;
20945 if (!TARGET_64BIT
20946 || ix86_cmodel == CM_SMALL_PIC
20947 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20948 type = DW_EH_PE_sdata4;
20949 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20950 }
20951 if (ix86_cmodel == CM_SMALL
20952 || (ix86_cmodel == CM_MEDIUM && code))
20953 return DW_EH_PE_udata4;
20954 return DW_EH_PE_absptr;
20955 }
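
/* For example, 32-bit PIC code encodes a global symbol as
   DW_EH_PE_indirect | DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x9b) and a
   local code label as DW_EH_PE_pcrel | DW_EH_PE_sdata4 (0x1b). */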
20956 \f
20957 /* Expand copysign: combine the sign of SIGN with the positive value
20958 ABS_VALUE, storing the result in RESULT. If MASK is non-null, it shall
20959 be a mask that masks out the sign-bit. */
20960 static void
20961 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20962 {
20963 enum machine_mode mode = GET_MODE (sign);
20964 rtx sgn = gen_reg_rtx (mode);
20965 if (mask == NULL_RTX)
20966 {
20967 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20968 if (!VECTOR_MODE_P (mode))
20969 {
20970 /* We need to generate a scalar mode mask in this case. */
20971 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20972 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20973 mask = gen_reg_rtx (mode);
20974 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20975 }
20976 }
20977 else
20978 mask = gen_rtx_NOT (mode, mask);
20979 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20980 gen_rtx_AND (mode, mask, sign)));
20981 emit_insn (gen_rtx_SET (VOIDmode, result,
20982 gen_rtx_IOR (mode, abs_value, sgn)));
20983 }
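
/* In scalar terms, ix86_sse_copysign_to_positive computes

     result = abs_value | (sign & SIGNBIT);

   where SIGNBIT is the sign-bit mask (0x80000000 for SFmode,
   0x8000000000000000 for DFmode) and ABS_VALUE is assumed to have its
   sign bit already cleared. */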
20984
20985 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20986 mask for masking out the sign-bit is stored in *SMASK, if that is
20987 non-null. */
20988 static rtx
20989 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20990 {
20991 enum machine_mode mode = GET_MODE (op0);
20992 rtx xa, mask;
20993
20994 xa = gen_reg_rtx (mode);
20995 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20996 if (!VECTOR_MODE_P (mode))
20997 {
20998 /* We need to generate a scalar mode mask in this case. */
20999 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21000 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21001 mask = gen_reg_rtx (mode);
21002 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21003 }
21004 emit_insn (gen_rtx_SET (VOIDmode, xa,
21005 gen_rtx_AND (mode, op0, mask)));
21006
21007 if (smask)
21008 *smask = mask;
21009
21010 return xa;
21011 }
21012
21013 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21014 swapping the operands if SWAP_OPERANDS is true. The expanded
21015 code is a forward jump to a newly created label in case the
21016 comparison is true. The generated label rtx is returned. */
21017 static rtx
21018 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21019 bool swap_operands)
21020 {
21021 rtx label, tmp;
21022
21023 if (swap_operands)
21024 {
21025 tmp = op0;
21026 op0 = op1;
21027 op1 = tmp;
21028 }
21029
21030 label = gen_label_rtx ();
21031 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21032 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21033 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21034 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21035 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21036 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21037 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21038 JUMP_LABEL (tmp) = label;
21039
21040 return label;
21041 }
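
/* For example, the rounding expanders below use

     label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);

   which branches to the new label when TWO52 <= xa or the operands are
   unordered, i.e. exactly when !isless (xa, TWO52). */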
21042
21043 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21044 using comparison code CODE. Operands are swapped for the comparison if
21045 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21046 static rtx
21047 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21048 bool swap_operands)
21049 {
21050 enum machine_mode mode = GET_MODE (op0);
21051 rtx mask = gen_reg_rtx (mode);
21052
21053 if (swap_operands)
21054 {
21055 rtx tmp = op0;
21056 op0 = op1;
21057 op1 = tmp;
21058 }
21059
21060 if (mode == DFmode)
21061 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21062 gen_rtx_fmt_ee (code, mode, op0, op1)));
21063 else
21064 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21065 gen_rtx_fmt_ee (code, mode, op0, op1)));
21066
21067 return mask;
21068 }
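
/* The mask returned by ix86_expand_sse_compare_mask is all-ones where the
   comparison holds and all-zeros elsewhere, which lets the expanders below
   build a branch-free conditional adjustment, roughly

     adj = mask & 1.0;       bitwise AND on the representation
     x   = x - adj;          subtracts 1.0 exactly where the test held  */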
21069
21070 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21071 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21072 static rtx
21073 ix86_gen_TWO52 (enum machine_mode mode)
21074 {
21075 REAL_VALUE_TYPE TWO52r;
21076 rtx TWO52;
21077
21078 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21079 TWO52 = const_double_from_real_value (TWO52r, mode);
21080 TWO52 = force_reg (mode, TWO52);
21081
21082 return TWO52;
21083 }
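
/* The constant returned by ix86_gen_TWO52 is 0x1p52 for DFmode and 0x1p23
   for SFmode. The expanders below rely on the classic trick that, for
   0 <= x < 2**52,

     x2 = (x + 0x1p52) - 0x1p52;

   rounds x to an integer in the current rounding mode, because every
   double of magnitude 2**52 or larger is already an integer. */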
21084
21085 /* Expand SSE sequence for computing lround from OP1 storing
21086 into OP0. */
21087 void
21088 ix86_expand_lround (rtx op0, rtx op1)
21089 {
21090 /* C code for the stuff we're doing below:
21091 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
21092 return (long)tmp;
21093 */
21094 enum machine_mode mode = GET_MODE (op1);
21095 const struct real_format *fmt;
21096 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21097 rtx adj;
21098
21099 /* load nextafter (0.5, 0.0) */
21100 fmt = REAL_MODE_FORMAT (mode);
21101 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21102 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21103
21104 /* adj = copysign (0.5, op1) */
21105 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21106 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21107
21108 /* adj = op1 + adj */
21109 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21110
21111 /* op0 = (imode)adj */
21112 expand_fix (op0, adj, 0);
21113 }
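
/* Using nextafter (0.5, 0.0) instead of a full 0.5 matters for arguments
   just below one half: for the largest double smaller than 0.5, adding 0.5
   would round the sum up to 1.0 and lround would wrongly return 1, while
   adding the slightly smaller constant keeps the sum below 1.0 so the
   truncation correctly yields 0. */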
21114
21115 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
21116 into OP0. */
21117 void
21118 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21119 {
21120 /* C code for the stuff we're doing below (for do_floor):
21121 xi = (long)op1;
21122 xi -= (double)xi > op1 ? 1 : 0;
21123 return xi;
21124 */
21125 enum machine_mode fmode = GET_MODE (op1);
21126 enum machine_mode imode = GET_MODE (op0);
21127 rtx ireg, freg, label, tmp;
21128
21129 /* reg = (long)op1 */
21130 ireg = gen_reg_rtx (imode);
21131 expand_fix (ireg, op1, 0);
21132
21133 /* freg = (double)reg */
21134 freg = gen_reg_rtx (fmode);
21135 expand_float (freg, ireg, 0);
21136
21137 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21138 label = ix86_expand_sse_compare_and_jump (UNLE,
21139 freg, op1, !do_floor);
21140 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21141 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21142 emit_move_insn (ireg, tmp);
21143
21144 emit_label (label);
21145 LABEL_NUSES (label) = 1;
21146
21147 emit_move_insn (op0, ireg);
21148 }
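
/* For do_floor == false the same sequence computes lceil:

     xi = (long)op1;
     xi += (double)xi < op1 ? 1 : 0;
     return xi;  */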
21149
21150 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21151 result in OPERAND0. */
21152 void
21153 ix86_expand_rint (rtx operand0, rtx operand1)
21154 {
21155 /* C code for the stuff we're doing below:
21156 xa = fabs (operand1);
21157 if (!isless (xa, 2**52))
21158 return operand1;
21159 xa = xa + 2**52 - 2**52;
21160 return copysign (xa, operand1);
21161 */
21162 enum machine_mode mode = GET_MODE (operand0);
21163 rtx res, xa, label, TWO52, mask;
21164
21165 res = gen_reg_rtx (mode);
21166 emit_move_insn (res, operand1);
21167
21168 /* xa = abs (operand1) */
21169 xa = ix86_expand_sse_fabs (res, &mask);
21170
21171 /* if (!isless (xa, TWO52)) goto label; */
21172 TWO52 = ix86_gen_TWO52 (mode);
21173 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21174
21175 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21176 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21177
21178 ix86_sse_copysign_to_positive (res, xa, res, mask);
21179
21180 emit_label (label);
21181 LABEL_NUSES (label) = 1;
21182
21183 emit_move_insn (operand0, res);
21184 }
21185
21186 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21187 into OPERAND0. */
21188 void
21189 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21190 {
21191 /* C code for the stuff we expand below.
21192 double xa = fabs (x), x2;
21193 if (!isless (xa, TWO52))
21194 return x;
21195 xa = xa + TWO52 - TWO52;
21196 x2 = copysign (xa, x);
21197 Compensate. Floor:
21198 if (x2 > x)
21199 x2 -= 1;
21200 Compensate. Ceil:
21201 if (x2 < x)
21202 x2 -= -1;
21203 return x2;
21204 */
21205 enum machine_mode mode = GET_MODE (operand0);
21206 rtx xa, TWO52, tmp, label, one, res, mask;
21207
21208 TWO52 = ix86_gen_TWO52 (mode);
21209
21210 /* Temporary for holding the result, initialized to the input
21211 operand to ease control flow. */
21212 res = gen_reg_rtx (mode);
21213 emit_move_insn (res, operand1);
21214
21215 /* xa = abs (operand1) */
21216 xa = ix86_expand_sse_fabs (res, &mask);
21217
21218 /* if (!isless (xa, TWO52)) goto label; */
21219 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21220
21221 /* xa = xa + TWO52 - TWO52; */
21222 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21223 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21224
21225 /* xa = copysign (xa, operand1) */
21226 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21227
21228 /* generate 1.0 or -1.0 */
21229 one = force_reg (mode,
21230 const_double_from_real_value (do_floor
21231 ? dconst1 : dconstm1, mode));
21232
21233 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21234 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21235 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21236 gen_rtx_AND (mode, one, tmp)));
21237 /* We always need to subtract here to preserve signed zero. */
21238 tmp = expand_simple_binop (mode, MINUS,
21239 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21240 emit_move_insn (res, tmp);
21241
21242 emit_label (label);
21243 LABEL_NUSES (label) = 1;
21244
21245 emit_move_insn (operand0, res);
21246 }
21247
21248 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21249 into OPERAND0. */
21250 void
21251 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21252 {
21253 /* C code for the stuff we expand below.
21254 double xa = fabs (x), x2;
21255 if (!isless (xa, TWO52))
21256 return x;
21257 x2 = (double)(long)x;
21258 Compensate. Floor:
21259 if (x2 > x)
21260 x2 -= 1;
21261 Compensate. Ceil:
21262 if (x2 < x)
21263 x2 += 1;
21264 if (HONOR_SIGNED_ZEROS (mode))
21265 return copysign (x2, x);
21266 return x2;
21267 */
21268 enum machine_mode mode = GET_MODE (operand0);
21269 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21270
21271 TWO52 = ix86_gen_TWO52 (mode);
21272
21273 /* Temporary for holding the result, initialized to the input
21274 operand to ease control flow. */
21275 res = gen_reg_rtx (mode);
21276 emit_move_insn (res, operand1);
21277
21278 /* xa = abs (operand1) */
21279 xa = ix86_expand_sse_fabs (res, &mask);
21280
21281 /* if (!isless (xa, TWO52)) goto label; */
21282 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21283
21284 /* xa = (double)(long)x */
21285 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21286 expand_fix (xi, res, 0);
21287 expand_float (xa, xi, 0);
21288
21289 /* generate 1.0 */
21290 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21291
21292 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21293 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21294 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21295 gen_rtx_AND (mode, one, tmp)));
21296 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21297 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21298 emit_move_insn (res, tmp);
21299
21300 if (HONOR_SIGNED_ZEROS (mode))
21301 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21302
21303 emit_label (label);
21304 LABEL_NUSES (label) = 1;
21305
21306 emit_move_insn (operand0, res);
21307 }
21308
21309 /* Expand SSE sequence for computing round from OPERAND1 storing
21310 into OPERAND0. Sequence that works without relying on DImode truncation
21311 via cvttsd2siq that is only available on 64bit targets. */
21312 void
21313 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21314 {
21315 /* C code for the stuff we expand below.
21316 double xa = fabs (x), xa2, x2;
21317 if (!isless (xa, TWO52))
21318 return x;
21319 Using the absolute value and copying back sign makes
21320 -0.0 -> -0.0 correct.
21321 xa2 = xa + TWO52 - TWO52;
21322 Compensate.
21323 dxa = xa2 - xa;
21324 if (dxa <= -0.5)
21325 xa2 += 1;
21326 else if (dxa > 0.5)
21327 xa2 -= 1;
21328 x2 = copysign (xa2, x);
21329 return x2;
21330 */
21331 enum machine_mode mode = GET_MODE (operand0);
21332 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21333
21334 TWO52 = ix86_gen_TWO52 (mode);
21335
21336 /* Temporary for holding the result, initialized to the input
21337 operand to ease control flow. */
21338 res = gen_reg_rtx (mode);
21339 emit_move_insn (res, operand1);
21340
21341 /* xa = abs (operand1) */
21342 xa = ix86_expand_sse_fabs (res, &mask);
21343
21344 /* if (!isless (xa, TWO52)) goto label; */
21345 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21346
21347 /* xa2 = xa + TWO52 - TWO52; */
21348 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21349 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21350
21351 /* dxa = xa2 - xa; */
21352 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21353
21354 /* generate 0.5, 1.0 and -0.5 */
21355 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21356 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21357 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21358 0, OPTAB_DIRECT);
21359
21360 /* Compensate. */
21361 tmp = gen_reg_rtx (mode);
21362 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21363 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21364 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21365 gen_rtx_AND (mode, one, tmp)));
21366 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21367 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21368 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21369 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21370 gen_rtx_AND (mode, one, tmp)));
21371 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21372
21373 /* res = copysign (xa2, operand1) */
21374 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21375
21376 emit_label (label);
21377 LABEL_NUSES (label) = 1;
21378
21379 emit_move_insn (operand0, res);
21380 }
21381
21382 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21383 into OPERAND0. */
21384 void
21385 ix86_expand_trunc (rtx operand0, rtx operand1)
21386 {
21387 /* C code for SSE variant we expand below.
21388 double xa = fabs (x), x2;
21389 if (!isless (xa, TWO52))
21390 return x;
21391 x2 = (double)(long)x;
21392 if (HONOR_SIGNED_ZEROS (mode))
21393 return copysign (x2, x);
21394 return x2;
21395 */
21396 enum machine_mode mode = GET_MODE (operand0);
21397 rtx xa, xi, TWO52, label, res, mask;
21398
21399 TWO52 = ix86_gen_TWO52 (mode);
21400
21401 /* Temporary for holding the result, initialized to the input
21402 operand to ease control flow. */
21403 res = gen_reg_rtx (mode);
21404 emit_move_insn (res, operand1);
21405
21406 /* xa = abs (operand1) */
21407 xa = ix86_expand_sse_fabs (res, &mask);
21408
21409 /* if (!isless (xa, TWO52)) goto label; */
21410 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21411
21412 /* x = (double)(long)x */
21413 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21414 expand_fix (xi, res, 0);
21415 expand_float (res, xi, 0);
21416
21417 if (HONOR_SIGNED_ZEROS (mode))
21418 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21419
21420 emit_label (label);
21421 LABEL_NUSES (label) = 1;
21422
21423 emit_move_insn (operand0, res);
21424 }
21425
21426 /* Expand SSE sequence for computing trunc from OPERAND1 storing into
21427 OPERAND0, without relying on the 64-bit-only cvttsd2siq truncation. */
21428 void
21429 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21430 {
21431 enum machine_mode mode = GET_MODE (operand0);
21432 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21433
21434 /* C code for SSE variant we expand below.
21435 double xa = fabs (x), xa2, x2;
21436 if (!isless (xa, TWO52))
21437 return x;
21438 xa2 = xa + TWO52 - TWO52;
21439 Compensate:
21440 if (xa2 > xa)
21441 xa2 -= 1.0;
21442 x2 = copysign (xa2, x);
21443 return x2;
21444 */
21445
21446 TWO52 = ix86_gen_TWO52 (mode);
21447
21448 /* Temporary for holding the result, initialized to the input
21449 operand to ease control flow. */
21450 res = gen_reg_rtx (mode);
21451 emit_move_insn (res, operand1);
21452
21453 /* xa = abs (operand1) */
21454 xa = ix86_expand_sse_fabs (res, &smask);
21455
21456 /* if (!isless (xa, TWO52)) goto label; */
21457 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21458
21459 /* res = xa + TWO52 - TWO52; */
21460 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21461 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21462 emit_move_insn (res, tmp);
21463
21464 /* generate 1.0 */
21465 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21466
21467 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21468 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21469 emit_insn (gen_rtx_SET (VOIDmode, mask,
21470 gen_rtx_AND (mode, mask, one)));
21471 tmp = expand_simple_binop (mode, MINUS,
21472 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21473 emit_move_insn (res, tmp);
21474
21475 /* res = copysign (res, operand1) */
21476 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21477
21478 emit_label (label);
21479 LABEL_NUSES (label) = 1;
21480
21481 emit_move_insn (operand0, res);
21482 }
21483
21484 /* Expand SSE sequence for computing round from OPERAND1 storing
21485 into OPERAND0. */
21486 void
21487 ix86_expand_round (rtx operand0, rtx operand1)
21488 {
21489 /* C code for the stuff we're doing below:
21490 double xa = fabs (x);
21491 if (!isless (xa, TWO52))
21492 return x;
21493 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21494 return copysign (xa, x);
21495 */
21496 enum machine_mode mode = GET_MODE (operand0);
21497 rtx res, TWO52, xa, label, xi, half, mask;
21498 const struct real_format *fmt;
21499 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21500
21501 /* Temporary for holding the result, initialized to the input
21502 operand to ease control flow. */
21503 res = gen_reg_rtx (mode);
21504 emit_move_insn (res, operand1);
21505
21506 TWO52 = ix86_gen_TWO52 (mode);
21507 xa = ix86_expand_sse_fabs (res, &mask);
21508 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21509
21510 /* load nextafter (0.5, 0.0) */
21511 fmt = REAL_MODE_FORMAT (mode);
21512 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21513 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21514
21515 /* xa = xa + 0.5 */
21516 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21517 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21518
21519 /* xa = (double)(int64_t)xa */
21520 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21521 expand_fix (xi, xa, 0);
21522 expand_float (xa, xi, 0);
21523
21524 /* res = copysign (xa, operand1) */
21525 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21526
21527 emit_label (label);
21528 LABEL_NUSES (label) = 1;
21529
21530 emit_move_insn (operand0, res);
21531 }
21532
21533 #include "gt-i386.h"