re PR target/30778 (invalid code generation for memset() with -mtune=k8)
gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
 59 /* Return the index of the given mode in the mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
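/* A minimal usage sketch (assuming the struct processor_costs field names
   from i386.h, such as mult_init and divide):

     ix86_cost->mult_init[MODE_INDEX (SImode)]

   looks up the SImode row (index 2) of the multiply startup costs below.
   QImode, HImode, SImode and DImode map to indices 0-3; any other mode,
   e.g. TImode, falls through to the trailing "other" entry at index 4.  */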
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
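/* Worked example of the scaling noted above: a speed-tuned add costs
   COSTS_N_INSNS (1) == 4, while in the size table below the same add costs
   COSTS_N_BYTES (2) == 4, i.e. its assumed 2-byte encoding, so the two
   scales stay directly comparable (one cost unit is roughly half a byte).  */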
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
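/* A sketch of how the string-operation tables at the end of each cost
   structure are read (assuming the stringop_algs layout from i386.h: a
   default algorithm for blocks of unknown size, followed by {max_size,
   algorithm} pairs where -1 means "anything larger").  For example

     {libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}}

   means: unknown sizes go through a libcall, known sizes up to 256 bytes
   use a rep-prefixed 4-byte string op (rep movsl / rep stosl), and larger
   blocks again use a libcall.  Each table comes in a 32-bit/64-bit pair;
   DUMMY_STRINGOP_ALGS simply fills the 64-bit slot for CPUs that never run
   64-bit code.  */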
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
 353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
 354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
 355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
 356 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
 357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 532 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
 533 than K8 does. Alignment becomes important after 8 bytes for memcpy and
 534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
 584 /* New AMD processors never drop prefetches; if they cannot be performed
 585 immediately, they are queued. We set the number of simultaneous prefetches
 586 to a large constant to reflect this (it is probably not a good idea not
 587 to limit the number of prefetches at all, as their execution also takes some
 588 time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
 597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very small
 598 blocks it is better to use a loop. For large blocks, a libcall can do
 599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
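/* Reading the k8_cost memset tables just above (a sketch, assuming the
   {max_size, algorithm} interpretation of stringop_algs): in 32-bit code,
   blocks of up to 8 bytes use a simple loop, up to 24 bytes an unrolled
   loop, up to 2048 bytes rep stosl, and larger or unknown-size blocks a
   libcall; in 64-bit code, up to 48 bytes use an unrolled loop, up to
   8192 bytes rep stosq, and anything larger a libcall.  */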
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
 657 /* New AMD processors never drop prefetches; if they cannot be performed
 658 immediately, they are queued. We set the number of simultaneous prefetches
 659 to a large constant to reflect this (it is probably not a good idea not
 660 to limit the number of prefetches at all, as their execution also takes some
 661 time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
 671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
 672 very small blocks it is better to use a loop. For large blocks, a libcall can
 673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
 828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
 862 /* On all chips taken into consideration, lea is 2 cycles or more. With
 863 this cost, however, our current implementation of synth_mult results in
 864 the use of unnecessary temporary registers, causing regressions on several
 865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
 907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
 908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_GEODE (1<<PROCESSOR_GEODE)
988 #define m_K6_GEODE (m_K6 | m_GEODE)
989 #define m_K6 (1<<PROCESSOR_K6)
990 #define m_ATHLON (1<<PROCESSOR_ATHLON)
991 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
992 #define m_K8 (1<<PROCESSOR_K8)
993 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
994 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
995 #define m_NOCONA (1<<PROCESSOR_NOCONA)
996 #define m_CORE2 (1<<PROCESSOR_CORE2)
997 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
998 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
999 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1000 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
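/* A minimal sketch of how these masks are consumed (assuming the TUNEMASK
   definition from i386.h, i.e. TUNEMASK == (1 << ix86_tune)): a tuning test
   such as TARGET_USE_LEAVE expands roughly to

     (x86_use_leave & (1 << ix86_tune))

   so OR-ing m_* masks into one of the const ints below selects the set of
   processors for which the corresponding optimization is enabled.  */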
1001
 1002 /* Generic instruction choice should be a common subset of supported CPUs
 1003 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1004
 1005 /* Leave does not affect Nocona SPEC2000 results negatively, so enabling it for
 1006 Generic64 seems like a good code size tradeoff. We can't enable it for 32-bit
 1007 generic because it does not work well with PPro based chips. */
1008 const int x86_use_leave = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2
1009 | m_GENERIC64;
1010 const int x86_push_memory = m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1011 | m_NOCONA | m_CORE2 | m_GENERIC;
1012 const int x86_zero_extend_with_and = m_486 | m_PENT;
1013 /* Enable to zero extend integer registers to avoid partial dependencies */
1014 const int x86_movx = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1015 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */;
1016 const int x86_double_with_add = ~m_386;
1017 const int x86_use_bit_test = m_386;
1018 const int x86_unroll_strlen = m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10
1019 | m_K6 | m_CORE2 | m_GENERIC;
1020 const int x86_cmove = m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1021 | m_NOCONA;
1022 const int x86_3dnow_a = m_ATHLON_K8_AMDFAM10;
1023 const int x86_deep_branch = m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10
1024 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
 1025 /* Branch hints were put in P4 based on simulation results. But
 1026 after P4 was made, no performance benefit was observed with
 1027 branch hints. They also increase the code size. As a result,
 1028 icc never generates branch hints. */
1029 const int x86_branch_hints = 0;
1030 const int x86_use_sahf = m_PPRO | m_K6_GEODE | m_PENT4 | m_NOCONA | m_GENERIC32;
1031 /*m_GENERIC | m_ATHLON_K8 ? */
 1032 /* We probably ought to watch for partial register stalls for the Generic32
 1033 compilation setting as well. However, in the current implementation
 1034 partial register stalls are not eliminated very well - they can
 1035 be introduced via subregs synthesized by combine and can happen
 1036 in caller/callee saving sequences.
 1037 Because this option pays back little on PPro based chips and conflicts
 1038 with the partial register dependencies used by Athlon/P4 based chips, it is better
 1039 to leave it off for generic32 for now. */
1040 const int x86_partial_reg_stall = m_PPRO;
1041 const int x86_partial_flag_reg_stall = m_CORE2 | m_GENERIC;
1042 const int x86_use_himode_fiop = m_386 | m_486 | m_K6_GEODE;
1043 const int x86_use_simode_fiop = ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT
1044 | m_CORE2 | m_GENERIC);
1045 const int x86_use_mov0 = m_K6;
1046 const int x86_use_cltd = ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC);
1047 /* Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1048 const int x86_use_xchgb = m_PENT4;
1049 const int x86_read_modify_write = ~m_PENT;
1050 const int x86_read_modify = ~(m_PENT | m_PPRO);
1051 const int x86_split_long_moves = m_PPRO;
1052 const int x86_promote_QImode = m_K6_GEODE | m_PENT | m_386 | m_486
1053 | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1054 /* m_PENT4 ? */
1055 const int x86_fast_prefix = ~(m_PENT | m_486 | m_386);
1056 const int x86_single_stringop = m_386 | m_PENT4 | m_NOCONA;
1057 const int x86_qimode_math = ~(0);
1058 const int x86_promote_qi_regs = 0;
 1059 /* On PPro this flag is meant to avoid partial register stalls. Just like
 1060 x86_partial_reg_stall, this option might be considered for Generic32
 1061 if our scheme for avoiding partial stalls were more effective. */
1062 const int x86_himode_math = ~(m_PPRO);
1063 const int x86_promote_hi_regs = m_PPRO;
1064 /* Enable if add/sub rsp is preferred over 1 or 2 push/pop */
1065 const int x86_sub_esp_4 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1066 | m_CORE2 | m_GENERIC;
1067 const int x86_sub_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1068 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1069 const int x86_add_esp_4 = m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA
1070 | m_CORE2 | m_GENERIC;
1071 const int x86_add_esp_8 = m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1072 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC;
1073 /* Enable if integer moves are preferred for DFmode copies */
1074 const int x86_integer_DFmode_moves = ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1075 | m_PPRO | m_CORE2 | m_GENERIC | m_GEODE);
1076 const int x86_partial_reg_dependency = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1077 | m_CORE2 | m_GENERIC;
1078 const int x86_memory_mismatch_stall = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA
1079 | m_CORE2 | m_GENERIC;
1080 /* If ACCUMULATE_OUTGOING_ARGS is enabled, the maximum amount of space required
1081 for outgoing arguments will be computed and placed into the variable
1082 `current_function_outgoing_args_size'. No space will be pushed onto the stack
1083 for each call; instead, the function prologue should increase the stack frame
1084 size by this amount. Setting both PUSH_ARGS and ACCUMULATE_OUTGOING_ARGS is
1085 not proper. */
1086 const int x86_accumulate_outgoing_args = m_ATHLON_K8_AMDFAM10 | m_PENT4
1087 | m_NOCONA | m_PPRO | m_CORE2
1088 | m_GENERIC;
1089 const int x86_prologue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1090 const int x86_epilogue_using_move = m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC;
1091 const int x86_shift1 = ~m_486;
1092 const int x86_arch_always_fancy_math_387 = m_PENT | m_PPRO
1093 | m_ATHLON_K8_AMDFAM10 | m_PENT4
1094 | m_NOCONA | m_CORE2 | m_GENERIC;
 1095 /* In the Generic model we have a conflict here between PPro/Pentium4 based chips
 1096 that treat 128-bit SSE registers as single units and K8 based chips that
 1097 divide SSE registers into two 64-bit halves.
 1098 x86_sse_partial_reg_dependency promotes all store destinations to 128 bits
 1099 to allow register renaming on 128-bit SSE units, but usually results in one
 1100 extra micro-op on 64-bit SSE units. Experimental results show that disabling
 1101 this option on P4 brings over a 20% SPECfp regression, while enabling it on
 1102 K8 brings roughly a 2.4% regression that can be partly masked by careful scheduling
 1103 of moves. */
1104 const int x86_sse_partial_reg_dependency = m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1105 | m_GENERIC | m_AMDFAM10;
 1106 /* Set for machines where the type and dependencies are resolved on SSE
 1107 register parts instead of whole registers, so we may maintain just the
 1108 lower part of scalar values in the proper format, leaving the upper part
 1109 undefined. */
1110 const int x86_sse_split_regs = m_ATHLON_K8;
1111 /* Code generation for scalar reg-reg moves of single and double precision data:
1112 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
1113 movaps reg, reg
1114 else
1115 movss reg, reg
1116 if (x86_sse_partial_reg_dependency == true)
1117 movapd reg, reg
1118 else
1119 movsd reg, reg
1120
1121 Code generation for scalar loads of double precision data:
1122 if (x86_sse_split_regs == true)
1123 movlpd mem, reg (gas syntax)
1124 else
1125 movsd mem, reg
1126
1127 Code generation for unaligned packed loads of single precision data
1128 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
1129 if (x86_sse_unaligned_move_optimal)
1130 movups mem, reg
1131
1132 if (x86_sse_partial_reg_dependency == true)
1133 {
1134 xorps reg, reg
1135 movlps mem, reg
1136 movhps mem+8, reg
1137 }
1138 else
1139 {
1140 movlps mem, reg
1141 movhps mem+8, reg
1142 }
1143
1144 Code generation for unaligned packed loads of double precision data
1145 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
1146 if (x86_sse_unaligned_move_optimal)
1147 movupd mem, reg
1148
1149 if (x86_sse_split_regs == true)
1150 {
1151 movlpd mem, reg
1152 movhpd mem+8, reg
1153 }
1154 else
1155 {
1156 movsd mem, reg
1157 movhpd mem+8, reg
1158 }
1159 */
1160 const int x86_sse_unaligned_move_optimal = m_AMDFAM10;
1161 const int x86_sse_typeless_stores = m_ATHLON_K8_AMDFAM10;
1162 const int x86_sse_load0_by_pxor = m_PPRO | m_PENT4 | m_NOCONA;
1163 const int x86_use_ffreep = m_ATHLON_K8_AMDFAM10;
1164 const int x86_use_incdec = ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC);
1165
1166 const int x86_inter_unit_moves = ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC);
1167
1168 const int x86_ext_80387_constants = m_K6_GEODE | m_ATHLON_K8 | m_PENT4
1169 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1170 /* Some CPU cores are not able to predict more than 4 branch instructions in
1171 the 16 byte window. */
1172 const int x86_four_jump_limit = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1173 | m_NOCONA | m_CORE2 | m_GENERIC;
1174 const int x86_schedule = m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT
1175 | m_CORE2 | m_GENERIC;
1176 const int x86_use_bt = m_ATHLON_K8_AMDFAM10;
1177 /* Compare and exchange was added for 80486. */
1178 const int x86_cmpxchg = ~m_386;
 1179 /* Compare and exchange 8 bytes was added for the Pentium. */
1180 const int x86_cmpxchg8b = ~(m_386 | m_486);
1181 /* Exchange and add was added for 80486. */
1182 const int x86_xadd = ~m_386;
1183 /* Byteswap was added for 80486. */
1184 const int x86_bswap = ~m_386;
1185 const int x86_pad_returns = m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC;
1186
1187 static enum stringop_alg stringop_alg = no_stringop;
1188
 1189 /* If the average insn count for a single function invocation is
 1190 lower than this constant, emit fast (but longer) prologue and
 1191 epilogue code. */
1192 #define FAST_PROLOGUE_INSN_COUNT 20
1193
 1194 /* Names for the 8-bit (low), 8-bit (high), and 16-bit registers, respectively. */
1195 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1196 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1197 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1198
1199 /* Array of the smallest class containing reg number REGNO, indexed by
1200 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1201
1202 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1203 {
1204 /* ax, dx, cx, bx */
1205 AREG, DREG, CREG, BREG,
1206 /* si, di, bp, sp */
1207 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1208 /* FP registers */
1209 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1210 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1211 /* arg pointer */
1212 NON_Q_REGS,
1213 /* flags, fpsr, fpcr, frame */
1214 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1215 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1216 SSE_REGS, SSE_REGS,
1217 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1218 MMX_REGS, MMX_REGS,
1219 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1220 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1221 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1222 SSE_REGS, SSE_REGS,
1223 };
1224
1225 /* The "default" register map used in 32bit mode. */
1226
1227 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1228 {
1229 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1230 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1231 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1232 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1233 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1234 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1235 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1236 };
1237
1238 static int const x86_64_int_parameter_registers[6] =
1239 {
1240 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1241 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1242 };
1243
1244 static int const x86_64_int_return_registers[4] =
1245 {
 1246 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1247 };
1248
1249 /* The "default" register map used in 64bit mode. */
1250 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1251 {
1252 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1253 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1254 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1255 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1256 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1257 8,9,10,11,12,13,14,15, /* extended integer registers */
1258 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1259 };
1260
1261 /* Define the register numbers to be used in Dwarf debugging information.
1262 The SVR4 reference port C compiler uses the following register numbers
1263 in its Dwarf output code:
1264 0 for %eax (gcc regno = 0)
1265 1 for %ecx (gcc regno = 2)
1266 2 for %edx (gcc regno = 1)
1267 3 for %ebx (gcc regno = 3)
1268 4 for %esp (gcc regno = 7)
1269 5 for %ebp (gcc regno = 6)
1270 6 for %esi (gcc regno = 4)
1271 7 for %edi (gcc regno = 5)
1272 The following three DWARF register numbers are never generated by
1273 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1274 believes these numbers have these meanings.
1275 8 for %eip (no gcc equivalent)
1276 9 for %eflags (gcc regno = 17)
1277 10 for %trapno (no gcc equivalent)
1278 It is not at all clear how we should number the FP stack registers
1279 for the x86 architecture. If the version of SDB on x86/svr4 were
1280 a bit less brain dead with respect to floating-point then we would
1281 have a precedent to follow with respect to DWARF register numbers
1282 for x86 FP registers, but the SDB on x86/svr4 is so completely
1283 broken with respect to FP registers that it is hardly worth thinking
1284 of it as something to strive for compatibility with.
1285 The version of x86/svr4 SDB I have at the moment does (partially)
1286 seem to believe that DWARF register number 11 is associated with
1287 the x86 register %st(0), but that's about all. Higher DWARF
1288 register numbers don't seem to be associated with anything in
1289 particular, and even for DWARF regno 11, SDB only seems to under-
1290 stand that it should say that a variable lives in %st(0) (when
1291 asked via an `=' command) if we said it was in DWARF regno 11,
1292 but SDB still prints garbage when asked for the value of the
1293 variable in question (via a `/' command).
1294 (Also note that the labels SDB prints for various FP stack regs
1295 when doing an `x' command are all wrong.)
1296 Note that these problems generally don't affect the native SVR4
1297 C compiler because it doesn't allow the use of -O with -g and
1298 because when it is *not* optimizing, it allocates a memory
1299 location for each floating-point variable, and the memory
1300 location is what gets described in the DWARF AT_location
1301 attribute for the variable in question.
1302 Regardless of the severe mental illness of the x86/svr4 SDB, we
1303 do something sensible here and we use the following DWARF
1304 register numbers. Note that these are all stack-top-relative
1305 numbers.
1306 11 for %st(0) (gcc regno = 8)
1307 12 for %st(1) (gcc regno = 9)
1308 13 for %st(2) (gcc regno = 10)
1309 14 for %st(3) (gcc regno = 11)
1310 15 for %st(4) (gcc regno = 12)
1311 16 for %st(5) (gcc regno = 13)
1312 17 for %st(6) (gcc regno = 14)
1313 18 for %st(7) (gcc regno = 15)
1314 */
1315 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1316 {
1317 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1318 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1319 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1320 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1321 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1322 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1323 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1324 };
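/* Illustrative reading of the table above: it is indexed by GCC's internal
   register number and yields the SVR4 DWARF register number, e.g.
   svr4_dbx_register_map[2] == 1 maps gcc regno 2 (%ecx) to DWARF regno 1,
   svr4_dbx_register_map[6] == 5 maps gcc regno 6 (%ebp) to DWARF regno 5,
   and svr4_dbx_register_map[8] == 11 maps gcc regno 8 (%st(0)) to DWARF
   regno 11, matching the numbering documented in the comment above.  */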
1325
1326 /* Test and compare insns in i386.md store the information needed to
1327 generate branch and scc insns here. */
1328
1329 rtx ix86_compare_op0 = NULL_RTX;
1330 rtx ix86_compare_op1 = NULL_RTX;
1331 rtx ix86_compare_emitted = NULL_RTX;
1332
1333 /* Size of the register save area. */
1334 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1335
1336 /* Define the structure for the machine field in struct function. */
1337
1338 struct stack_local_entry GTY(())
1339 {
1340 unsigned short mode;
1341 unsigned short n;
1342 rtx rtl;
1343 struct stack_local_entry *next;
1344 };
1345
1346 /* Structure describing stack frame layout.
1347 Stack grows downward:
1348
1349 [arguments]
1350 <- ARG_POINTER
1351 saved pc
1352
1353 saved frame pointer if frame_pointer_needed
1354 <- HARD_FRAME_POINTER
1355 [saved regs]
1356
1357 [padding1] \
1358 )
1359 [va_arg registers] (
1360 > to_allocate <- FRAME_POINTER
1361 [frame] (
1362 )
1363 [padding2] /
1364 */
1365 struct ix86_frame
1366 {
1367 int nregs;
1368 int padding1;
1369 int va_arg_size;
1370 HOST_WIDE_INT frame;
1371 int padding2;
1372 int outgoing_arguments_size;
1373 int red_zone_size;
1374
1375 HOST_WIDE_INT to_allocate;
1376 /* The offsets relative to ARG_POINTER. */
1377 HOST_WIDE_INT frame_pointer_offset;
1378 HOST_WIDE_INT hard_frame_pointer_offset;
1379 HOST_WIDE_INT stack_pointer_offset;
1380
1381 /* When save_regs_using_mov is set, emit prologue using
1382 move instead of push instructions. */
1383 bool save_regs_using_mov;
1384 };
1385
1386 /* Code model option. */
1387 enum cmodel ix86_cmodel;
1388 /* Asm dialect. */
1389 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1390 /* TLS dialects. */
1391 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1392
1393 /* Which unit we are generating floating point math for. */
1394 enum fpmath_unit ix86_fpmath;
1395
1396 /* Which cpu are we scheduling for. */
1397 enum processor_type ix86_tune;
1398 /* Which instruction set architecture to use. */
1399 enum processor_type ix86_arch;
1400
1401 /* True if the SSE prefetch instruction is not a NOP. */
1402 int x86_prefetch_sse;
1403
1404 /* True if the cmpxchg16b instruction is supported. */
1405 int x86_cmpxchg16b;
1406
1407 /* ix86_regparm_string as a number */
1408 static int ix86_regparm;
1409
1410 /* -mstackrealign option */
1411 extern int ix86_force_align_arg_pointer;
1412 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1413
1414 /* Preferred alignment for stack boundary in bits. */
1415 unsigned int ix86_preferred_stack_boundary;
1416
1417 /* Values 1-5: see jump.c */
1418 int ix86_branch_cost;
1419
1420 /* Variables which are this size or smaller are put in the data/bss
1421 or ldata/lbss sections. */
1422
1423 int ix86_section_threshold = 65536;
1424
1425 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1426 char internal_label_prefix[16];
1427 int internal_label_prefix_len;
1428 \f
1429 static bool ix86_handle_option (size_t, const char *, int);
1430 static void output_pic_addr_const (FILE *, rtx, int);
1431 static void put_condition_code (enum rtx_code, enum machine_mode,
1432 int, int, FILE *);
1433 static const char *get_some_local_dynamic_name (void);
1434 static int get_some_local_dynamic_name_1 (rtx *, void *);
1435 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1436 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1437 rtx *);
1438 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1439 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1440 enum machine_mode);
1441 static rtx get_thread_pointer (int);
1442 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1443 static void get_pc_thunk_name (char [32], unsigned int);
1444 static rtx gen_push (rtx);
1445 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1446 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1447 static struct machine_function * ix86_init_machine_status (void);
1448 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1449 static int ix86_nsaved_regs (void);
1450 static void ix86_emit_save_regs (void);
1451 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1452 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1453 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1454 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1455 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1456 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1457 static int ix86_issue_rate (void);
1458 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1459 static int ia32_multipass_dfa_lookahead (void);
1460 static void ix86_init_mmx_sse_builtins (void);
1461 static rtx x86_this_parameter (tree);
1462 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1463 HOST_WIDE_INT, tree);
1464 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1465 static void x86_file_start (void);
1466 static void ix86_reorg (void);
1467 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1468 static tree ix86_build_builtin_va_list (void);
1469 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1470 tree, int *, int);
1471 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1472 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1473 static bool ix86_vector_mode_supported_p (enum machine_mode);
1474
1475 static int ix86_address_cost (rtx);
1476 static bool ix86_cannot_force_const_mem (rtx);
1477 static rtx ix86_delegitimize_address (rtx);
1478
1479 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1480
1481 struct builtin_description;
1482 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1483 tree, rtx);
1484 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1485 tree, rtx);
1486 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1487 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1488 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1489 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1490 static rtx safe_vector_operand (rtx, enum machine_mode);
1491 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1492 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1493 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1494 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1495 static int ix86_fp_comparison_cost (enum rtx_code code);
1496 static unsigned int ix86_select_alt_pic_regnum (void);
1497 static int ix86_save_reg (unsigned int, int);
1498 static void ix86_compute_frame_layout (struct ix86_frame *);
1499 static int ix86_comp_type_attributes (tree, tree);
1500 static int ix86_function_regparm (tree, tree);
1501 const struct attribute_spec ix86_attribute_table[];
1502 static bool ix86_function_ok_for_sibcall (tree, tree);
1503 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1504 static int ix86_value_regno (enum machine_mode, tree, tree);
1505 static bool contains_128bit_aligned_vector_p (tree);
1506 static rtx ix86_struct_value_rtx (tree, int);
1507 static bool ix86_ms_bitfield_layout_p (tree);
1508 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1509 static int extended_reg_mentioned_1 (rtx *, void *);
1510 static bool ix86_rtx_costs (rtx, int, int, int *);
1511 static int min_insn_size (rtx);
1512 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1513 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1514 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1515 tree, bool);
1516 static void ix86_init_builtins (void);
1517 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1518 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1519 static tree ix86_builtin_conversion (enum tree_code, tree);
1520 static const char *ix86_mangle_fundamental_type (tree);
1521 static tree ix86_stack_protect_fail (void);
1522 static rtx ix86_internal_arg_pointer (void);
1523 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1524 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1525 rtx, rtx, int);
1526
1527 /* This function is only used on Solaris. */
1528 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1529 ATTRIBUTE_UNUSED;
1530
1531 /* Register class used for passing a given 64bit part of the argument.
1532 These represent classes as documented by the psABI, with the exception
1533 of the SSESF and SSEDF classes, which are basically the SSE class: GCC
1534 will use SFmode or DFmode moves instead of DImode to avoid reformatting
1535 penalties.
1536 
1537 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1538 whenever possible (the upper half contains only padding).  */
1539 enum x86_64_reg_class
1540 {
1541 X86_64_NO_CLASS,
1542 X86_64_INTEGER_CLASS,
1543 X86_64_INTEGERSI_CLASS,
1544 X86_64_SSE_CLASS,
1545 X86_64_SSESF_CLASS,
1546 X86_64_SSEDF_CLASS,
1547 X86_64_SSEUP_CLASS,
1548 X86_64_X87_CLASS,
1549 X86_64_X87UP_CLASS,
1550 X86_64_COMPLEX_X87_CLASS,
1551 X86_64_MEMORY_CLASS
1552 };
1553 static const char * const x86_64_reg_class_name[] = {
1554 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1555 "sseup", "x87", "x87up", "cplx87", "no"
1556 };
1557
1558 #define MAX_CLASSES 4
1559
1560 /* Table of constants used by fldpi, fldln2, etc.... */
1561 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1562 static bool ext_80387_constants_init = 0;
1563 static void init_ext_80387_constants (void);
1564 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1565 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1566 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1567 static section *x86_64_elf_select_section (tree decl, int reloc,
1568 unsigned HOST_WIDE_INT align)
1569 ATTRIBUTE_UNUSED;
1570 \f
1571 /* Initialize the GCC target structure. */
1572 #undef TARGET_ATTRIBUTE_TABLE
1573 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1574 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1575 # undef TARGET_MERGE_DECL_ATTRIBUTES
1576 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1577 #endif
1578
1579 #undef TARGET_COMP_TYPE_ATTRIBUTES
1580 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1581
1582 #undef TARGET_INIT_BUILTINS
1583 #define TARGET_INIT_BUILTINS ix86_init_builtins
1584 #undef TARGET_EXPAND_BUILTIN
1585 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1586
1587 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1588 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1589 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1590 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1591
1592 #undef TARGET_ASM_FUNCTION_EPILOGUE
1593 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1594
1595 #undef TARGET_ENCODE_SECTION_INFO
1596 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1597 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1598 #else
1599 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1600 #endif
1601
1602 #undef TARGET_ASM_OPEN_PAREN
1603 #define TARGET_ASM_OPEN_PAREN ""
1604 #undef TARGET_ASM_CLOSE_PAREN
1605 #define TARGET_ASM_CLOSE_PAREN ""
1606
1607 #undef TARGET_ASM_ALIGNED_HI_OP
1608 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1609 #undef TARGET_ASM_ALIGNED_SI_OP
1610 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1611 #ifdef ASM_QUAD
1612 #undef TARGET_ASM_ALIGNED_DI_OP
1613 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1614 #endif
1615
1616 #undef TARGET_ASM_UNALIGNED_HI_OP
1617 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1618 #undef TARGET_ASM_UNALIGNED_SI_OP
1619 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1620 #undef TARGET_ASM_UNALIGNED_DI_OP
1621 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1622
1623 #undef TARGET_SCHED_ADJUST_COST
1624 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1625 #undef TARGET_SCHED_ISSUE_RATE
1626 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1627 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1628 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1629 ia32_multipass_dfa_lookahead
1630
1631 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1632 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1633
1634 #ifdef HAVE_AS_TLS
1635 #undef TARGET_HAVE_TLS
1636 #define TARGET_HAVE_TLS true
1637 #endif
1638 #undef TARGET_CANNOT_FORCE_CONST_MEM
1639 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1640 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1641 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1642
1643 #undef TARGET_DELEGITIMIZE_ADDRESS
1644 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1645
1646 #undef TARGET_MS_BITFIELD_LAYOUT_P
1647 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1648
1649 #if TARGET_MACHO
1650 #undef TARGET_BINDS_LOCAL_P
1651 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1652 #endif
1653
1654 #undef TARGET_ASM_OUTPUT_MI_THUNK
1655 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1656 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1657 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1658
1659 #undef TARGET_ASM_FILE_START
1660 #define TARGET_ASM_FILE_START x86_file_start
1661
1662 #undef TARGET_DEFAULT_TARGET_FLAGS
1663 #define TARGET_DEFAULT_TARGET_FLAGS \
1664 (TARGET_DEFAULT \
1665 | TARGET_64BIT_DEFAULT \
1666 | TARGET_SUBTARGET_DEFAULT \
1667 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1668
1669 #undef TARGET_HANDLE_OPTION
1670 #define TARGET_HANDLE_OPTION ix86_handle_option
1671
1672 #undef TARGET_RTX_COSTS
1673 #define TARGET_RTX_COSTS ix86_rtx_costs
1674 #undef TARGET_ADDRESS_COST
1675 #define TARGET_ADDRESS_COST ix86_address_cost
1676
1677 #undef TARGET_FIXED_CONDITION_CODE_REGS
1678 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1679 #undef TARGET_CC_MODES_COMPATIBLE
1680 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1681
1682 #undef TARGET_MACHINE_DEPENDENT_REORG
1683 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1684
1685 #undef TARGET_BUILD_BUILTIN_VA_LIST
1686 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1687
1688 #undef TARGET_MD_ASM_CLOBBERS
1689 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1690
1691 #undef TARGET_PROMOTE_PROTOTYPES
1692 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1693 #undef TARGET_STRUCT_VALUE_RTX
1694 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1695 #undef TARGET_SETUP_INCOMING_VARARGS
1696 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1697 #undef TARGET_MUST_PASS_IN_STACK
1698 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1699 #undef TARGET_PASS_BY_REFERENCE
1700 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1701 #undef TARGET_INTERNAL_ARG_POINTER
1702 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1703 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1704 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1705
1706 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1707 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1708
1709 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1710 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1711
1712 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1713 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1714
1715 #ifdef HAVE_AS_TLS
1716 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1717 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1718 #endif
1719
1720 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1721 #undef TARGET_INSERT_ATTRIBUTES
1722 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1723 #endif
1724
1725 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1726 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1727
1728 #undef TARGET_STACK_PROTECT_FAIL
1729 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1730
1731 #undef TARGET_FUNCTION_VALUE
1732 #define TARGET_FUNCTION_VALUE ix86_function_value
1733
1734 struct gcc_target targetm = TARGET_INITIALIZER;
1735
1736 \f
1737 /* The svr4 ABI for the i386 says that records and unions are returned
1738 in memory. */
1739 #ifndef DEFAULT_PCC_STRUCT_RETURN
1740 #define DEFAULT_PCC_STRUCT_RETURN 1
1741 #endif
1742
1743 /* Implement TARGET_HANDLE_OPTION. */
1744
1745 static bool
1746 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1747 {
1748 switch (code)
1749 {
1750 case OPT_m3dnow:
1751 if (!value)
1752 {
1753 target_flags &= ~MASK_3DNOW_A;
1754 target_flags_explicit |= MASK_3DNOW_A;
1755 }
1756 return true;
1757
1758 case OPT_mmmx:
1759 if (!value)
1760 {
1761 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1762 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1763 }
1764 return true;
1765
1766 case OPT_msse:
1767 if (!value)
1768 {
1769 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1770 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1771 }
1772 return true;
1773
1774 case OPT_msse2:
1775 if (!value)
1776 {
1777 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1778 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1779 }
1780 return true;
1781
1782 case OPT_msse3:
1783 if (!value)
1784 {
1785 target_flags &= ~MASK_SSE4A;
1786 target_flags_explicit |= MASK_SSE4A;
1787 }
1788 return true;
1789
1790 default:
1791 return true;
1792 }
1793 }
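/* Illustrative example of the cascading above (command line is hypothetical):
   with "-msse3 -mno-sse2" the OPT_msse2 case also clears MASK_SSE3 and
   MASK_SSE4A, so SSE3 ends up disabled even though -msse3 appeared first.
   Enabling an option here never implicitly enables the lower ISA levels;
   those implications are applied later in override_options.  */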
1794
1795 /* Sometimes certain combinations of command options do not make
1796 sense on a particular target machine. You can define a macro
1797 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1798 defined, is executed once just after all the command options have
1799 been parsed.
1800
1801 Don't use this macro to turn on various extra optimizations for
1802 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1803
1804 void
1805 override_options (void)
1806 {
1807 int i;
1808 int ix86_tune_defaulted = 0;
1809
1810 /* Comes from final.c -- no real reason to change it. */
1811 #define MAX_CODE_ALIGN 16
1812
1813 static struct ptt
1814 {
1815 const struct processor_costs *cost; /* Processor costs */
1816 const int target_enable; /* Target flags to enable. */
1817 const int target_disable; /* Target flags to disable. */
1818 const int align_loop; /* Default alignments. */
1819 const int align_loop_max_skip;
1820 const int align_jump;
1821 const int align_jump_max_skip;
1822 const int align_func;
1823 }
1824 const processor_target_table[PROCESSOR_max] =
1825 {
1826 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1827 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1828 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1829 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1830 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1831 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1832 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1833 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1834 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1835 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1836 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1837 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1838 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1839 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1840 };
1841
1842 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1843 static struct pta
1844 {
1845 const char *const name; /* processor name or nickname. */
1846 const enum processor_type processor;
1847 const enum pta_flags
1848 {
1849 PTA_SSE = 1,
1850 PTA_SSE2 = 2,
1851 PTA_SSE3 = 4,
1852 PTA_MMX = 8,
1853 PTA_PREFETCH_SSE = 16,
1854 PTA_3DNOW = 32,
1855 PTA_3DNOW_A = 64,
1856 PTA_64BIT = 128,
1857 PTA_SSSE3 = 256,
1858 PTA_CX16 = 512,
1859 PTA_POPCNT = 1024,
1860 PTA_ABM = 2048,
1861 PTA_SSE4A = 4096
1862 } flags;
1863 }
1864 const processor_alias_table[] =
1865 {
1866 {"i386", PROCESSOR_I386, 0},
1867 {"i486", PROCESSOR_I486, 0},
1868 {"i586", PROCESSOR_PENTIUM, 0},
1869 {"pentium", PROCESSOR_PENTIUM, 0},
1870 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1871 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1872 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1873 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1874 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1875 {"i686", PROCESSOR_PENTIUMPRO, 0},
1876 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1877 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1878 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1879 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1880 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1881 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1882 | PTA_MMX | PTA_PREFETCH_SSE},
1883 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1884 | PTA_MMX | PTA_PREFETCH_SSE},
1885 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1886 | PTA_MMX | PTA_PREFETCH_SSE},
1887 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1888 | PTA_MMX | PTA_PREFETCH_SSE | PTA_CX16},
1889 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1890 | PTA_64BIT | PTA_MMX
1891 | PTA_PREFETCH_SSE | PTA_CX16},
1892 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1893 | PTA_3DNOW_A},
1894 {"k6", PROCESSOR_K6, PTA_MMX},
1895 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1896 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1897 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1898 | PTA_3DNOW_A},
1899 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1900 | PTA_3DNOW | PTA_3DNOW_A},
1901 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1902 | PTA_3DNOW_A | PTA_SSE},
1903 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1904 | PTA_3DNOW_A | PTA_SSE},
1905 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1906 | PTA_3DNOW_A | PTA_SSE},
1907 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1908 | PTA_SSE | PTA_SSE2 },
1909 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1910 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1911 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1912 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1913 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1914 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1915 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1916 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2},
1917 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1918 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1919 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1920 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1921 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1922 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1923 };
1924
1925 int const pta_size = ARRAY_SIZE (processor_alias_table);
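/* Illustrative example: with the table above, -march=k8 selects PROCESSOR_K8
   together with PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
   | PTA_3DNOW_A | PTA_SSE | PTA_SSE2; the loop further down translates
   these into the corresponding MASK_* target flags unless the user already
   set them explicitly.  */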
1926
1927 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1928 SUBTARGET_OVERRIDE_OPTIONS;
1929 #endif
1930
1931 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1932 SUBSUBTARGET_OVERRIDE_OPTIONS;
1933 #endif
1934
1935 /* -fPIC is the default for 64-bit Darwin (Mach-O). */
1936 if (TARGET_MACHO && TARGET_64BIT)
1937 flag_pic = 2;
1938
1939 /* Set the default values for switches whose default depends on TARGET_64BIT
1940 in case they weren't overridden by command line options. */
1941 if (TARGET_64BIT)
1942 {
1943 /* Mach-O doesn't support omitting the frame pointer for now. */
1944 if (flag_omit_frame_pointer == 2)
1945 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1946 if (flag_asynchronous_unwind_tables == 2)
1947 flag_asynchronous_unwind_tables = 1;
1948 if (flag_pcc_struct_return == 2)
1949 flag_pcc_struct_return = 0;
1950 }
1951 else
1952 {
1953 if (flag_omit_frame_pointer == 2)
1954 flag_omit_frame_pointer = 0;
1955 if (flag_asynchronous_unwind_tables == 2)
1956 flag_asynchronous_unwind_tables = 0;
1957 if (flag_pcc_struct_return == 2)
1958 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1959 }
1960
1961 /* Need to check -mtune=generic first. */
1962 if (ix86_tune_string)
1963 {
1964 if (!strcmp (ix86_tune_string, "generic")
1965 || !strcmp (ix86_tune_string, "i686")
1966 /* As special support for cross compilers we read -mtune=native
1967 as -mtune=generic. With native compilers we won't see the
1968 -mtune=native, as it was changed by the driver. */
1969 || !strcmp (ix86_tune_string, "native"))
1970 {
1971 if (TARGET_64BIT)
1972 ix86_tune_string = "generic64";
1973 else
1974 ix86_tune_string = "generic32";
1975 }
1976 else if (!strncmp (ix86_tune_string, "generic", 7))
1977 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1978 }
1979 else
1980 {
1981 if (ix86_arch_string)
1982 ix86_tune_string = ix86_arch_string;
1983 if (!ix86_tune_string)
1984 {
1985 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1986 ix86_tune_defaulted = 1;
1987 }
1988
1989 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1990 need to use a sensible tune option. */
1991 if (!strcmp (ix86_tune_string, "generic")
1992 || !strcmp (ix86_tune_string, "x86-64")
1993 || !strcmp (ix86_tune_string, "i686"))
1994 {
1995 if (TARGET_64BIT)
1996 ix86_tune_string = "generic64";
1997 else
1998 ix86_tune_string = "generic32";
1999 }
2000 }
2001 if (ix86_stringop_string)
2002 {
2003 if (!strcmp (ix86_stringop_string, "rep_byte"))
2004 stringop_alg = rep_prefix_1_byte;
2005 else if (!strcmp (ix86_stringop_string, "libcall"))
2006 stringop_alg = libcall;
2007 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2008 stringop_alg = rep_prefix_4_byte;
2009 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2010 stringop_alg = rep_prefix_8_byte;
2011 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2012 stringop_alg = loop_1_byte;
2013 else if (!strcmp (ix86_stringop_string, "loop"))
2014 stringop_alg = loop;
2015 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2016 stringop_alg = unrolled_loop;
2017 else
2018 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2019 }
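/* Illustrative example (option value shown is just one of those accepted
   above): -mstringop-strategy=rep_8byte sets stringop_alg to
   rep_prefix_8_byte, overriding the per-CPU cost-table choice when memcpy
   and memset calls are expanded inline.  */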
2020 if (!strcmp (ix86_tune_string, "x86-64"))
2021 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2022 "-mtune=generic instead as appropriate.");
2023
2024 if (!ix86_arch_string)
2025 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2026 if (!strcmp (ix86_arch_string, "generic"))
2027 error ("generic CPU can be used only for -mtune= switch");
2028 if (!strncmp (ix86_arch_string, "generic", 7))
2029 error ("bad value (%s) for -march= switch", ix86_arch_string);
2030
2031 if (ix86_cmodel_string != 0)
2032 {
2033 if (!strcmp (ix86_cmodel_string, "small"))
2034 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2035 else if (!strcmp (ix86_cmodel_string, "medium"))
2036 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2037 else if (flag_pic)
2038 sorry ("code model %s not supported in PIC mode", ix86_cmodel_string);
2039 else if (!strcmp (ix86_cmodel_string, "32"))
2040 ix86_cmodel = CM_32;
2041 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2042 ix86_cmodel = CM_KERNEL;
2043 else if (!strcmp (ix86_cmodel_string, "large") && !flag_pic)
2044 ix86_cmodel = CM_LARGE;
2045 else
2046 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2047 }
2048 else
2049 {
2050 ix86_cmodel = CM_32;
2051 if (TARGET_64BIT)
2052 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2053 }
2054 if (ix86_asm_string != 0)
2055 {
2056 if (! TARGET_MACHO
2057 && !strcmp (ix86_asm_string, "intel"))
2058 ix86_asm_dialect = ASM_INTEL;
2059 else if (!strcmp (ix86_asm_string, "att"))
2060 ix86_asm_dialect = ASM_ATT;
2061 else
2062 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2063 }
2064 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2065 error ("code model %qs not supported in the %s bit mode",
2066 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2067 if (ix86_cmodel == CM_LARGE)
2068 sorry ("code model %<large%> not supported yet");
2069 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2070 sorry ("%i-bit mode not compiled in",
2071 (target_flags & MASK_64BIT) ? 64 : 32);
2072
2073 for (i = 0; i < pta_size; i++)
2074 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2075 {
2076 ix86_arch = processor_alias_table[i].processor;
2077 /* Default cpu tuning to the architecture. */
2078 ix86_tune = ix86_arch;
2079 if (processor_alias_table[i].flags & PTA_MMX
2080 && !(target_flags_explicit & MASK_MMX))
2081 target_flags |= MASK_MMX;
2082 if (processor_alias_table[i].flags & PTA_3DNOW
2083 && !(target_flags_explicit & MASK_3DNOW))
2084 target_flags |= MASK_3DNOW;
2085 if (processor_alias_table[i].flags & PTA_3DNOW_A
2086 && !(target_flags_explicit & MASK_3DNOW_A))
2087 target_flags |= MASK_3DNOW_A;
2088 if (processor_alias_table[i].flags & PTA_SSE
2089 && !(target_flags_explicit & MASK_SSE))
2090 target_flags |= MASK_SSE;
2091 if (processor_alias_table[i].flags & PTA_SSE2
2092 && !(target_flags_explicit & MASK_SSE2))
2093 target_flags |= MASK_SSE2;
2094 if (processor_alias_table[i].flags & PTA_SSE3
2095 && !(target_flags_explicit & MASK_SSE3))
2096 target_flags |= MASK_SSE3;
2097 if (processor_alias_table[i].flags & PTA_SSSE3
2098 && !(target_flags_explicit & MASK_SSSE3))
2099 target_flags |= MASK_SSSE3;
2100 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2101 x86_prefetch_sse = true;
2102 if (processor_alias_table[i].flags & PTA_CX16)
2103 x86_cmpxchg16b = true;
2104 if (processor_alias_table[i].flags & PTA_POPCNT
2105 && !(target_flags_explicit & MASK_POPCNT))
2106 target_flags |= MASK_POPCNT;
2107 if (processor_alias_table[i].flags & PTA_ABM
2108 && !(target_flags_explicit & MASK_ABM))
2109 target_flags |= MASK_ABM;
2110 if (processor_alias_table[i].flags & PTA_SSE4A
2111 && !(target_flags_explicit & MASK_SSE4A))
2112 target_flags |= MASK_SSE4A;
2113 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2114 error ("CPU you selected does not support x86-64 "
2115 "instruction set");
2116 break;
2117 }
2118
2119 if (i == pta_size)
2120 error ("bad value (%s) for -march= switch", ix86_arch_string);
2121
2122 for (i = 0; i < pta_size; i++)
2123 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2124 {
2125 ix86_tune = processor_alias_table[i].processor;
2126 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2127 {
2128 if (ix86_tune_defaulted)
2129 {
2130 ix86_tune_string = "x86-64";
2131 for (i = 0; i < pta_size; i++)
2132 if (! strcmp (ix86_tune_string,
2133 processor_alias_table[i].name))
2134 break;
2135 ix86_tune = processor_alias_table[i].processor;
2136 }
2137 else
2138 error ("CPU you selected does not support x86-64 "
2139 "instruction set");
2140 }
2141 /* Intel CPUs have always interpreted SSE prefetch instructions as
2142 NOPs; so, we can enable SSE prefetch instructions even when
2143 -mtune (rather than -march) points us to a processor that has them.
2144 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2145 higher processors. */
2146 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2147 x86_prefetch_sse = true;
2148 break;
2149 }
2150 if (i == pta_size)
2151 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2152
2153 if (optimize_size)
2154 ix86_cost = &size_cost;
2155 else
2156 ix86_cost = processor_target_table[ix86_tune].cost;
2157 target_flags |= processor_target_table[ix86_tune].target_enable;
2158 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2159
2160 /* Arrange to set up i386_stack_locals for all functions. */
2161 init_machine_status = ix86_init_machine_status;
2162
2163 /* Validate -mregparm= value. */
2164 if (ix86_regparm_string)
2165 {
2166 i = atoi (ix86_regparm_string);
2167 if (i < 0 || i > REGPARM_MAX)
2168 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2169 else
2170 ix86_regparm = i;
2171 }
2172 else
2173 if (TARGET_64BIT)
2174 ix86_regparm = REGPARM_MAX;
2175
2176 /* If the user has provided any of the -malign-* options,
2177 warn and use that value only if -falign-* is not set.
2178 Remove this code in GCC 3.2 or later. */
2179 if (ix86_align_loops_string)
2180 {
2181 warning (0, "-malign-loops is obsolete, use -falign-loops");
2182 if (align_loops == 0)
2183 {
2184 i = atoi (ix86_align_loops_string);
2185 if (i < 0 || i > MAX_CODE_ALIGN)
2186 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2187 else
2188 align_loops = 1 << i;
2189 }
2190 }
2191
2192 if (ix86_align_jumps_string)
2193 {
2194 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2195 if (align_jumps == 0)
2196 {
2197 i = atoi (ix86_align_jumps_string);
2198 if (i < 0 || i > MAX_CODE_ALIGN)
2199 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2200 else
2201 align_jumps = 1 << i;
2202 }
2203 }
2204
2205 if (ix86_align_funcs_string)
2206 {
2207 warning (0, "-malign-functions is obsolete, use -falign-functions");
2208 if (align_functions == 0)
2209 {
2210 i = atoi (ix86_align_funcs_string);
2211 if (i < 0 || i > MAX_CODE_ALIGN)
2212 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2213 else
2214 align_functions = 1 << i;
2215 }
2216 }
2217
2218 /* Default align_* from the processor table. */
2219 if (align_loops == 0)
2220 {
2221 align_loops = processor_target_table[ix86_tune].align_loop;
2222 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2223 }
2224 if (align_jumps == 0)
2225 {
2226 align_jumps = processor_target_table[ix86_tune].align_jump;
2227 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2228 }
2229 if (align_functions == 0)
2230 {
2231 align_functions = processor_target_table[ix86_tune].align_func;
2232 }
2233
2234 /* Validate -mbranch-cost= value, or provide default. */
2235 ix86_branch_cost = ix86_cost->branch_cost;
2236 if (ix86_branch_cost_string)
2237 {
2238 i = atoi (ix86_branch_cost_string);
2239 if (i < 0 || i > 5)
2240 error ("-mbranch-cost=%d is not between 0 and 5", i);
2241 else
2242 ix86_branch_cost = i;
2243 }
2244 if (ix86_section_threshold_string)
2245 {
2246 i = atoi (ix86_section_threshold_string);
2247 if (i < 0)
2248 error ("-mlarge-data-threshold=%d is negative", i);
2249 else
2250 ix86_section_threshold = i;
2251 }
2252
2253 if (ix86_tls_dialect_string)
2254 {
2255 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2256 ix86_tls_dialect = TLS_DIALECT_GNU;
2257 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2258 ix86_tls_dialect = TLS_DIALECT_GNU2;
2259 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2260 ix86_tls_dialect = TLS_DIALECT_SUN;
2261 else
2262 error ("bad value (%s) for -mtls-dialect= switch",
2263 ix86_tls_dialect_string);
2264 }
2265
2266 /* Keep nonleaf frame pointers. */
2267 if (flag_omit_frame_pointer)
2268 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2269 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2270 flag_omit_frame_pointer = 1;
2271
2272 /* If we're doing fast math, we don't care about comparison order
2273 wrt NaNs. This lets us use a shorter comparison sequence. */
2274 if (flag_finite_math_only)
2275 target_flags &= ~MASK_IEEE_FP;
2276
2277 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2278 since the insns won't need emulation. */
2279 if (x86_arch_always_fancy_math_387 & (1 << ix86_arch))
2280 target_flags &= ~MASK_NO_FANCY_MATH_387;
2281
2282 /* Likewise, if the target doesn't have a 387, or we've specified
2283 software floating point, don't use 387 inline intrinsics. */
2284 if (!TARGET_80387)
2285 target_flags |= MASK_NO_FANCY_MATH_387;
2286
2287 /* Turn on SSE3 builtins for -mssse3. */
2288 if (TARGET_SSSE3)
2289 target_flags |= MASK_SSE3;
2290
2291 /* Turn on SSE3 builtins for -msse4a. */
2292 if (TARGET_SSE4A)
2293 target_flags |= MASK_SSE3;
2294
2295 /* Turn on SSE2 builtins for -msse3. */
2296 if (TARGET_SSE3)
2297 target_flags |= MASK_SSE2;
2298
2299 /* Turn on SSE builtins for -msse2. */
2300 if (TARGET_SSE2)
2301 target_flags |= MASK_SSE;
2302
2303 /* Turn on MMX builtins for -msse. */
2304 if (TARGET_SSE)
2305 {
2306 target_flags |= MASK_MMX & ~target_flags_explicit;
2307 x86_prefetch_sse = true;
2308 }
2309
2310 /* Turn on MMX builtins for 3Dnow. */
2311 if (TARGET_3DNOW)
2312 target_flags |= MASK_MMX;
2313
2314 /* Turn on POPCNT builtins for -mabm. */
2315 if (TARGET_ABM)
2316 target_flags |= MASK_POPCNT;
2317
2318 if (TARGET_64BIT)
2319 {
2320 if (TARGET_ALIGN_DOUBLE)
2321 error ("-malign-double makes no sense in the 64bit mode");
2322 if (TARGET_RTD)
2323 error ("-mrtd calling convention not supported in the 64bit mode");
2324
2325 /* Enable by default the SSE and MMX builtins. Do allow the user to
2326 explicitly disable any of these. In particular, disabling SSE and
2327 MMX for kernel code is extremely useful. */
2328 target_flags
2329 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2330 & ~target_flags_explicit);
2331 }
2332 else
2333 {
2334 /* The i386 ABI does not specify a red zone.  It can still make sense to use
2335 one when the programmer takes care to keep the stack from being clobbered. */
2336 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2337 target_flags |= MASK_NO_RED_ZONE;
2338 }
2339
2340 /* Validate -mpreferred-stack-boundary= value, or provide default.
2341 The default of 128 bits is for Pentium III's SSE __m128.  We can't
2342 lower it for optimize_size, because then object files compiled with
2343 -Os could not be mixed with ones compiled with -On. */
2344 ix86_preferred_stack_boundary = 128;
2345 if (ix86_preferred_stack_boundary_string)
2346 {
2347 i = atoi (ix86_preferred_stack_boundary_string);
2348 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2349 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2350 TARGET_64BIT ? 4 : 2);
2351 else
2352 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2353 }
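/* Worked example for the computation above: the option argument is an
   exponent, so -mpreferred-stack-boundary=4 gives (1 << 4) * BITS_PER_UNIT
   = 128 bits (16 bytes), matching the default, while the minimum of 2 for
   32-bit code gives 32 bits (4 bytes).  */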
2354
2355 /* Accept -msseregparm only if at least SSE support is enabled. */
2356 if (TARGET_SSEREGPARM
2357 && ! TARGET_SSE)
2358 error ("-msseregparm used without SSE enabled");
2359
2360 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2361
2362 if (ix86_fpmath_string != 0)
2363 {
2364 if (! strcmp (ix86_fpmath_string, "387"))
2365 ix86_fpmath = FPMATH_387;
2366 else if (! strcmp (ix86_fpmath_string, "sse"))
2367 {
2368 if (!TARGET_SSE)
2369 {
2370 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2371 ix86_fpmath = FPMATH_387;
2372 }
2373 else
2374 ix86_fpmath = FPMATH_SSE;
2375 }
2376 else if (! strcmp (ix86_fpmath_string, "387,sse")
2377 || ! strcmp (ix86_fpmath_string, "sse,387"))
2378 {
2379 if (!TARGET_SSE)
2380 {
2381 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2382 ix86_fpmath = FPMATH_387;
2383 }
2384 else if (!TARGET_80387)
2385 {
2386 warning (0, "387 instruction set disabled, using SSE arithmetics");
2387 ix86_fpmath = FPMATH_SSE;
2388 }
2389 else
2390 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2391 }
2392 else
2393 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2394 }
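/* Illustrative example: with both SSE and the 387 available,
   -mfpmath=sse,387 (or 387,sse) sets ix86_fpmath to FPMATH_SSE | FPMATH_387
   above, while -mfpmath=sse without SSE enabled falls back to FPMATH_387
   with a warning.  */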
2395
2396 /* If the i387 is disabled, then do not return values in it. */
2397 if (!TARGET_80387)
2398 target_flags &= ~MASK_FLOAT_RETURNS;
2399
2400 if ((x86_accumulate_outgoing_args & TUNEMASK)
2401 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2402 && !optimize_size)
2403 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2404
2405 /* ??? Unwind info is not correct around the CFG unless either a frame
2406 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2407 unwind info generation to be aware of the CFG and propagating states
2408 around edges. */
2409 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2410 || flag_exceptions || flag_non_call_exceptions)
2411 && flag_omit_frame_pointer
2412 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2413 {
2414 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2415 warning (0, "unwind tables currently require either a frame pointer "
2416 "or -maccumulate-outgoing-args for correctness");
2417 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2418 }
2419
2420 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2421 {
2422 char *p;
2423 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2424 p = strchr (internal_label_prefix, 'X');
2425 internal_label_prefix_len = p - internal_label_prefix;
2426 *p = '\0';
2427 }
2428
2429 /* When no scheduling description is available, disable the scheduler passes
2430 so they won't slow down compilation or make x87 code slower. */
2431 if (!TARGET_SCHEDULE)
2432 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2433
2434 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2435 set_param_value ("simultaneous-prefetches",
2436 ix86_cost->simultaneous_prefetches);
2437 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2438 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2439 }
2440 \f
2441 /* Switch to the appropriate section for output of DECL.
2442 DECL is either a `VAR_DECL' node or a constant of some sort.
2443 RELOC indicates whether forming the initial value of DECL requires
2444 link-time relocations. */
2445
2446 static section *
2447 x86_64_elf_select_section (tree decl, int reloc,
2448 unsigned HOST_WIDE_INT align)
2449 {
2450 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2451 && ix86_in_large_data_p (decl))
2452 {
2453 const char *sname = NULL;
2454 unsigned int flags = SECTION_WRITE;
2455 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2456 {
2457 case SECCAT_DATA:
2458 sname = ".ldata";
2459 break;
2460 case SECCAT_DATA_REL:
2461 sname = ".ldata.rel";
2462 break;
2463 case SECCAT_DATA_REL_LOCAL:
2464 sname = ".ldata.rel.local";
2465 break;
2466 case SECCAT_DATA_REL_RO:
2467 sname = ".ldata.rel.ro";
2468 break;
2469 case SECCAT_DATA_REL_RO_LOCAL:
2470 sname = ".ldata.rel.ro.local";
2471 break;
2472 case SECCAT_BSS:
2473 sname = ".lbss";
2474 flags |= SECTION_BSS;
2475 break;
2476 case SECCAT_RODATA:
2477 case SECCAT_RODATA_MERGE_STR:
2478 case SECCAT_RODATA_MERGE_STR_INIT:
2479 case SECCAT_RODATA_MERGE_CONST:
2480 sname = ".lrodata";
2481 flags = 0;
2482 break;
2483 case SECCAT_SRODATA:
2484 case SECCAT_SDATA:
2485 case SECCAT_SBSS:
2486 gcc_unreachable ();
2487 case SECCAT_TEXT:
2488 case SECCAT_TDATA:
2489 case SECCAT_TBSS:
2490 /* We don't split these for the medium model.  Place them into
2491 default sections and hope for the best. */
2492 break;
2493 }
2494 if (sname)
2495 {
2496 /* We might get called with string constants, but get_named_section
2497 doesn't like them as they are not DECLs. Also, we need to set
2498 flags in that case. */
2499 if (!DECL_P (decl))
2500 return get_section (sname, flags, NULL);
2501 return get_named_section (decl, sname, reloc);
2502 }
2503 }
2504 return default_elf_select_section (decl, reloc, align);
2505 }
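/* Illustrative example (declaration is hypothetical): under -mcmodel=medium
   a writable initialized object larger than the -mlarge-data-threshold
   default of 65536 bytes, say "static int big[100000] = { 1 };", is
   categorized as SECCAT_DATA and placed in .ldata above rather than .data,
   so it need not fit in the small-model address range.  */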
2506
2507 /* Build up a unique section name, expressed as a
2508 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2509 RELOC indicates whether the initial value of EXP requires
2510 link-time relocations. */
2511
2512 static void
2513 x86_64_elf_unique_section (tree decl, int reloc)
2514 {
2515 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2516 && ix86_in_large_data_p (decl))
2517 {
2518 const char *prefix = NULL;
2519 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2520 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2521
2522 switch (categorize_decl_for_section (decl, reloc, flag_pic))
2523 {
2524 case SECCAT_DATA:
2525 case SECCAT_DATA_REL:
2526 case SECCAT_DATA_REL_LOCAL:
2527 case SECCAT_DATA_REL_RO:
2528 case SECCAT_DATA_REL_RO_LOCAL:
2529 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2530 break;
2531 case SECCAT_BSS:
2532 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2533 break;
2534 case SECCAT_RODATA:
2535 case SECCAT_RODATA_MERGE_STR:
2536 case SECCAT_RODATA_MERGE_STR_INIT:
2537 case SECCAT_RODATA_MERGE_CONST:
2538 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2539 break;
2540 case SECCAT_SRODATA:
2541 case SECCAT_SDATA:
2542 case SECCAT_SBSS:
2543 gcc_unreachable ();
2544 case SECCAT_TEXT:
2545 case SECCAT_TDATA:
2546 case SECCAT_TBSS:
2547 /* We don't split these for the medium model.  Place them into
2548 default sections and hope for the best. */
2549 break;
2550 }
2551 if (prefix)
2552 {
2553 const char *name;
2554 size_t nlen, plen;
2555 char *string;
2556 plen = strlen (prefix);
2557
2558 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2559 name = targetm.strip_name_encoding (name);
2560 nlen = strlen (name);
2561
2562 string = alloca (nlen + plen + 1);
2563 memcpy (string, prefix, plen);
2564 memcpy (string + plen, name, nlen + 1);
2565
2566 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2567 return;
2568 }
2569 }
2570 default_unique_section (decl, reloc);
2571 }
2572
2573 #ifdef COMMON_ASM_OP
2574 /* This says how to output assembler code to declare an
2575 uninitialized external linkage data object.
2576
2577 For medium-model x86-64 we need to use the .largecomm directive for
2578 large objects. */
2579 void
2580 x86_elf_aligned_common (FILE *file,
2581 const char *name, unsigned HOST_WIDE_INT size,
2582 int align)
2583 {
2584 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2585 && size > (unsigned int)ix86_section_threshold)
2586 fprintf (file, ".largecomm\t");
2587 else
2588 fprintf (file, "%s", COMMON_ASM_OP);
2589 assemble_name (file, name);
2590 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2591 size, align / BITS_PER_UNIT);
2592 }
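/* Illustrative output (symbol name and numbers are hypothetical): for a
   common object above the large-data threshold under -mcmodel=medium the
   code above emits something like ".largecomm buf,131072,32", while
   smaller common objects keep using the regular COMMON_ASM_OP (".comm"
   on ELF).  */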
2593 #endif
2594 /* Utility function for targets to use in implementing
2595 ASM_OUTPUT_ALIGNED_BSS. */
2596
2597 void
2598 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2599 const char *name, unsigned HOST_WIDE_INT size,
2600 int align)
2601 {
2602 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2603 && size > (unsigned int)ix86_section_threshold)
2604 switch_to_section (get_named_section (decl, ".lbss", 0));
2605 else
2606 switch_to_section (bss_section);
2607 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2608 #ifdef ASM_DECLARE_OBJECT_NAME
2609 last_assemble_variable_decl = decl;
2610 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2611 #else
2612 /* The standard thing is just to output a label for the object. */
2613 ASM_OUTPUT_LABEL (file, name);
2614 #endif /* ASM_DECLARE_OBJECT_NAME */
2615 ASM_OUTPUT_SKIP (file, size ? size : 1);
2616 }
2617 \f
2618 void
2619 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2620 {
2621 /* For -O2 and beyond, turn off -fschedule-insns by default.  It tends to
2622 make the register pressure problem even worse. */
2623 #ifdef INSN_SCHEDULING
2624 if (level > 1)
2625 flag_schedule_insns = 0;
2626 #endif
2627
2628 if (TARGET_MACHO)
2629 /* The Darwin libraries never set errno, so we might as well
2630 avoid calling them when that's the only reason we would. */
2631 flag_errno_math = 0;
2632
2633 /* The default values of these switches depend on TARGET_64BIT,
2634 which is not known at this moment.  Mark these values with 2 and
2635 let the user override them.  In case there is no command line option
2636 specifying them, we will set the defaults in override_options. */
2637 if (optimize >= 1)
2638 flag_omit_frame_pointer = 2;
2639 flag_pcc_struct_return = 2;
2640 flag_asynchronous_unwind_tables = 2;
2641 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2642 SUBTARGET_OPTIMIZATION_OPTIONS;
2643 #endif
2644 }
2645 \f
2646 /* Table of valid machine attributes. */
2647 const struct attribute_spec ix86_attribute_table[] =
2648 {
2649 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2650 /* Stdcall attribute says callee is responsible for popping arguments
2651 if they are not variable. */
2652 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2653 /* Fastcall attribute says callee is responsible for popping arguments
2654 if they are not variable. */
2655 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2656 /* Cdecl attribute says the callee is a normal C declaration */
2657 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2658 /* Regparm attribute specifies how many integer arguments are to be
2659 passed in registers. */
2660 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2661 /* Sseregparm attribute says we are using x86_64 calling conventions
2662 for FP arguments. */
2663 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2664 /* force_align_arg_pointer says this function realigns the stack at entry. */
2665 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2666 false, true, true, ix86_handle_cconv_attribute },
2667 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2668 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2669 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2670 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2671 #endif
2672 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2673 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2674 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2675 SUBTARGET_ATTRIBUTE_TABLE,
2676 #endif
2677 { NULL, 0, 0, false, false, false, NULL }
2678 };
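/* Illustrative uses of the attributes above (declarations are hypothetical):

     int __attribute__ ((regparm (3))) f (int a, int b, int c);
     int __attribute__ ((fastcall)) g (int a, int b);
     struct __attribute__ ((ms_struct)) s { char c; int i; };

   regparm (3) passes up to three integer arguments in registers, fastcall
   passes the first two in %ecx/%edx, and ms_struct requests the Microsoft
   struct/bitfield layout handled by ix86_handle_struct_attribute.  */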
2679
2680 /* Decide whether we can make a sibling call to a function. DECL is the
2681 declaration of the function being targeted by the call and EXP is the
2682 CALL_EXPR representing the call. */
2683
2684 static bool
2685 ix86_function_ok_for_sibcall (tree decl, tree exp)
2686 {
2687 tree func;
2688 rtx a, b;
2689
2690 /* If we are generating position-independent code, we cannot sibcall
2691 optimize any indirect call, or a direct call to a global function,
2692 as the PLT requires %ebx be live. */
2693 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2694 return false;
2695
2696 if (decl)
2697 func = decl;
2698 else
2699 {
2700 func = TREE_TYPE (CALL_EXPR_FN (exp));
2701 if (POINTER_TYPE_P (func))
2702 func = TREE_TYPE (func);
2703 }
2704
2705 /* Check that the return value locations are the same.  For example,
2706 if we are returning floats on the 80387 register stack, we cannot
2707 make a sibcall from a function that doesn't return a float to a
2708 function that does or, conversely, from a function that does return
2709 a float to a function that doesn't; the necessary stack adjustment
2710 would not be executed. This is also the place we notice
2711 differences in the return value ABI. Note that it is ok for one
2712 of the functions to have void return type as long as the return
2713 value of the other is passed in a register. */
2714 a = ix86_function_value (TREE_TYPE (exp), func, false);
2715 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2716 cfun->decl, false);
2717 if (STACK_REG_P (a) || STACK_REG_P (b))
2718 {
2719 if (!rtx_equal_p (a, b))
2720 return false;
2721 }
2722 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2723 ;
2724 else if (!rtx_equal_p (a, b))
2725 return false;
2726
2727 /* If this call is indirect, we'll need to be able to use a call-clobbered
2728 register for the address of the target function. Make sure that all
2729 such registers are not used for passing parameters. */
2730 if (!decl && !TARGET_64BIT)
2731 {
2732 tree type;
2733
2734 /* We're looking at the CALL_EXPR, we need the type of the function. */
2735 type = CALL_EXPR_FN (exp); /* pointer expression */
2736 type = TREE_TYPE (type); /* pointer type */
2737 type = TREE_TYPE (type); /* function type */
2738
2739 if (ix86_function_regparm (type, NULL) >= 3)
2740 {
2741 /* ??? Need to count the actual number of registers to be used,
2742 not the possible number of registers. Fix later. */
2743 return false;
2744 }
2745 }
2746
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 /* Dllimport'd functions are also called indirectly. */
2749 if (decl && DECL_DLLIMPORT_P (decl)
2750 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2751 return false;
2752 #endif
2753
2754 /* If we force-aligned the stack, then sibcalling would unalign the
2755 stack, which may break the called function. */
2756 if (cfun->machine->force_align_arg_pointer)
2757 return false;
2758
2759 /* Otherwise okay. That also includes certain types of indirect calls. */
2760 return true;
2761 }
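/* Illustrative consequence of the checks above: in 32-bit PIC code a call
   through a function pointer, or a direct call to a function that does not
   bind locally, is never turned into a sibcall, because the PLT-based call
   sequence needs %ebx to stay live.  */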
2762
2763 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2764 calling convention attributes;
2765 arguments as in struct attribute_spec.handler. */
2766
2767 static tree
2768 ix86_handle_cconv_attribute (tree *node, tree name,
2769 tree args,
2770 int flags ATTRIBUTE_UNUSED,
2771 bool *no_add_attrs)
2772 {
2773 if (TREE_CODE (*node) != FUNCTION_TYPE
2774 && TREE_CODE (*node) != METHOD_TYPE
2775 && TREE_CODE (*node) != FIELD_DECL
2776 && TREE_CODE (*node) != TYPE_DECL)
2777 {
2778 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2779 IDENTIFIER_POINTER (name));
2780 *no_add_attrs = true;
2781 return NULL_TREE;
2782 }
2783
2784 /* Can combine regparm with all attributes but fastcall. */
2785 if (is_attribute_p ("regparm", name))
2786 {
2787 tree cst;
2788
2789 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2790 {
2791 error ("fastcall and regparm attributes are not compatible");
2792 }
2793
2794 cst = TREE_VALUE (args);
2795 if (TREE_CODE (cst) != INTEGER_CST)
2796 {
2797 warning (OPT_Wattributes,
2798 "%qs attribute requires an integer constant argument",
2799 IDENTIFIER_POINTER (name));
2800 *no_add_attrs = true;
2801 }
2802 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2803 {
2804 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2805 IDENTIFIER_POINTER (name), REGPARM_MAX);
2806 *no_add_attrs = true;
2807 }
2808
2809 if (!TARGET_64BIT
2810 && lookup_attribute (ix86_force_align_arg_pointer_string,
2811 TYPE_ATTRIBUTES (*node))
2812 && compare_tree_int (cst, REGPARM_MAX-1))
2813 {
2814 error ("%s functions limited to %d register parameters",
2815 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2816 }
2817
2818 return NULL_TREE;
2819 }
2820
2821 if (TARGET_64BIT)
2822 {
2823 warning (OPT_Wattributes, "%qs attribute ignored",
2824 IDENTIFIER_POINTER (name));
2825 *no_add_attrs = true;
2826 return NULL_TREE;
2827 }
2828
2829 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2830 if (is_attribute_p ("fastcall", name))
2831 {
2832 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2833 {
2834 error ("fastcall and cdecl attributes are not compatible");
2835 }
2836 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2837 {
2838 error ("fastcall and stdcall attributes are not compatible");
2839 }
2840 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2841 {
2842 error ("fastcall and regparm attributes are not compatible");
2843 }
2844 }
2845
2846 /* Can combine stdcall with fastcall (redundant), regparm and
2847 sseregparm. */
2848 else if (is_attribute_p ("stdcall", name))
2849 {
2850 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2851 {
2852 error ("stdcall and cdecl attributes are not compatible");
2853 }
2854 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2855 {
2856 error ("stdcall and fastcall attributes are not compatible");
2857 }
2858 }
2859
2860 /* Can combine cdecl with regparm and sseregparm. */
2861 else if (is_attribute_p ("cdecl", name))
2862 {
2863 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2864 {
2865 error ("stdcall and cdecl attributes are not compatible");
2866 }
2867 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2868 {
2869 error ("fastcall and cdecl attributes are not compatible");
2870 }
2871 }
2872
2873 /* Can combine sseregparm with all attributes. */
2874
2875 return NULL_TREE;
2876 }
2877
2878 /* Return 0 if the attributes for two types are incompatible, 1 if they
2879 are compatible, and 2 if they are nearly compatible (which causes a
2880 warning to be generated). */
2881
2882 static int
2883 ix86_comp_type_attributes (tree type1, tree type2)
2884 {
2885 /* Check for mismatch of non-default calling convention. */
2886 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2887
2888 if (TREE_CODE (type1) != FUNCTION_TYPE)
2889 return 1;
2890
2891 /* Check for mismatched fastcall/regparm types. */
2892 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2893 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2894 || (ix86_function_regparm (type1, NULL)
2895 != ix86_function_regparm (type2, NULL)))
2896 return 0;
2897
2898 /* Check for mismatched sseregparm types. */
2899 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2900 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2901 return 0;
2902
2903 /* Check for mismatched return types (cdecl vs stdcall). */
2904 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2905 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2906 return 0;
2907
2908 return 1;
2909 }
2910 \f
2911 /* Return the regparm value for a function with the indicated TYPE and DECL.
2912 DECL may be NULL when calling function indirectly
2913 or considering a libcall. */
2914
2915 static int
2916 ix86_function_regparm (tree type, tree decl)
2917 {
2918 tree attr;
2919 int regparm = ix86_regparm;
2920 bool user_convention = false;
2921
2922 if (!TARGET_64BIT)
2923 {
2924 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2925 if (attr)
2926 {
2927 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2928 user_convention = true;
2929 }
2930
2931 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2932 {
2933 regparm = 2;
2934 user_convention = true;
2935 }
2936
2937 /* Use register calling convention for local functions when possible. */
2938 if (!TARGET_64BIT && !user_convention && decl
2939 && flag_unit_at_a_time && !profile_flag)
2940 {
2941 struct cgraph_local_info *i = cgraph_local_info (decl);
2942 if (i && i->local)
2943 {
2944 int local_regparm, globals = 0, regno;
2945
2946 /* Make sure no regparm register is taken by a global register
2947 variable. */
2948 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2949 if (global_regs[local_regparm])
2950 break;
2951 /* We can't use regparm(3) for nested functions as these use the
2952 static chain pointer in the third argument. */
2953 if (local_regparm == 3
2954 && decl_function_context (decl)
2955 && !DECL_NO_STATIC_CHAIN (decl))
2956 local_regparm = 2;
2957 /* If the function realigns its stack pointer, the
2958 prologue will clobber %ecx. If we've already
2959 generated code for the callee, the callee
2960 DECL_STRUCT_FUNCTION is gone, so we fall back to
2961 scanning the attributes for the self-realigning
2962 property. */
2963 if ((DECL_STRUCT_FUNCTION (decl)
2964 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
2965 || (!DECL_STRUCT_FUNCTION (decl)
2966 && lookup_attribute (ix86_force_align_arg_pointer_string,
2967 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2968 local_regparm = 2;
2969 /* Each global register variable increases register pressure, so
2970 the more global register variables there are, the less the regparm
2971 optimization helps, unless requested by the user explicitly. */
2972 for (regno = 0; regno < 6; regno++)
2973 if (global_regs[regno])
2974 globals++;
2975 local_regparm
2976 = globals < local_regparm ? local_regparm - globals : 0;
2977
2978 if (local_regparm > regparm)
2979 regparm = local_regparm;
2980 }
2981 }
2982 }
2983 return regparm;
2984 }
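
/* For example (illustrative only), with -funit-at-a-time a purely local
   function such as

     static int
     sum3 (int a, int b, int c)
     {
       return a + b + c;
     }

   whose address is never taken may be promoted to regparm (3) by the
   code above, so A, B and C arrive in %eax, %edx and %ecx instead of on
   the stack.  A nested function, or one that realigns its stack
   pointer, is limited to regparm (2) as noted above.  */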
2985
2986 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2987 DFmode (2) arguments in SSE registers for a function with the
2988 indicated TYPE and DECL. DECL may be NULL when calling function
2989 indirectly or considering a libcall. Otherwise return 0. */
2990
2991 static int
2992 ix86_function_sseregparm (tree type, tree decl)
2993 {
2994 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2995 by the sseregparm attribute. */
2996 if (TARGET_SSEREGPARM
2997 || (type
2998 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2999 {
3000 if (!TARGET_SSE)
3001 {
3002 if (decl)
3003 error ("Calling %qD with attribute sseregparm without "
3004 "SSE/SSE2 enabled", decl);
3005 else
3006 error ("Calling %qT with attribute sseregparm without "
3007 "SSE/SSE2 enabled", type);
3008 return 0;
3009 }
3010
3011 return 2;
3012 }
3013
3014 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3015 (and DFmode for SSE2) arguments in SSE registers,
3016 even for 32-bit targets. */
3017 if (!TARGET_64BIT && decl
3018 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3019 {
3020 struct cgraph_local_info *i = cgraph_local_info (decl);
3021 if (i && i->local)
3022 return TARGET_SSE2 ? 2 : 1;
3023 }
3024
3025 return 0;
3026 }
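
/* For example (illustrative only), on a 32-bit target with SSE enabled

     double __attribute__((sseregparm)) dot (double x, double y);

   passes X and Y in %xmm0 and %xmm1 and returns the result in %xmm0
   (see ix86_value_regno) instead of using the stack and %st(0).  */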
3027
3028 /* Return true if EAX is live at the start of the function. Used by
3029 ix86_expand_prologue to determine if we need special help before
3030 calling allocate_stack_worker. */
3031
3032 static bool
3033 ix86_eax_live_at_start_p (void)
3034 {
3035 /* Cheat. Don't bother working forward from ix86_function_regparm
3036 to the function type to whether an actual argument is located in
3037 eax. Instead just look at cfg info, which is still close enough
3038 to correct at this point. This gives false positives for broken
3039 functions that might use uninitialized data that happens to be
3040 allocated in eax, but who cares? */
3041 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3042 }
3043
3044 /* Value is the number of bytes of arguments automatically
3045 popped when returning from a subroutine call.
3046 FUNDECL is the declaration node of the function (as a tree),
3047 FUNTYPE is the data type of the function (as a tree),
3048 or for a library call it is an identifier node for the subroutine name.
3049 SIZE is the number of bytes of arguments passed on the stack.
3050
3051 On the 80386, the RTD insn may be used to pop them if the number
3052 of args is fixed, but if the number is variable then the caller
3053 must pop them all. RTD can't be used for library calls now
3054 because the library is compiled with the Unix compiler.
3055 Use of RTD is a selectable option, since it is incompatible with
3056 standard Unix calling sequences. If the option is not selected,
3057 the caller must always pop the args.
3058
3059 The attribute stdcall is equivalent to RTD on a per module basis. */
3060
3061 int
3062 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3063 {
3064 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3065
3066 /* Cdecl functions override -mrtd, and never pop the stack. */
3067 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3068
3069 /* Stdcall and fastcall functions will pop the stack if not
3070 variable args. */
3071 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3072 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3073 rtd = 1;
3074
3075 if (rtd
3076 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3077 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3078 == void_type_node)))
3079 return size;
3080 }
3081
3082 /* Lose any fake structure return argument if it is passed on the stack. */
3083 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3084 && !TARGET_64BIT
3085 && !KEEP_AGGREGATE_RETURN_POINTER)
3086 {
3087 int nregs = ix86_function_regparm (funtype, fundecl);
3088
3089 if (!nregs)
3090 return GET_MODE_SIZE (Pmode);
3091 }
3092
3093 return 0;
3094 }
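
/* For example (illustrative only):

     int __attribute__((stdcall)) f (int a, int b);   callee pops 8 bytes
     int __attribute__((stdcall)) g (int a, ...);     caller pops (varargs)
     int h (int a, int b);                            caller pops (cdecl
                                                      default, unless -mrtd)

   so a call to F is matched by a "ret $8" in F itself.  */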
3095 \f
3096 /* Argument support functions. */
3097
3098 /* Return true when register may be used to pass function parameters. */
3099 bool
3100 ix86_function_arg_regno_p (int regno)
3101 {
3102 int i;
3103 if (!TARGET_64BIT)
3104 {
3105 if (TARGET_MACHO)
3106 return (regno < REGPARM_MAX
3107 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3108 else
3109 return (regno < REGPARM_MAX
3110 || (TARGET_MMX && MMX_REGNO_P (regno)
3111 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3112 || (TARGET_SSE && SSE_REGNO_P (regno)
3113 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3114 }
3115
3116 if (TARGET_MACHO)
3117 {
3118 if (SSE_REGNO_P (regno) && TARGET_SSE)
3119 return true;
3120 }
3121 else
3122 {
3123 if (TARGET_SSE && SSE_REGNO_P (regno)
3124 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3125 return true;
3126 }
3127 /* RAX is used as a hidden argument to va_arg functions. */
3128 if (!regno)
3129 return true;
3130 for (i = 0; i < REGPARM_MAX; i++)
3131 if (regno == x86_64_int_parameter_registers[i])
3132 return true;
3133 return false;
3134 }
3135
3136 /* Return true if we do not know how to pass TYPE solely in registers. */
3137
3138 static bool
3139 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3140 {
3141 if (must_pass_in_stack_var_size_or_pad (mode, type))
3142 return true;
3143
3144 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3145 The layout_type routine is crafty and tries to trick us into passing
3146 currently unsupported vector types on the stack by using TImode. */
3147 return (!TARGET_64BIT && mode == TImode
3148 && type && TREE_CODE (type) != VECTOR_TYPE);
3149 }
3150
3151 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3152 for a call to a function whose data type is FNTYPE.
3153 For a library call, FNTYPE is 0. */
3154
3155 void
3156 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3157 tree fntype, /* tree ptr for function decl */
3158 rtx libname, /* SYMBOL_REF of library name or 0 */
3159 tree fndecl)
3160 {
3161 static CUMULATIVE_ARGS zero_cum;
3162 tree param, next_param;
3163
3164 if (TARGET_DEBUG_ARG)
3165 {
3166 fprintf (stderr, "\ninit_cumulative_args (");
3167 if (fntype)
3168 fprintf (stderr, "fntype code = %s, ret code = %s",
3169 tree_code_name[(int) TREE_CODE (fntype)],
3170 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3171 else
3172 fprintf (stderr, "no fntype");
3173
3174 if (libname)
3175 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3176 }
3177
3178 *cum = zero_cum;
3179
3180 /* Set up the number of registers to use for passing arguments. */
3181 cum->nregs = ix86_regparm;
3182 if (TARGET_SSE)
3183 cum->sse_nregs = SSE_REGPARM_MAX;
3184 if (TARGET_MMX)
3185 cum->mmx_nregs = MMX_REGPARM_MAX;
3186 cum->warn_sse = true;
3187 cum->warn_mmx = true;
3188 cum->maybe_vaarg = false;
3189
3190 /* Use ecx and edx registers if function has fastcall attribute,
3191 else look for regparm information. */
3192 if (fntype && !TARGET_64BIT)
3193 {
3194 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3195 {
3196 cum->nregs = 2;
3197 cum->fastcall = 1;
3198 }
3199 else
3200 cum->nregs = ix86_function_regparm (fntype, fndecl);
3201 }
3202
3203 /* Set up the number of SSE registers used for passing SFmode
3204 and DFmode arguments. Warn for mismatching ABI. */
3205 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3206
3207 /* Determine if this function has variable arguments. This is
3208 indicated by the last argument being 'void_type_node' if there
3209 are no variable arguments. If there are variable arguments, then
3210 we won't pass anything in registers in 32-bit mode. */
3211
3212 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3213 {
3214 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3215 param != 0; param = next_param)
3216 {
3217 next_param = TREE_CHAIN (param);
3218 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3219 {
3220 if (!TARGET_64BIT)
3221 {
3222 cum->nregs = 0;
3223 cum->sse_nregs = 0;
3224 cum->mmx_nregs = 0;
3225 cum->warn_sse = 0;
3226 cum->warn_mmx = 0;
3227 cum->fastcall = 0;
3228 cum->float_in_sse = 0;
3229 }
3230 cum->maybe_vaarg = true;
3231 }
3232 }
3233 }
3234 if ((!fntype && !libname)
3235 || (fntype && !TYPE_ARG_TYPES (fntype)))
3236 cum->maybe_vaarg = true;
3237
3238 if (TARGET_DEBUG_ARG)
3239 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3240
3241 return;
3242 }
3243
3244 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3245 But in the case of vector types, it is some vector mode.
3246
3247 When we have only some of our vector ISA extensions enabled, then there
3248 are some modes for which vector_mode_supported_p is false. For these
3249 modes, the generic vector support in gcc will choose some non-vector mode
3250 in order to implement the type. By computing the natural mode, we'll
3251 select the proper ABI location for the operand and not depend on whatever
3252 the middle-end decides to do with these vector types. */
3253
3254 static enum machine_mode
3255 type_natural_mode (tree type)
3256 {
3257 enum machine_mode mode = TYPE_MODE (type);
3258
3259 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3260 {
3261 HOST_WIDE_INT size = int_size_in_bytes (type);
3262 if ((size == 8 || size == 16)
3263 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3264 && TYPE_VECTOR_SUBPARTS (type) > 1)
3265 {
3266 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3267
3268 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3269 mode = MIN_MODE_VECTOR_FLOAT;
3270 else
3271 mode = MIN_MODE_VECTOR_INT;
3272
3273 /* Get the mode which has this inner mode and number of units. */
3274 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3275 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3276 && GET_MODE_INNER (mode) == innermode)
3277 return mode;
3278
3279 gcc_unreachable ();
3280 }
3281 }
3282
3283 return mode;
3284 }
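
/* For example (illustrative only), given

     typedef int v2si __attribute__ ((vector_size (8)));

   TYPE_MODE may be a non-vector mode when MMX is disabled, but
   type_natural_mode still returns V2SImode (size 8, two SImode units),
   so the ABI location is chosen from the natural vector mode.  */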
3285
3286 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3287 this may not agree with the mode that the type system has chosen for the
3288 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3289 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3290
3291 static rtx
3292 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3293 unsigned int regno)
3294 {
3295 rtx tmp;
3296
3297 if (orig_mode != BLKmode)
3298 tmp = gen_rtx_REG (orig_mode, regno);
3299 else
3300 {
3301 tmp = gen_rtx_REG (mode, regno);
3302 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3303 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3304 }
3305
3306 return tmp;
3307 }
3308
3309 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3310 The goal of this code is to classify each eightbyte of the incoming argument
3311 by register class and assign registers accordingly. */
3312
3313 /* Return the union class of CLASS1 and CLASS2.
3314 See the x86-64 PS ABI for details. */
3315
3316 static enum x86_64_reg_class
3317 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3318 {
3319 /* Rule #1: If both classes are equal, this is the resulting class. */
3320 if (class1 == class2)
3321 return class1;
3322
3323 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3324 the other class. */
3325 if (class1 == X86_64_NO_CLASS)
3326 return class2;
3327 if (class2 == X86_64_NO_CLASS)
3328 return class1;
3329
3330 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3331 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3332 return X86_64_MEMORY_CLASS;
3333
3334 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3335 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3336 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3337 return X86_64_INTEGERSI_CLASS;
3338 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3339 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3340 return X86_64_INTEGER_CLASS;
3341
3342 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3343 MEMORY is used. */
3344 if (class1 == X86_64_X87_CLASS
3345 || class1 == X86_64_X87UP_CLASS
3346 || class1 == X86_64_COMPLEX_X87_CLASS
3347 || class2 == X86_64_X87_CLASS
3348 || class2 == X86_64_X87UP_CLASS
3349 || class2 == X86_64_COMPLEX_X87_CLASS)
3350 return X86_64_MEMORY_CLASS;
3351
3352 /* Rule #6: Otherwise class SSE is used. */
3353 return X86_64_SSE_CLASS;
3354 }
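
/* For example (illustrative only), classifying

     union u { double d; long l; };

   yields X86_64_SSEDF_CLASS for the double and X86_64_INTEGER_CLASS for
   the long; rule #4 merges them to X86_64_INTEGER_CLASS, so the union
   is passed in a general purpose register.  */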
3355
3356 /* Classify the argument of type TYPE and mode MODE.
3357 CLASSES will be filled by the register class used to pass each word
3358 of the operand. The number of words is returned. In case the parameter
3359 should be passed in memory, 0 is returned. As a special case for zero
3360 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3361
3362 BIT_OFFSET is used internally for handling records and specifies the
3363 offset in bits modulo 256 to avoid overflow cases.
3364
3365 See the x86-64 PS ABI for details.
3366 */
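
/* For example (illustrative only), on x86-64

     struct s1 { double d; int i; };    16 bytes
     struct s2 { long a, b, c; };       24 bytes

   S1 occupies two eightbytes: the first (the double) classifies as SSE,
   the second (the int plus padding) as INTEGER, so it is passed in one
   SSE register and one general purpose register.  S2 is larger than 16
   bytes, so classify_argument returns 0 and it is passed in memory.  */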
3367
3368 static int
3369 classify_argument (enum machine_mode mode, tree type,
3370 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3371 {
3372 HOST_WIDE_INT bytes =
3373 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3374 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3375
3376 /* Variable sized entities are always passed/returned in memory. */
3377 if (bytes < 0)
3378 return 0;
3379
3380 if (mode != VOIDmode
3381 && targetm.calls.must_pass_in_stack (mode, type))
3382 return 0;
3383
3384 if (type && AGGREGATE_TYPE_P (type))
3385 {
3386 int i;
3387 tree field;
3388 enum x86_64_reg_class subclasses[MAX_CLASSES];
3389
3390 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3391 if (bytes > 16)
3392 return 0;
3393
3394 for (i = 0; i < words; i++)
3395 classes[i] = X86_64_NO_CLASS;
3396
3397 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3398 signal the memory class, so handle it as a special case. */
3399 if (!words)
3400 {
3401 classes[0] = X86_64_NO_CLASS;
3402 return 1;
3403 }
3404
3405 /* Classify each field of record and merge classes. */
3406 switch (TREE_CODE (type))
3407 {
3408 case RECORD_TYPE:
3409 /* And now merge the fields of structure. */
3410 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3411 {
3412 if (TREE_CODE (field) == FIELD_DECL)
3413 {
3414 int num;
3415
3416 if (TREE_TYPE (field) == error_mark_node)
3417 continue;
3418
3419 /* Bitfields are always classified as integer. Handle them
3420 early, since later code would consider them to be
3421 misaligned integers. */
3422 if (DECL_BIT_FIELD (field))
3423 {
3424 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3425 i < ((int_bit_position (field) + (bit_offset % 64))
3426 + tree_low_cst (DECL_SIZE (field), 0)
3427 + 63) / 8 / 8; i++)
3428 classes[i] =
3429 merge_classes (X86_64_INTEGER_CLASS,
3430 classes[i]);
3431 }
3432 else
3433 {
3434 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3435 TREE_TYPE (field), subclasses,
3436 (int_bit_position (field)
3437 + bit_offset) % 256);
3438 if (!num)
3439 return 0;
3440 for (i = 0; i < num; i++)
3441 {
3442 int pos =
3443 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3444 classes[i + pos] =
3445 merge_classes (subclasses[i], classes[i + pos]);
3446 }
3447 }
3448 }
3449 }
3450 break;
3451
3452 case ARRAY_TYPE:
3453 /* Arrays are handled as small records. */
3454 {
3455 int num;
3456 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3457 TREE_TYPE (type), subclasses, bit_offset);
3458 if (!num)
3459 return 0;
3460
3461 /* The partial classes are now full classes. */
3462 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3463 subclasses[0] = X86_64_SSE_CLASS;
3464 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3465 subclasses[0] = X86_64_INTEGER_CLASS;
3466
3467 for (i = 0; i < words; i++)
3468 classes[i] = subclasses[i % num];
3469
3470 break;
3471 }
3472 case UNION_TYPE:
3473 case QUAL_UNION_TYPE:
3474 /* Unions are similar to RECORD_TYPE but the offset is always 0. */
3476 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3477 {
3478 if (TREE_CODE (field) == FIELD_DECL)
3479 {
3480 int num;
3481
3482 if (TREE_TYPE (field) == error_mark_node)
3483 continue;
3484
3485 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3486 TREE_TYPE (field), subclasses,
3487 bit_offset);
3488 if (!num)
3489 return 0;
3490 for (i = 0; i < num; i++)
3491 classes[i] = merge_classes (subclasses[i], classes[i]);
3492 }
3493 }
3494 break;
3495
3496 default:
3497 gcc_unreachable ();
3498 }
3499
3500 /* Final merger cleanup. */
3501 for (i = 0; i < words; i++)
3502 {
3503 /* If one class is MEMORY, everything should be passed in
3504 memory. */
3505 if (classes[i] == X86_64_MEMORY_CLASS)
3506 return 0;
3507
3508 /* The X86_64_SSEUP_CLASS should always be preceded by
3509 X86_64_SSE_CLASS. */
3510 if (classes[i] == X86_64_SSEUP_CLASS
3511 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3512 classes[i] = X86_64_SSE_CLASS;
3513
3514 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3515 if (classes[i] == X86_64_X87UP_CLASS
3516 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3517 classes[i] = X86_64_SSE_CLASS;
3518 }
3519 return words;
3520 }
3521
3522 /* Compute the alignment needed. We align all types to natural boundaries,
3523 with the exception of XFmode, which is aligned to 64 bits. */
3524 if (mode != VOIDmode && mode != BLKmode)
3525 {
3526 int mode_alignment = GET_MODE_BITSIZE (mode);
3527
3528 if (mode == XFmode)
3529 mode_alignment = 128;
3530 else if (mode == XCmode)
3531 mode_alignment = 256;
3532 if (COMPLEX_MODE_P (mode))
3533 mode_alignment /= 2;
3534 /* Misaligned fields are always returned in memory. */
3535 if (bit_offset % mode_alignment)
3536 return 0;
3537 }
3538
3539 /* For V1xx modes, just use the base mode. */
3540 if (VECTOR_MODE_P (mode)
3541 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3542 mode = GET_MODE_INNER (mode);
3543
3544 /* Classification of atomic types. */
3545 switch (mode)
3546 {
3547 case SDmode:
3548 case DDmode:
3549 classes[0] = X86_64_SSE_CLASS;
3550 return 1;
3551 case TDmode:
3552 classes[0] = X86_64_SSE_CLASS;
3553 classes[1] = X86_64_SSEUP_CLASS;
3554 return 2;
3555 case DImode:
3556 case SImode:
3557 case HImode:
3558 case QImode:
3559 case CSImode:
3560 case CHImode:
3561 case CQImode:
3562 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3563 classes[0] = X86_64_INTEGERSI_CLASS;
3564 else
3565 classes[0] = X86_64_INTEGER_CLASS;
3566 return 1;
3567 case CDImode:
3568 case TImode:
3569 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3570 return 2;
3571 case CTImode:
3572 return 0;
3573 case SFmode:
3574 if (!(bit_offset % 64))
3575 classes[0] = X86_64_SSESF_CLASS;
3576 else
3577 classes[0] = X86_64_SSE_CLASS;
3578 return 1;
3579 case DFmode:
3580 classes[0] = X86_64_SSEDF_CLASS;
3581 return 1;
3582 case XFmode:
3583 classes[0] = X86_64_X87_CLASS;
3584 classes[1] = X86_64_X87UP_CLASS;
3585 return 2;
3586 case TFmode:
3587 classes[0] = X86_64_SSE_CLASS;
3588 classes[1] = X86_64_SSEUP_CLASS;
3589 return 2;
3590 case SCmode:
3591 classes[0] = X86_64_SSE_CLASS;
3592 return 1;
3593 case DCmode:
3594 classes[0] = X86_64_SSEDF_CLASS;
3595 classes[1] = X86_64_SSEDF_CLASS;
3596 return 2;
3597 case XCmode:
3598 classes[0] = X86_64_COMPLEX_X87_CLASS;
3599 return 1;
3600 case TCmode:
3601 /* This mode is larger than 16 bytes. */
3602 return 0;
3603 case V4SFmode:
3604 case V4SImode:
3605 case V16QImode:
3606 case V8HImode:
3607 case V2DFmode:
3608 case V2DImode:
3609 classes[0] = X86_64_SSE_CLASS;
3610 classes[1] = X86_64_SSEUP_CLASS;
3611 return 2;
3612 case V2SFmode:
3613 case V2SImode:
3614 case V4HImode:
3615 case V8QImode:
3616 classes[0] = X86_64_SSE_CLASS;
3617 return 1;
3618 case BLKmode:
3619 case VOIDmode:
3620 return 0;
3621 default:
3622 gcc_assert (VECTOR_MODE_P (mode));
3623
3624 if (bytes > 16)
3625 return 0;
3626
3627 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3628
3629 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3630 classes[0] = X86_64_INTEGERSI_CLASS;
3631 else
3632 classes[0] = X86_64_INTEGER_CLASS;
3633 classes[1] = X86_64_INTEGER_CLASS;
3634 return 1 + (bytes > 8);
3635 }
3636 }
3637
3638 /* Examine the argument and return the number of registers required in each
3639 class. Return 0 iff the parameter should be passed in memory. */
3640 static int
3641 examine_argument (enum machine_mode mode, tree type, int in_return,
3642 int *int_nregs, int *sse_nregs)
3643 {
3644 enum x86_64_reg_class class[MAX_CLASSES];
3645 int n = classify_argument (mode, type, class, 0);
3646
3647 *int_nregs = 0;
3648 *sse_nregs = 0;
3649 if (!n)
3650 return 0;
3651 for (n--; n >= 0; n--)
3652 switch (class[n])
3653 {
3654 case X86_64_INTEGER_CLASS:
3655 case X86_64_INTEGERSI_CLASS:
3656 (*int_nregs)++;
3657 break;
3658 case X86_64_SSE_CLASS:
3659 case X86_64_SSESF_CLASS:
3660 case X86_64_SSEDF_CLASS:
3661 (*sse_nregs)++;
3662 break;
3663 case X86_64_NO_CLASS:
3664 case X86_64_SSEUP_CLASS:
3665 break;
3666 case X86_64_X87_CLASS:
3667 case X86_64_X87UP_CLASS:
3668 if (!in_return)
3669 return 0;
3670 break;
3671 case X86_64_COMPLEX_X87_CLASS:
3672 return in_return ? 2 : 0;
3673 case X86_64_MEMORY_CLASS:
3674 gcc_unreachable ();
3675 }
3676 return 1;
3677 }
3678
3679 /* Construct container for the argument used by GCC interface. See
3680 FUNCTION_ARG for the detailed description. */
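
/* For the struct s1 example above (a double followed by an int), passed
   as the first argument, this builds schematically

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di) (const_int 8))])

   i.e. one entry per eightbyte together with its byte offset; the exact
   registers depend on how many argument slots are already in use
   (illustrative only).  */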
3681
3682 static rtx
3683 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3684 tree type, int in_return, int nintregs, int nsseregs,
3685 const int *intreg, int sse_regno)
3686 {
3687 /* The following variables hold the static issued_error state. */
3688 static bool issued_sse_arg_error;
3689 static bool issued_sse_ret_error;
3690 static bool issued_x87_ret_error;
3691
3692 enum machine_mode tmpmode;
3693 int bytes =
3694 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3695 enum x86_64_reg_class class[MAX_CLASSES];
3696 int n;
3697 int i;
3698 int nexps = 0;
3699 int needed_sseregs, needed_intregs;
3700 rtx exp[MAX_CLASSES];
3701 rtx ret;
3702
3703 n = classify_argument (mode, type, class, 0);
3704 if (TARGET_DEBUG_ARG)
3705 {
3706 if (!n)
3707 fprintf (stderr, "Memory class\n");
3708 else
3709 {
3710 fprintf (stderr, "Classes:");
3711 for (i = 0; i < n; i++)
3712 {
3713 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3714 }
3715 fprintf (stderr, "\n");
3716 }
3717 }
3718 if (!n)
3719 return NULL;
3720 if (!examine_argument (mode, type, in_return, &needed_intregs,
3721 &needed_sseregs))
3722 return NULL;
3723 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3724 return NULL;
3725
3726 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3727 some less clueful developer tries to use floating-point anyway. */
3728 if (needed_sseregs && !TARGET_SSE)
3729 {
3730 if (in_return)
3731 {
3732 if (!issued_sse_ret_error)
3733 {
3734 error ("SSE register return with SSE disabled");
3735 issued_sse_ret_error = true;
3736 }
3737 }
3738 else if (!issued_sse_arg_error)
3739 {
3740 error ("SSE register argument with SSE disabled");
3741 issued_sse_arg_error = true;
3742 }
3743 return NULL;
3744 }
3745
3746 /* Likewise, error if the ABI requires us to return values in the
3747 x87 registers and the user specified -mno-80387. */
3748 if (!TARGET_80387 && in_return)
3749 for (i = 0; i < n; i++)
3750 if (class[i] == X86_64_X87_CLASS
3751 || class[i] == X86_64_X87UP_CLASS
3752 || class[i] == X86_64_COMPLEX_X87_CLASS)
3753 {
3754 if (!issued_x87_ret_error)
3755 {
3756 error ("x87 register return with x87 disabled");
3757 issued_x87_ret_error = true;
3758 }
3759 return NULL;
3760 }
3761
3762 /* First construct simple cases. Avoid SCmode, since we want to use a
3763 single register to pass this type. */
3764 if (n == 1 && mode != SCmode)
3765 switch (class[0])
3766 {
3767 case X86_64_INTEGER_CLASS:
3768 case X86_64_INTEGERSI_CLASS:
3769 return gen_rtx_REG (mode, intreg[0]);
3770 case X86_64_SSE_CLASS:
3771 case X86_64_SSESF_CLASS:
3772 case X86_64_SSEDF_CLASS:
3773 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3774 case X86_64_X87_CLASS:
3775 case X86_64_COMPLEX_X87_CLASS:
3776 return gen_rtx_REG (mode, FIRST_STACK_REG);
3777 case X86_64_NO_CLASS:
3778 /* Zero sized array, struct or class. */
3779 return NULL;
3780 default:
3781 gcc_unreachable ();
3782 }
3783 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3784 && mode != BLKmode)
3785 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3786 if (n == 2
3787 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3788 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3789 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3790 && class[1] == X86_64_INTEGER_CLASS
3791 && (mode == CDImode || mode == TImode || mode == TFmode)
3792 && intreg[0] + 1 == intreg[1])
3793 return gen_rtx_REG (mode, intreg[0]);
3794
3795 /* Otherwise figure out the entries of the PARALLEL. */
3796 for (i = 0; i < n; i++)
3797 {
3798 switch (class[i])
3799 {
3800 case X86_64_NO_CLASS:
3801 break;
3802 case X86_64_INTEGER_CLASS:
3803 case X86_64_INTEGERSI_CLASS:
3804 /* Merge TImodes on aligned occasions here too. */
3805 if (i * 8 + 8 > bytes)
3806 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3807 else if (class[i] == X86_64_INTEGERSI_CLASS)
3808 tmpmode = SImode;
3809 else
3810 tmpmode = DImode;
3811 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3812 if (tmpmode == BLKmode)
3813 tmpmode = DImode;
3814 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3815 gen_rtx_REG (tmpmode, *intreg),
3816 GEN_INT (i*8));
3817 intreg++;
3818 break;
3819 case X86_64_SSESF_CLASS:
3820 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3821 gen_rtx_REG (SFmode,
3822 SSE_REGNO (sse_regno)),
3823 GEN_INT (i*8));
3824 sse_regno++;
3825 break;
3826 case X86_64_SSEDF_CLASS:
3827 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3828 gen_rtx_REG (DFmode,
3829 SSE_REGNO (sse_regno)),
3830 GEN_INT (i*8));
3831 sse_regno++;
3832 break;
3833 case X86_64_SSE_CLASS:
3834 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3835 tmpmode = TImode;
3836 else
3837 tmpmode = DImode;
3838 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3839 gen_rtx_REG (tmpmode,
3840 SSE_REGNO (sse_regno)),
3841 GEN_INT (i*8));
3842 if (tmpmode == TImode)
3843 i++;
3844 sse_regno++;
3845 break;
3846 default:
3847 gcc_unreachable ();
3848 }
3849 }
3850
3851 /* Empty aligned struct, union or class. */
3852 if (nexps == 0)
3853 return NULL;
3854
3855 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3856 for (i = 0; i < nexps; i++)
3857 XVECEXP (ret, 0, i) = exp [i];
3858 return ret;
3859 }
3860
3861 /* Update the data in CUM to advance over an argument
3862 of mode MODE and data type TYPE.
3863 (TYPE is null for libcalls where that information may not be available.) */
3864
3865 void
3866 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3867 tree type, int named)
3868 {
3869 int bytes =
3870 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3871 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3872
3873 if (type)
3874 mode = type_natural_mode (type);
3875
3876 if (TARGET_DEBUG_ARG)
3877 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3878 "mode=%s, named=%d)\n\n",
3879 words, cum->words, cum->nregs, cum->sse_nregs,
3880 GET_MODE_NAME (mode), named);
3881
3882 if (TARGET_64BIT)
3883 {
3884 int int_nregs, sse_nregs;
3885 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3886 cum->words += words;
3887 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3888 {
3889 cum->nregs -= int_nregs;
3890 cum->sse_nregs -= sse_nregs;
3891 cum->regno += int_nregs;
3892 cum->sse_regno += sse_nregs;
3893 }
3894 else
3895 cum->words += words;
3896 }
3897 else
3898 {
3899 switch (mode)
3900 {
3901 default:
3902 break;
3903
3904 case BLKmode:
3905 if (bytes < 0)
3906 break;
3907 /* FALLTHRU */
3908
3909 case DImode:
3910 case SImode:
3911 case HImode:
3912 case QImode:
3913 cum->words += words;
3914 cum->nregs -= words;
3915 cum->regno += words;
3916
3917 if (cum->nregs <= 0)
3918 {
3919 cum->nregs = 0;
3920 cum->regno = 0;
3921 }
3922 break;
3923
3924 case DFmode:
3925 if (cum->float_in_sse < 2)
3926 break;
3927 case SFmode:
3928 if (cum->float_in_sse < 1)
3929 break;
3930 /* FALLTHRU */
3931
3932 case TImode:
3933 case V16QImode:
3934 case V8HImode:
3935 case V4SImode:
3936 case V2DImode:
3937 case V4SFmode:
3938 case V2DFmode:
3939 if (!type || !AGGREGATE_TYPE_P (type))
3940 {
3941 cum->sse_words += words;
3942 cum->sse_nregs -= 1;
3943 cum->sse_regno += 1;
3944 if (cum->sse_nregs <= 0)
3945 {
3946 cum->sse_nregs = 0;
3947 cum->sse_regno = 0;
3948 }
3949 }
3950 break;
3951
3952 case V8QImode:
3953 case V4HImode:
3954 case V2SImode:
3955 case V2SFmode:
3956 if (!type || !AGGREGATE_TYPE_P (type))
3957 {
3958 cum->mmx_words += words;
3959 cum->mmx_nregs -= 1;
3960 cum->mmx_regno += 1;
3961 if (cum->mmx_nregs <= 0)
3962 {
3963 cum->mmx_nregs = 0;
3964 cum->mmx_regno = 0;
3965 }
3966 }
3967 break;
3968 }
3969 }
3970 }
3971
3972 /* Define where to put the arguments to a function.
3973 Value is zero to push the argument on the stack,
3974 or a hard register in which to store the argument.
3975
3976 MODE is the argument's machine mode.
3977 TYPE is the data type of the argument (as a tree).
3978 This is null for libcalls where that information may
3979 not be available.
3980 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3981 the preceding args and about the function being called.
3982 NAMED is nonzero if this argument is a named parameter
3983 (otherwise it is an extra parameter matching an ellipsis). */
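
/* For example (illustrative only), for

     void f (int a, double b, int c);

   the 64-bit ABI passes A in %edi, B in %xmm0 and C in %esi; the 32-bit
   default passes all three on the stack; with __attribute__((fastcall))
   the 32-bit ABI passes A in %ecx and C in %edx while B stays on the
   stack.  */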
3984
3985 rtx
3986 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
3987 tree type, int named)
3988 {
3989 enum machine_mode mode = orig_mode;
3990 rtx ret = NULL_RTX;
3991 int bytes =
3992 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3993 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3994 static bool warnedsse, warnedmmx;
3995
3996 /* To simplify the code below, represent vector types with a vector mode
3997 even if MMX/SSE are not active. */
3998 if (type && TREE_CODE (type) == VECTOR_TYPE)
3999 mode = type_natural_mode (type);
4000
4001 /* Handle a hidden AL argument containing the number of registers
4002 for varargs x86-64 functions. For the i386 ABI just return constm1_rtx
4003 to avoid any AL settings. */
4004 if (mode == VOIDmode)
4005 {
4006 if (TARGET_64BIT)
4007 return GEN_INT (cum->maybe_vaarg
4008 ? (cum->sse_nregs < 0
4009 ? SSE_REGPARM_MAX
4010 : cum->sse_regno)
4011 : -1);
4012 else
4013 return constm1_rtx;
4014 }
4015 if (TARGET_64BIT)
4016 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4017 cum->sse_nregs,
4018 &x86_64_int_parameter_registers [cum->regno],
4019 cum->sse_regno);
4020 else
4021 switch (mode)
4022 {
4023 /* For now, pass fp/complex values on the stack. */
4024 default:
4025 break;
4026
4027 case BLKmode:
4028 if (bytes < 0)
4029 break;
4030 /* FALLTHRU */
4031 case DImode:
4032 case SImode:
4033 case HImode:
4034 case QImode:
4035 if (words <= cum->nregs)
4036 {
4037 int regno = cum->regno;
4038
4039 /* Fastcall allocates the first two DWORD (SImode) or
4040 smaller arguments to ECX and EDX. */
4041 if (cum->fastcall)
4042 {
4043 if (mode == BLKmode || mode == DImode)
4044 break;
4045
4046 /* ECX, not EAX, is the first allocated register. */
4047 if (regno == 0)
4048 regno = 2;
4049 }
4050 ret = gen_rtx_REG (mode, regno);
4051 }
4052 break;
4053 case DFmode:
4054 if (cum->float_in_sse < 2)
4055 break;
4056 case SFmode:
4057 if (cum->float_in_sse < 1)
4058 break;
4059 /* FALLTHRU */
4060 case TImode:
4061 case V16QImode:
4062 case V8HImode:
4063 case V4SImode:
4064 case V2DImode:
4065 case V4SFmode:
4066 case V2DFmode:
4067 if (!type || !AGGREGATE_TYPE_P (type))
4068 {
4069 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4070 {
4071 warnedsse = true;
4072 warning (0, "SSE vector argument without SSE enabled "
4073 "changes the ABI");
4074 }
4075 if (cum->sse_nregs)
4076 ret = gen_reg_or_parallel (mode, orig_mode,
4077 cum->sse_regno + FIRST_SSE_REG);
4078 }
4079 break;
4080 case V8QImode:
4081 case V4HImode:
4082 case V2SImode:
4083 case V2SFmode:
4084 if (!type || !AGGREGATE_TYPE_P (type))
4085 {
4086 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4087 {
4088 warnedmmx = true;
4089 warning (0, "MMX vector argument without MMX enabled "
4090 "changes the ABI");
4091 }
4092 if (cum->mmx_nregs)
4093 ret = gen_reg_or_parallel (mode, orig_mode,
4094 cum->mmx_regno + FIRST_MMX_REG);
4095 }
4096 break;
4097 }
4098
4099 if (TARGET_DEBUG_ARG)
4100 {
4101 fprintf (stderr,
4102 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4103 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4104
4105 if (ret)
4106 print_simple_rtl (stderr, ret);
4107 else
4108 fprintf (stderr, ", stack");
4109
4110 fprintf (stderr, " )\n");
4111 }
4112
4113 return ret;
4114 }
4115
4116 /* A C expression that indicates when an argument must be passed by
4117 reference. If nonzero for an argument, a copy of that argument is
4118 made in memory and a pointer to the argument is passed instead of
4119 the argument itself. The pointer is passed in whatever way is
4120 appropriate for passing a pointer to that type. */
4121
4122 static bool
4123 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4124 enum machine_mode mode ATTRIBUTE_UNUSED,
4125 tree type, bool named ATTRIBUTE_UNUSED)
4126 {
4127 if (!TARGET_64BIT)
4128 return 0;
4129
4130 if (type && int_size_in_bytes (type) == -1)
4131 {
4132 if (TARGET_DEBUG_ARG)
4133 fprintf (stderr, "function_arg_pass_by_reference\n");
4134 return 1;
4135 }
4136
4137 return 0;
4138 }
4139
4140 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4141 passing ABI. Only called if TARGET_SSE. */
4142 static bool
4143 contains_128bit_aligned_vector_p (tree type)
4144 {
4145 enum machine_mode mode = TYPE_MODE (type);
4146 if (SSE_REG_MODE_P (mode)
4147 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4148 return true;
4149 if (TYPE_ALIGN (type) < 128)
4150 return false;
4151
4152 if (AGGREGATE_TYPE_P (type))
4153 {
4154 /* Walk the aggregates recursively. */
4155 switch (TREE_CODE (type))
4156 {
4157 case RECORD_TYPE:
4158 case UNION_TYPE:
4159 case QUAL_UNION_TYPE:
4160 {
4161 tree field;
4162
4163 /* Walk all the structure fields. */
4164 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4165 {
4166 if (TREE_CODE (field) == FIELD_DECL
4167 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4168 return true;
4169 }
4170 break;
4171 }
4172
4173 case ARRAY_TYPE:
4174 /* Just for use if some language passes arrays by value. */
4175 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4176 return true;
4177 break;
4178
4179 default:
4180 gcc_unreachable ();
4181 }
4182 }
4183 return false;
4184 }
4185
4186 /* Gives the alignment boundary, in bits, of an argument with the
4187 specified mode and type. */
4188
4189 int
4190 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4191 {
4192 int align;
4193 if (type)
4194 align = TYPE_ALIGN (type);
4195 else
4196 align = GET_MODE_ALIGNMENT (mode);
4197 if (align < PARM_BOUNDARY)
4198 align = PARM_BOUNDARY;
4199 if (!TARGET_64BIT)
4200 {
4201 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4202 make an exception for SSE modes since these require 128bit
4203 alignment.
4204
4205 The handling here differs from field_alignment. ICC aligns MMX
4206 arguments to 4 byte boundaries, while structure fields are aligned
4207 to 8 byte boundaries. */
4208 if (!TARGET_SSE)
4209 align = PARM_BOUNDARY;
4210 else if (!type)
4211 {
4212 if (!SSE_REG_MODE_P (mode))
4213 align = PARM_BOUNDARY;
4214 }
4215 else
4216 {
4217 if (!contains_128bit_aligned_vector_p (type))
4218 align = PARM_BOUNDARY;
4219 }
4220 }
4221 if (align > 128)
4222 align = 128;
4223 return align;
4224 }
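
/* For example (illustrative only), on the 32-bit ABI a double argument
   is aligned only to PARM_BOUNDARY (32 bits), while an __m128 argument,
   or a struct containing one, is aligned to 128 bits when SSE is
   enabled; without SSE everything falls back to PARM_BOUNDARY.  */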
4225
4226 /* Return true if N is a possible register number of function value. */
4227 bool
4228 ix86_function_value_regno_p (int regno)
4229 {
4230 if (TARGET_MACHO)
4231 {
4232 if (!TARGET_64BIT)
4233 {
4234 return ((regno) == 0
4235 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4236 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4237 }
4238 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4239 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4240 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4241 }
4242 else
4243 {
4244 if (regno == 0
4245 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4246 || (regno == FIRST_SSE_REG && TARGET_SSE))
4247 return true;
4248
4249 if (!TARGET_64BIT
4250 && (regno == FIRST_MMX_REG && TARGET_MMX))
4251 return true;
4252
4253 return false;
4254 }
4255 }
4256
4257 /* Define how to find the value returned by a function.
4258 VALTYPE is the data type of the value (as a tree).
4259 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4260 otherwise, FUNC is 0. */
4261 rtx
4262 ix86_function_value (tree valtype, tree fntype_or_decl,
4263 bool outgoing ATTRIBUTE_UNUSED)
4264 {
4265 enum machine_mode natmode = type_natural_mode (valtype);
4266
4267 if (TARGET_64BIT)
4268 {
4269 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4270 1, REGPARM_MAX, SSE_REGPARM_MAX,
4271 x86_64_int_return_registers, 0);
4272 /* For zero sized structures, construct_container returns NULL, but we
4273 need to keep the rest of the compiler happy by returning a meaningful value. */
4274 if (!ret)
4275 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4276 return ret;
4277 }
4278 else
4279 {
4280 tree fn = NULL_TREE, fntype;
4281 if (fntype_or_decl
4282 && DECL_P (fntype_or_decl))
4283 fn = fntype_or_decl;
4284 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4285 return gen_rtx_REG (TYPE_MODE (valtype),
4286 ix86_value_regno (natmode, fn, fntype));
4287 }
4288 }
4289
4290 /* Return true iff type is returned in memory. */
4291 int
4292 ix86_return_in_memory (tree type)
4293 {
4294 int needed_intregs, needed_sseregs, size;
4295 enum machine_mode mode = type_natural_mode (type);
4296
4297 if (TARGET_64BIT)
4298 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4299
4300 if (mode == BLKmode)
4301 return 1;
4302
4303 size = int_size_in_bytes (type);
4304
4305 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4306 return 0;
4307
4308 if (VECTOR_MODE_P (mode) || mode == TImode)
4309 {
4310 /* User-created vectors small enough to fit in EAX. */
4311 if (size < 8)
4312 return 0;
4313
4314 /* MMX/3dNow values are returned in MM0,
4315 except when it doesn't exist. */
4316 if (size == 8)
4317 return (TARGET_MMX ? 0 : 1);
4318
4319 /* SSE values are returned in XMM0, except when it doesn't exist. */
4320 if (size == 16)
4321 return (TARGET_SSE ? 0 : 1);
4322 }
4323
4324 if (mode == XFmode)
4325 return 0;
4326
4327 if (mode == TDmode)
4328 return 1;
4329
4330 if (size > 12)
4331 return 1;
4332 return 0;
4333 }
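
/* For example (illustrative only), on the 32-bit ABI:

     long double (XFmode)     not in memory; returned in %st(0)
     __m64 (8-byte vector)    returned in %mm0 when MMX is enabled,
                              in memory otherwise
     __m128 (16-byte vector)  returned in %xmm0 when SSE is enabled,
                              in memory otherwise

   and anything whose mode is BLKmode is always returned in memory.  */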
4334
4335 /* When returning SSE vector types, we have a choice of either
4336 (1) being abi incompatible with a -march switch, or
4337 (2) generating an error.
4338 Given no good solution, I think the safest thing is one warning.
4339 The user won't be able to use -Werror, but....
4340
4341 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4342 called in response to actually generating a caller or callee that
4343 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4344 via aggregate_value_p for general type probing from tree-ssa. */
4345
4346 static rtx
4347 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4348 {
4349 static bool warnedsse, warnedmmx;
4350
4351 if (type)
4352 {
4353 /* Look at the return type of the function, not the function type. */
4354 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4355
4356 if (!TARGET_SSE && !warnedsse)
4357 {
4358 if (mode == TImode
4359 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4360 {
4361 warnedsse = true;
4362 warning (0, "SSE vector return without SSE enabled "
4363 "changes the ABI");
4364 }
4365 }
4366
4367 if (!TARGET_MMX && !warnedmmx)
4368 {
4369 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4370 {
4371 warnedmmx = true;
4372 warning (0, "MMX vector return without MMX enabled "
4373 "changes the ABI");
4374 }
4375 }
4376 }
4377
4378 return NULL;
4379 }
4380
4381 /* Define how to find the value returned by a library function
4382 assuming the value has mode MODE. */
4383 rtx
4384 ix86_libcall_value (enum machine_mode mode)
4385 {
4386 if (TARGET_64BIT)
4387 {
4388 switch (mode)
4389 {
4390 case SFmode:
4391 case SCmode:
4392 case DFmode:
4393 case DCmode:
4394 case TFmode:
4395 case SDmode:
4396 case DDmode:
4397 case TDmode:
4398 return gen_rtx_REG (mode, FIRST_SSE_REG);
4399 case XFmode:
4400 case XCmode:
4401 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4402 case TCmode:
4403 return NULL;
4404 default:
4405 return gen_rtx_REG (mode, 0);
4406 }
4407 }
4408 else
4409 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4410 }
4411
4412 /* Given a mode, return the register to use for a return value. */
4413
4414 static int
4415 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4416 {
4417 gcc_assert (!TARGET_64BIT);
4418
4419 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4420 we normally prevent this case when MMX is not available. However,
4421 some ABIs may require the result to be returned the same way as DImode. */
4422 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4423 return TARGET_MMX ? FIRST_MMX_REG : 0;
4424
4425 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4426 we prevent this case when SSE is not available. However, some ABIs
4427 may require the result to be returned the same way as integer TImode. */
4428 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4429 return TARGET_SSE ? FIRST_SSE_REG : 0;
4430
4431 /* Decimal floating point values can go in %eax, unlike other float modes. */
4432 if (DECIMAL_FLOAT_MODE_P (mode))
4433 return 0;
4434
4435 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4436 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4437 return 0;
4438
4439 /* Floating point return values in %st(0), except for local functions when
4440 SSE math is enabled or for functions with sseregparm attribute. */
4441 if ((func || fntype)
4442 && (mode == SFmode || mode == DFmode))
4443 {
4444 int sse_level = ix86_function_sseregparm (fntype, func);
4445 if ((sse_level >= 1 && mode == SFmode)
4446 || (sse_level == 2 && mode == DFmode))
4447 return FIRST_SSE_REG;
4448 }
4449
4450 return FIRST_FLOAT_REG;
4451 }
4452 \f
4453 /* Create the va_list data type. */
4454
4455 static tree
4456 ix86_build_builtin_va_list (void)
4457 {
4458 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4459
4460 /* For i386 we use a plain pointer to the argument area. */
4461 if (!TARGET_64BIT)
4462 return build_pointer_type (char_type_node);
4463
4464 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4465 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4466
4467 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4468 unsigned_type_node);
4469 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4470 unsigned_type_node);
4471 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4472 ptr_type_node);
4473 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4474 ptr_type_node);
4475
4476 va_list_gpr_counter_field = f_gpr;
4477 va_list_fpr_counter_field = f_fpr;
4478
4479 DECL_FIELD_CONTEXT (f_gpr) = record;
4480 DECL_FIELD_CONTEXT (f_fpr) = record;
4481 DECL_FIELD_CONTEXT (f_ovf) = record;
4482 DECL_FIELD_CONTEXT (f_sav) = record;
4483
4484 TREE_CHAIN (record) = type_decl;
4485 TYPE_NAME (record) = type_decl;
4486 TYPE_FIELDS (record) = f_gpr;
4487 TREE_CHAIN (f_gpr) = f_fpr;
4488 TREE_CHAIN (f_fpr) = f_ovf;
4489 TREE_CHAIN (f_ovf) = f_sav;
4490
4491 layout_type (record);
4492
4493 /* The correct type is an array type of one element. */
4494 return build_array_type (record, build_index_type (size_zero_node));
4495 }
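
/* The record built above corresponds to this C declaration (illustrative
   only; the real type is created through the tree machinery):

     struct __va_list_tag
     {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     };
     typedef struct __va_list_tag va_list[1];
*/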
4496
4497 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4498
4499 static void
4500 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4501 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4502 int no_rtl)
4503 {
4504 CUMULATIVE_ARGS next_cum;
4505 rtx save_area = NULL_RTX, mem;
4506 rtx label;
4507 rtx label_ref;
4508 rtx tmp_reg;
4509 rtx nsse_reg;
4510 int set;
4511 tree fntype;
4512 int stdarg_p;
4513 int i;
4514
4515 if (!TARGET_64BIT)
4516 return;
4517
4518 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4519 return;
4520
4521 /* Indicate to allocate space on the stack for varargs save area. */
4522 ix86_save_varrargs_registers = 1;
4523
4524 cfun->stack_alignment_needed = 128;
4525
4526 fntype = TREE_TYPE (current_function_decl);
4527 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4528 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4529 != void_type_node));
4530
4531 /* For varargs, we do not want to skip the dummy va_dcl argument.
4532 For stdargs, we do want to skip the last named argument. */
4533 next_cum = *cum;
4534 if (stdarg_p)
4535 function_arg_advance (&next_cum, mode, type, 1);
4536
4537 if (!no_rtl)
4538 save_area = frame_pointer_rtx;
4539
4540 set = get_varargs_alias_set ();
4541
4542 for (i = next_cum.regno;
4543 i < ix86_regparm
4544 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4545 i++)
4546 {
4547 mem = gen_rtx_MEM (Pmode,
4548 plus_constant (save_area, i * UNITS_PER_WORD));
4549 MEM_NOTRAP_P (mem) = 1;
4550 set_mem_alias_set (mem, set);
4551 emit_move_insn (mem, gen_rtx_REG (Pmode,
4552 x86_64_int_parameter_registers[i]));
4553 }
4554
4555 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4556 {
4557 /* Now emit code to save SSE registers. The AX parameter contains the
4558 number of SSE parameter registers used to call this function. We use
4559 the sse_prologue_save insn template, which produces a computed jump
4560 across the SSE saves. We need some preparation work to get this working. */
4561
4562 label = gen_label_rtx ();
4563 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4564
4565 /* Compute the address to jump to:
4566 label - eax*4 + nnamed_sse_arguments*4 */
4567 tmp_reg = gen_reg_rtx (Pmode);
4568 nsse_reg = gen_reg_rtx (Pmode);
4569 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4570 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4571 gen_rtx_MULT (Pmode, nsse_reg,
4572 GEN_INT (4))));
4573 if (next_cum.sse_regno)
4574 emit_move_insn
4575 (nsse_reg,
4576 gen_rtx_CONST (DImode,
4577 gen_rtx_PLUS (DImode,
4578 label_ref,
4579 GEN_INT (next_cum.sse_regno * 4))));
4580 else
4581 emit_move_insn (nsse_reg, label_ref);
4582 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4583
4584 /* Compute the address of the memory block we save into. We always use a
4585 pointer pointing 127 bytes after the first byte to store - this is
4586 needed to keep the instruction size limited to 4 bytes. */
4587 tmp_reg = gen_reg_rtx (Pmode);
4588 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4589 plus_constant (save_area,
4590 8 * REGPARM_MAX + 127)));
4591 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4592 MEM_NOTRAP_P (mem) = 1;
4593 set_mem_alias_set (mem, set);
4594 set_mem_align (mem, BITS_PER_WORD);
4595
4596 /* And finally do the dirty job! */
4597 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4598 GEN_INT (next_cum.sse_regno), label));
4599 }
4600
4601 }
4602
4603 /* Implement va_start. */
4604
4605 void
4606 ix86_va_start (tree valist, rtx nextarg)
4607 {
4608 HOST_WIDE_INT words, n_gpr, n_fpr;
4609 tree f_gpr, f_fpr, f_ovf, f_sav;
4610 tree gpr, fpr, ovf, sav, t;
4611 tree type;
4612
4613 /* Only the 64-bit target needs something special. */
4614 if (!TARGET_64BIT)
4615 {
4616 std_expand_builtin_va_start (valist, nextarg);
4617 return;
4618 }
4619
4620 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4621 f_fpr = TREE_CHAIN (f_gpr);
4622 f_ovf = TREE_CHAIN (f_fpr);
4623 f_sav = TREE_CHAIN (f_ovf);
4624
4625 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4626 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4627 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4628 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4629 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4630
4631 /* Count number of gp and fp argument registers used. */
4632 words = current_function_args_info.words;
4633 n_gpr = current_function_args_info.regno;
4634 n_fpr = current_function_args_info.sse_regno;
4635
4636 if (TARGET_DEBUG_ARG)
4637 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4638 (int) words, (int) n_gpr, (int) n_fpr);
4639
4640 if (cfun->va_list_gpr_size)
4641 {
4642 type = TREE_TYPE (gpr);
4643 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4644 build_int_cst (type, n_gpr * 8));
4645 TREE_SIDE_EFFECTS (t) = 1;
4646 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4647 }
4648
4649 if (cfun->va_list_fpr_size)
4650 {
4651 type = TREE_TYPE (fpr);
4652 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4653 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4654 TREE_SIDE_EFFECTS (t) = 1;
4655 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4656 }
4657
4658 /* Find the overflow area. */
4659 type = TREE_TYPE (ovf);
4660 t = make_tree (type, virtual_incoming_args_rtx);
4661 if (words != 0)
4662 t = build2 (PLUS_EXPR, type, t,
4663 build_int_cst (type, words * UNITS_PER_WORD));
4664 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4665 TREE_SIDE_EFFECTS (t) = 1;
4666 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4667
4668 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4669 {
4670 /* Find the register save area.
4671 The prologue of the function saves it right above the stack frame. */
4672 type = TREE_TYPE (sav);
4673 t = make_tree (type, frame_pointer_rtx);
4674 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4675 TREE_SIDE_EFFECTS (t) = 1;
4676 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4677 }
4678 }
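
/* For example (illustrative only), in

     void f (int a, ...)

   one GP register is consumed by the named argument, so va_start sets
   gp_offset to 8 and fp_offset to 8 * REGPARM_MAX (48 with six integer
   argument registers); overflow_arg_area points at the first stack
   argument and reg_save_area at the block saved by the prologue.  */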
4679
4680 /* Implement va_arg. */
4681
4682 tree
4683 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4684 {
4685 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4686 tree f_gpr, f_fpr, f_ovf, f_sav;
4687 tree gpr, fpr, ovf, sav, t;
4688 int size, rsize;
4689 tree lab_false, lab_over = NULL_TREE;
4690 tree addr, t2;
4691 rtx container;
4692 int indirect_p = 0;
4693 tree ptrtype;
4694 enum machine_mode nat_mode;
4695
4696 /* Only the 64-bit target needs something special. */
4697 if (!TARGET_64BIT)
4698 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4699
4700 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4701 f_fpr = TREE_CHAIN (f_gpr);
4702 f_ovf = TREE_CHAIN (f_fpr);
4703 f_sav = TREE_CHAIN (f_ovf);
4704
4705 valist = build_va_arg_indirect_ref (valist);
4706 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4707 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4708 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4709 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4710
4711 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4712 if (indirect_p)
4713 type = build_pointer_type (type);
4714 size = int_size_in_bytes (type);
4715 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4716
4717 nat_mode = type_natural_mode (type);
4718 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4719 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4720
4721 /* Pull the value out of the saved registers. */
4722
4723 addr = create_tmp_var (ptr_type_node, "addr");
4724 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4725
4726 if (container)
4727 {
4728 int needed_intregs, needed_sseregs;
4729 bool need_temp;
4730 tree int_addr, sse_addr;
4731
4732 lab_false = create_artificial_label ();
4733 lab_over = create_artificial_label ();
4734
4735 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4736
4737 need_temp = (!REG_P (container)
4738 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4739 || TYPE_ALIGN (type) > 128));
4740
4741 /* In case we are passing a structure, verify that it is a consecutive
4742 block in the register save area. If not, we need to do moves. */
4743 if (!need_temp && !REG_P (container))
4744 {
4745 /* Verify that all registers are strictly consecutive. */
4746 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4747 {
4748 int i;
4749
4750 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4751 {
4752 rtx slot = XVECEXP (container, 0, i);
4753 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4754 || INTVAL (XEXP (slot, 1)) != i * 16)
4755 need_temp = 1;
4756 }
4757 }
4758 else
4759 {
4760 int i;
4761
4762 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4763 {
4764 rtx slot = XVECEXP (container, 0, i);
4765 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4766 || INTVAL (XEXP (slot, 1)) != i * 8)
4767 need_temp = 1;
4768 }
4769 }
4770 }
4771 if (!need_temp)
4772 {
4773 int_addr = addr;
4774 sse_addr = addr;
4775 }
4776 else
4777 {
4778 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4779 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4780 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4781 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4782 }
4783
4784 /* First ensure that we fit completely in registers. */
4785 if (needed_intregs)
4786 {
4787 t = build_int_cst (TREE_TYPE (gpr),
4788 (REGPARM_MAX - needed_intregs + 1) * 8);
4789 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4790 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4791 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4792 gimplify_and_add (t, pre_p);
4793 }
4794 if (needed_sseregs)
4795 {
4796 t = build_int_cst (TREE_TYPE (fpr),
4797 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4798 + REGPARM_MAX * 8);
4799 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4800 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4801 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4802 gimplify_and_add (t, pre_p);
4803 }
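/* For reference, a rough sketch of the 64-bit register save area built by
   the prologue (assuming REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8):

	bytes   0 ..  47   the six integer argument registers, 8 bytes each
	bytes  48 .. 175   %xmm0 .. %xmm7, 16 bytes each

   The gpr and fpr counters hold byte offsets into this area, which is why
   the integer check above scales by 8 and the SSE check scales by 16
   starting past REGPARM_MAX * 8.  */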
4804
4805 /* Compute index to start of area used for integer regs. */
4806 if (needed_intregs)
4807 {
4808 /* int_addr = gpr + sav; */
4809 t = fold_convert (ptr_type_node, gpr);
4810 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4811 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4812 gimplify_and_add (t, pre_p);
4813 }
4814 if (needed_sseregs)
4815 {
4816 /* sse_addr = fpr + sav; */
4817 t = fold_convert (ptr_type_node, fpr);
4818 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4819 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4820 gimplify_and_add (t, pre_p);
4821 }
4822 if (need_temp)
4823 {
4824 int i;
4825 tree temp = create_tmp_var (type, "va_arg_tmp");
4826
4827 /* addr = &temp; */
4828 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4829 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4830 gimplify_and_add (t, pre_p);
4831
4832 for (i = 0; i < XVECLEN (container, 0); i++)
4833 {
4834 rtx slot = XVECEXP (container, 0, i);
4835 rtx reg = XEXP (slot, 0);
4836 enum machine_mode mode = GET_MODE (reg);
4837 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4838 tree addr_type = build_pointer_type (piece_type);
4839 tree src_addr, src;
4840 int src_offset;
4841 tree dest_addr, dest;
4842
4843 if (SSE_REGNO_P (REGNO (reg)))
4844 {
4845 src_addr = sse_addr;
4846 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4847 }
4848 else
4849 {
4850 src_addr = int_addr;
4851 src_offset = REGNO (reg) * 8;
4852 }
4853 src_addr = fold_convert (addr_type, src_addr);
4854 src_addr = fold (build2 (PLUS_EXPR, addr_type, src_addr,
4855 size_int (src_offset)));
4856 src = build_va_arg_indirect_ref (src_addr);
4857
4858 dest_addr = fold_convert (addr_type, addr);
4859 dest_addr = fold (build2 (PLUS_EXPR, addr_type, dest_addr,
4860 size_int (INTVAL (XEXP (slot, 1)))));
4861 dest = build_va_arg_indirect_ref (dest_addr);
4862
4863 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4864 gimplify_and_add (t, pre_p);
4865 }
4866 }
4867
4868 if (needed_intregs)
4869 {
4870 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4871 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4872 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4873 gimplify_and_add (t, pre_p);
4874 }
4875 if (needed_sseregs)
4876 {
4877 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4878 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4879 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4880 gimplify_and_add (t, pre_p);
4881 }
4882
4883 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4884 gimplify_and_add (t, pre_p);
4885
4886 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4887 append_to_statement_list (t, pre_p);
4888 }
4889
4890 /* ... otherwise out of the overflow area. */
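/* Roughly, the statements gimplified below amount to

	t = ovf, rounded up to the argument's alignment if necessary;
	addr = t;
	ovf = t + rsize * UNITS_PER_WORD;

   i.e. the argument is fetched from the overflow area and the overflow
   pointer is advanced past it.  */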
4891
4892 /* Care for on-stack alignment if needed. */
4893 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4894 || integer_zerop (TYPE_SIZE (type)))
4895 t = ovf;
4896 else
4897 {
4898 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4899 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4900 build_int_cst (TREE_TYPE (ovf), align - 1));
4901 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4902 build_int_cst (TREE_TYPE (t), -align));
4903 }
4904 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4905
4906 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4907 gimplify_and_add (t2, pre_p);
4908
4909 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4910 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4911 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4912 gimplify_and_add (t, pre_p);
4913
4914 if (container)
4915 {
4916 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4917 append_to_statement_list (t, pre_p);
4918 }
4919
4920 ptrtype = build_pointer_type (type);
4921 addr = fold_convert (ptrtype, addr);
4922
4923 if (indirect_p)
4924 addr = build_va_arg_indirect_ref (addr);
4925 return build_va_arg_indirect_ref (addr);
4926 }
4927 \f
4928 /* Return nonzero if OPNUM's MEM should be matched
4929 in movabs* patterns. */
4930
4931 int
4932 ix86_check_movabs (rtx insn, int opnum)
4933 {
4934 rtx set, mem;
4935
4936 set = PATTERN (insn);
4937 if (GET_CODE (set) == PARALLEL)
4938 set = XVECEXP (set, 0, 0);
4939 gcc_assert (GET_CODE (set) == SET);
4940 mem = XEXP (set, opnum);
4941 while (GET_CODE (mem) == SUBREG)
4942 mem = SUBREG_REG (mem);
4943 gcc_assert (MEM_P (mem));
4944 return (volatile_ok || !MEM_VOLATILE_P (mem));
4945 }
4946 \f
4947 /* Initialize the table of extra 80387 mathematical constants. */
4948
4949 static void
4950 init_ext_80387_constants (void)
4951 {
4952 static const char * cst[5] =
4953 {
4954 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4955 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4956 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4957 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4958 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4959 };
4960 int i;
4961
4962 for (i = 0; i < 5; i++)
4963 {
4964 real_from_string (&ext_80387_constants_table[i], cst[i]);
4965 /* Ensure each constant is rounded to XFmode precision. */
4966 real_convert (&ext_80387_constants_table[i],
4967 XFmode, &ext_80387_constants_table[i]);
4968 }
4969
4970 ext_80387_constants_init = 1;
4971 }
4972
4973 /* Return true if the constant is something that can be loaded with
4974 a special instruction. */
4975
4976 int
4977 standard_80387_constant_p (rtx x)
4978 {
4979 REAL_VALUE_TYPE r;
4980
4981 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4982 return -1;
4983
4984 if (x == CONST0_RTX (GET_MODE (x)))
4985 return 1;
4986 if (x == CONST1_RTX (GET_MODE (x)))
4987 return 2;
4988
4989 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4990
4991 /* For XFmode constants, try to find a special 80387 instruction when
4992 optimizing for size or on those CPUs that benefit from them. */
4993 if (GET_MODE (x) == XFmode
4994 && (optimize_size || x86_ext_80387_constants & TUNEMASK))
4995 {
4996 int i;
4997
4998 if (! ext_80387_constants_init)
4999 init_ext_80387_constants ();
5000
5001 for (i = 0; i < 5; i++)
5002 if (real_identical (&r, &ext_80387_constants_table[i]))
5003 return i + 3;
5004 }
5005
5006 /* A load of the constant -0.0 or -1.0 will be split into an
5007 fldz;fchs or fld1;fchs sequence. */
5008 if (real_isnegzero (&r))
5009 return 8;
5010 if (real_identical (&r, &dconstm1))
5011 return 9;
5012
5013 return 0;
5014 }
5015
5016 /* Return the opcode of the special instruction to be used to load
5017 the constant X. */
5018
5019 const char *
5020 standard_80387_constant_opcode (rtx x)
5021 {
5022 switch (standard_80387_constant_p (x))
5023 {
5024 case 1:
5025 return "fldz";
5026 case 2:
5027 return "fld1";
5028 case 3:
5029 return "fldlg2";
5030 case 4:
5031 return "fldln2";
5032 case 5:
5033 return "fldl2e";
5034 case 6:
5035 return "fldl2t";
5036 case 7:
5037 return "fldpi";
5038 case 8:
5039 case 9:
5040 return "#";
5041 default:
5042 gcc_unreachable ();
5043 }
5044 }
5045
5046 /* Return the CONST_DOUBLE representing the 80387 constant that is
5047 loaded by the specified special instruction. The argument IDX
5048 matches the return value from standard_80387_constant_p. */
5049
5050 rtx
5051 standard_80387_constant_rtx (int idx)
5052 {
5053 int i;
5054
5055 if (! ext_80387_constants_init)
5056 init_ext_80387_constants ();
5057
5058 switch (idx)
5059 {
5060 case 3:
5061 case 4:
5062 case 5:
5063 case 6:
5064 case 7:
5065 i = idx - 3;
5066 break;
5067
5068 default:
5069 gcc_unreachable ();
5070 }
5071
5072 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5073 XFmode);
5074 }
5075
5076 /* Return 1 if MODE is a valid mode for SSE. */
5077 static int
5078 standard_sse_mode_p (enum machine_mode mode)
5079 {
5080 switch (mode)
5081 {
5082 case V16QImode:
5083 case V8HImode:
5084 case V4SImode:
5085 case V2DImode:
5086 case V4SFmode:
5087 case V2DFmode:
5088 return 1;
5089
5090 default:
5091 return 0;
5092 }
5093 }
5094
5095 /* Return 1 if X is an FP constant we can load into an SSE register
5096 without using memory. */
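/* For example, CONST0_RTX (V4SFmode) can be generated with a single xorps
   and an all-ones vector with pcmpeqd, so neither needs a load from the
   constant pool; see standard_sse_constant_opcode below.  */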
5097 int
5098 standard_sse_constant_p (rtx x)
5099 {
5100 enum machine_mode mode = GET_MODE (x);
5101
5102 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5103 return 1;
5104 if (vector_all_ones_operand (x, mode)
5105 && standard_sse_mode_p (mode))
5106 return TARGET_SSE2 ? 2 : -1;
5107
5108 return 0;
5109 }
5110
5111 /* Return the opcode of the special instruction to be used to load
5112 the constant X. */
5113
5114 const char *
5115 standard_sse_constant_opcode (rtx insn, rtx x)
5116 {
5117 switch (standard_sse_constant_p (x))
5118 {
5119 case 1:
5120 if (get_attr_mode (insn) == MODE_V4SF)
5121 return "xorps\t%0, %0";
5122 else if (get_attr_mode (insn) == MODE_V2DF)
5123 return "xorpd\t%0, %0";
5124 else
5125 return "pxor\t%0, %0";
5126 case 2:
5127 return "pcmpeqd\t%0, %0";
5128 }
5129 gcc_unreachable ();
5130 }
5131
5132 /* Returns 1 if OP contains a symbol reference */
5133
5134 int
5135 symbolic_reference_mentioned_p (rtx op)
5136 {
5137 const char *fmt;
5138 int i;
5139
5140 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5141 return 1;
5142
5143 fmt = GET_RTX_FORMAT (GET_CODE (op));
5144 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5145 {
5146 if (fmt[i] == 'E')
5147 {
5148 int j;
5149
5150 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5151 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5152 return 1;
5153 }
5154
5155 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5156 return 1;
5157 }
5158
5159 return 0;
5160 }
5161
5162 /* Return 1 if it is appropriate to emit `ret' instructions in the
5163 body of a function. Do this only if the epilogue is simple, needing a
5164 couple of insns. Prior to reloading, we can't tell how many registers
5165 must be saved, so return 0 then. Return 0 if there is no frame
5166 marker to de-allocate. */
5167
5168 int
5169 ix86_can_use_return_insn_p (void)
5170 {
5171 struct ix86_frame frame;
5172
5173 if (! reload_completed || frame_pointer_needed)
5174 return 0;
5175
5176 /* Don't allow more than 32k bytes of pops, since that's all we can do
5177 with one instruction. */
5178 if (current_function_pops_args
5179 && current_function_args_size >= 32768)
5180 return 0;
5181
5182 ix86_compute_frame_layout (&frame);
5183 return frame.to_allocate == 0 && frame.nregs == 0;
5184 }
5185 \f
5186 /* Value should be nonzero if functions must have frame pointers.
5187 Zero means the frame pointer need not be set up (and parms may
5188 be accessed via the stack pointer) in functions that seem suitable. */
5189
5190 int
5191 ix86_frame_pointer_required (void)
5192 {
5193 /* If we accessed previous frames, then the generated code expects
5194 to be able to access the saved ebp value in our frame. */
5195 if (cfun->machine->accesses_prev_frame)
5196 return 1;
5197
5198 /* Several x86 OSes need a frame pointer for other reasons,
5199 usually pertaining to setjmp. */
5200 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5201 return 1;
5202
5203 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5204 the frame pointer by default. Turn it back on now if we've not
5205 got a leaf function. */
5206 if (TARGET_OMIT_LEAF_FRAME_POINTER
5207 && (!current_function_is_leaf
5208 || ix86_current_function_calls_tls_descriptor))
5209 return 1;
5210
5211 if (current_function_profile)
5212 return 1;
5213
5214 return 0;
5215 }
5216
5217 /* Record that the current function accesses previous call frames. */
5218
5219 void
5220 ix86_setup_frame_addresses (void)
5221 {
5222 cfun->machine->accesses_prev_frame = 1;
5223 }
5224 \f
5225 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5226 # define USE_HIDDEN_LINKONCE 1
5227 #else
5228 # define USE_HIDDEN_LINKONCE 0
5229 #endif
5230
5231 static int pic_labels_used;
5232
5233 /* Fills in the label name that should be used for a pc thunk for
5234 the given register. */
5235
5236 static void
5237 get_pc_thunk_name (char name[32], unsigned int regno)
5238 {
5239 gcc_assert (!TARGET_64BIT);
5240
5241 if (USE_HIDDEN_LINKONCE)
5242 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5243 else
5244 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5245 }
5246
5247
5248 /* At the end of the assembly file, emit the PC thunks needed for -fpic:
5249 each loads its register with the return address of the caller and then returns. */
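/* For illustration, the thunk emitted for %ebx with USE_HIDDEN_LINKONCE
   comes out roughly as (AT&T syntax):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret
*/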
5250
5251 void
5252 ix86_file_end (void)
5253 {
5254 rtx xops[2];
5255 int regno;
5256
5257 for (regno = 0; regno < 8; ++regno)
5258 {
5259 char name[32];
5260
5261 if (! ((pic_labels_used >> regno) & 1))
5262 continue;
5263
5264 get_pc_thunk_name (name, regno);
5265
5266 #if TARGET_MACHO
5267 if (TARGET_MACHO)
5268 {
5269 switch_to_section (darwin_sections[text_coal_section]);
5270 fputs ("\t.weak_definition\t", asm_out_file);
5271 assemble_name (asm_out_file, name);
5272 fputs ("\n\t.private_extern\t", asm_out_file);
5273 assemble_name (asm_out_file, name);
5274 fputs ("\n", asm_out_file);
5275 ASM_OUTPUT_LABEL (asm_out_file, name);
5276 }
5277 else
5278 #endif
5279 if (USE_HIDDEN_LINKONCE)
5280 {
5281 tree decl;
5282
5283 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5284 error_mark_node);
5285 TREE_PUBLIC (decl) = 1;
5286 TREE_STATIC (decl) = 1;
5287 DECL_ONE_ONLY (decl) = 1;
5288
5289 (*targetm.asm_out.unique_section) (decl, 0);
5290 switch_to_section (get_named_section (decl, NULL, 0));
5291
5292 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5293 fputs ("\t.hidden\t", asm_out_file);
5294 assemble_name (asm_out_file, name);
5295 fputc ('\n', asm_out_file);
5296 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5297 }
5298 else
5299 {
5300 switch_to_section (text_section);
5301 ASM_OUTPUT_LABEL (asm_out_file, name);
5302 }
5303
5304 xops[0] = gen_rtx_REG (SImode, regno);
5305 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5306 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5307 output_asm_insn ("ret", xops);
5308 }
5309
5310 if (NEED_INDICATE_EXEC_STACK)
5311 file_end_indicate_exec_stack ();
5312 }
5313
5314 /* Emit code for the SET_GOT patterns. */
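/* For illustration, with -fpic on a 32-bit target this expands roughly to

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx

   when TARGET_DEEP_BRANCH_PREDICTION is set, and otherwise to

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx
*/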
5315
5316 const char *
5317 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5318 {
5319 rtx xops[3];
5320
5321 xops[0] = dest;
5322 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5323
5324 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5325 {
5326 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5327
5328 if (!flag_pic)
5329 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5330 else
5331 output_asm_insn ("call\t%a2", xops);
5332
5333 #if TARGET_MACHO
5334 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5335 is what will be referenced by the Mach-O PIC subsystem. */
5336 if (!label)
5337 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5338 #endif
5339
5340 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5341 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5342
5343 if (flag_pic)
5344 output_asm_insn ("pop{l}\t%0", xops);
5345 }
5346 else
5347 {
5348 char name[32];
5349 get_pc_thunk_name (name, REGNO (dest));
5350 pic_labels_used |= 1 << REGNO (dest);
5351
5352 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5353 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5354 output_asm_insn ("call\t%X2", xops);
5355 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5356 is what will be referenced by the Mach-O PIC subsystem. */
5357 #if TARGET_MACHO
5358 if (!label)
5359 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5360 else
5361 targetm.asm_out.internal_label (asm_out_file, "L",
5362 CODE_LABEL_NUMBER (label));
5363 #endif
5364 }
5365
5366 if (TARGET_MACHO)
5367 return "";
5368
5369 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5370 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5371 else
5372 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5373
5374 return "";
5375 }
5376
5377 /* Generate a "push" pattern for input ARG. */
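/* On a 32-bit target the result is roughly
	(set (mem:SI (pre_dec:SI (reg:SI sp))) ARG)
   which the push patterns in i386.md recognize.  */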
5378
5379 static rtx
5380 gen_push (rtx arg)
5381 {
5382 return gen_rtx_SET (VOIDmode,
5383 gen_rtx_MEM (Pmode,
5384 gen_rtx_PRE_DEC (Pmode,
5385 stack_pointer_rtx)),
5386 arg);
5387 }
5388
5389 /* Return the number of an unused call-clobbered register available
5390 for the entire function, or INVALID_REGNUM if there is none. */
5391
5392 static unsigned int
5393 ix86_select_alt_pic_regnum (void)
5394 {
5395 if (current_function_is_leaf && !current_function_profile
5396 && !ix86_current_function_calls_tls_descriptor)
5397 {
5398 int i;
5399 for (i = 2; i >= 0; --i)
5400 if (!regs_ever_live[i])
5401 return i;
5402 }
5403
5404 return INVALID_REGNUM;
5405 }
5406
5407 /* Return 1 if we need to save REGNO. */
5408 static int
5409 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5410 {
5411 if (pic_offset_table_rtx
5412 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5413 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5414 || current_function_profile
5415 || current_function_calls_eh_return
5416 || current_function_uses_const_pool))
5417 {
5418 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5419 return 0;
5420 return 1;
5421 }
5422
5423 if (current_function_calls_eh_return && maybe_eh_return)
5424 {
5425 unsigned i;
5426 for (i = 0; ; i++)
5427 {
5428 unsigned test = EH_RETURN_DATA_REGNO (i);
5429 if (test == INVALID_REGNUM)
5430 break;
5431 if (test == regno)
5432 return 1;
5433 }
5434 }
5435
5436 if (cfun->machine->force_align_arg_pointer
5437 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5438 return 1;
5439
5440 return (regs_ever_live[regno]
5441 && !call_used_regs[regno]
5442 && !fixed_regs[regno]
5443 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5444 }
5445
5446 /* Return number of registers to be saved on the stack. */
5447
5448 static int
5449 ix86_nsaved_regs (void)
5450 {
5451 int nregs = 0;
5452 int regno;
5453
5454 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5455 if (ix86_save_reg (regno, true))
5456 nregs++;
5457 return nregs;
5458 }
5459
5460 /* Return the offset between two registers, one to be eliminated, and the other
5461 its replacement, at the start of a routine. */
5462
5463 HOST_WIDE_INT
5464 ix86_initial_elimination_offset (int from, int to)
5465 {
5466 struct ix86_frame frame;
5467 ix86_compute_frame_layout (&frame);
5468
5469 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5470 return frame.hard_frame_pointer_offset;
5471 else if (from == FRAME_POINTER_REGNUM
5472 && to == HARD_FRAME_POINTER_REGNUM)
5473 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5474 else
5475 {
5476 gcc_assert (to == STACK_POINTER_REGNUM);
5477
5478 if (from == ARG_POINTER_REGNUM)
5479 return frame.stack_pointer_offset;
5480
5481 gcc_assert (from == FRAME_POINTER_REGNUM);
5482 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5483 }
5484 }
5485
5486 /* Fill the ix86_frame structure describing the frame of the current function. */
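/* A rough sketch of the layout being computed, from higher towards lower
   addresses (the padding amounts depend on the alignment actually needed):

	return address
	saved %ebp (if frame_pointer_needed)
		<- hard_frame_pointer_offset
	saved registers (nregs * UNITS_PER_WORD)
	va-arg register save area (if saving varargs registers)
	padding1
		<- frame_pointer_offset
	local frame data (get_frame_size ())
	outgoing arguments area (if ACCUMULATE_OUTGOING_ARGS)
	padding2
		<- stack_pointer_offset

   On 64-bit targets part of this may instead live in the red zone; see the
   red zone handling at the end of the function.  */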
5487
5488 static void
5489 ix86_compute_frame_layout (struct ix86_frame *frame)
5490 {
5491 HOST_WIDE_INT total_size;
5492 unsigned int stack_alignment_needed;
5493 HOST_WIDE_INT offset;
5494 unsigned int preferred_alignment;
5495 HOST_WIDE_INT size = get_frame_size ();
5496
5497 frame->nregs = ix86_nsaved_regs ();
5498 total_size = size;
5499
5500 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5501 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5502
5503 /* During reload iteration the number of registers saved can change.
5504 Recompute the value as needed. Do not recompute when the number of
5505 registers didn't change, as reload does multiple calls to the function
5506 and does not expect the decision to change within a single iteration. */
5507 if (!optimize_size
5508 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5509 {
5510 int count = frame->nregs;
5511
5512 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5513 /* The fast prologue uses moves instead of pushes to save registers. This
5514 is significantly longer, but it also executes faster, as modern hardware
5515 can execute the moves in parallel but can't do that for push/pop.
5516
5517 Be careful about choosing which prologue to emit: when the function takes
5518 many instructions to execute, we may as well use the slow version, and
5519 likewise when the function is known to be outside a hot spot (this is
5520 known with feedback only). Weight the size of the function by the number
5521 of registers to save, as it is cheap to use one or two push instructions
5522 but very slow to use many of them. */
5523 if (count)
5524 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5525 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5526 || (flag_branch_probabilities
5527 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5528 cfun->machine->use_fast_prologue_epilogue = false;
5529 else
5530 cfun->machine->use_fast_prologue_epilogue
5531 = !expensive_function_p (count);
5532 }
5533 if (TARGET_PROLOGUE_USING_MOVE
5534 && cfun->machine->use_fast_prologue_epilogue)
5535 frame->save_regs_using_mov = true;
5536 else
5537 frame->save_regs_using_mov = false;
5538
5539
5540 /* Skip return address and saved base pointer. */
5541 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5542
5543 frame->hard_frame_pointer_offset = offset;
5544
5545 /* Do some sanity checking of stack_alignment_needed and
5546 preferred_alignment, since the i386 port is the only one using these
5547 features, which may break easily. */
5548
5549 gcc_assert (!size || stack_alignment_needed);
5550 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5551 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5552 gcc_assert (stack_alignment_needed
5553 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5554
5555 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5556 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5557
5558 /* Register save area */
5559 offset += frame->nregs * UNITS_PER_WORD;
5560
5561 /* Va-arg area */
5562 if (ix86_save_varrargs_registers)
5563 {
5564 offset += X86_64_VARARGS_SIZE;
5565 frame->va_arg_size = X86_64_VARARGS_SIZE;
5566 }
5567 else
5568 frame->va_arg_size = 0;
5569
5570 /* Align start of frame for local function. */
5571 frame->padding1 = ((offset + stack_alignment_needed - 1)
5572 & -stack_alignment_needed) - offset;
5573
5574 offset += frame->padding1;
5575
5576 /* Frame pointer points here. */
5577 frame->frame_pointer_offset = offset;
5578
5579 offset += size;
5580
5581 /* Add the outgoing arguments area. It can be skipped if we eliminated
5582 all the function calls as dead code.
5583 Skipping is however impossible when the function calls alloca, as the
5584 alloca expander assumes that the last current_function_outgoing_args_size
5585 bytes of the stack frame are unused. */
5586 if (ACCUMULATE_OUTGOING_ARGS
5587 && (!current_function_is_leaf || current_function_calls_alloca
5588 || ix86_current_function_calls_tls_descriptor))
5589 {
5590 offset += current_function_outgoing_args_size;
5591 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5592 }
5593 else
5594 frame->outgoing_arguments_size = 0;
5595
5596 /* Align stack boundary. Only needed if we're calling another function
5597 or using alloca. */
5598 if (!current_function_is_leaf || current_function_calls_alloca
5599 || ix86_current_function_calls_tls_descriptor)
5600 frame->padding2 = ((offset + preferred_alignment - 1)
5601 & -preferred_alignment) - offset;
5602 else
5603 frame->padding2 = 0;
5604
5605 offset += frame->padding2;
5606
5607 /* We've reached end of stack frame. */
5608 frame->stack_pointer_offset = offset;
5609
5610 /* Size the prologue needs to allocate. */
5611 frame->to_allocate =
5612 (size + frame->padding1 + frame->padding2
5613 + frame->outgoing_arguments_size + frame->va_arg_size);
5614
5615 if ((!frame->to_allocate && frame->nregs <= 1)
5616 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5617 frame->save_regs_using_mov = false;
5618
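/* The red zone is the 128-byte area below the stack pointer that the x86-64
   ABI guarantees will not be clobbered asynchronously, so a leaf function
   can use it without adjusting %rsp.  Account for what can live there so
   the prologue allocates less.  */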
5619 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5620 && current_function_is_leaf
5621 && !ix86_current_function_calls_tls_descriptor)
5622 {
5623 frame->red_zone_size = frame->to_allocate;
5624 if (frame->save_regs_using_mov)
5625 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5626 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5627 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5628 }
5629 else
5630 frame->red_zone_size = 0;
5631 frame->to_allocate -= frame->red_zone_size;
5632 frame->stack_pointer_offset -= frame->red_zone_size;
5633 #if 0
5634 fprintf (stderr, "\n");
5635 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5636 fprintf (stderr, "size: %ld\n", (long)size);
5637 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5638 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5639 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5640 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5641 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5642 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5643 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5644 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5645 (long)frame->hard_frame_pointer_offset);
5646 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5647 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5648 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5649 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5650 #endif
5651 }
5652
5653 /* Emit code to save registers in the prologue. */
5654
5655 static void
5656 ix86_emit_save_regs (void)
5657 {
5658 unsigned int regno;
5659 rtx insn;
5660
5661 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5662 if (ix86_save_reg (regno, true))
5663 {
5664 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5665 RTX_FRAME_RELATED_P (insn) = 1;
5666 }
5667 }
5668
5669 /* Emit code to save registers using MOV insns. The first register
5670 is saved at POINTER + OFFSET. */
5671 static void
5672 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5673 {
5674 unsigned int regno;
5675 rtx insn;
5676
5677 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5678 if (ix86_save_reg (regno, true))
5679 {
5680 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5681 Pmode, offset),
5682 gen_rtx_REG (Pmode, regno));
5683 RTX_FRAME_RELATED_P (insn) = 1;
5684 offset += UNITS_PER_WORD;
5685 }
5686 }
5687
5688 /* Expand prologue or epilogue stack adjustment.
5689 The pattern exists to put a dependency on all ebp-based memory accesses.
5690 STYLE should be negative if instructions should be marked as frame related,
5691 zero if the %r11 register is live and cannot be freely used, and positive
5692 otherwise. */
5693
5694 static void
5695 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5696 {
5697 rtx insn;
5698
5699 if (! TARGET_64BIT)
5700 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5701 else if (x86_64_immediate_operand (offset, DImode))
5702 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5703 else
5704 {
5705 rtx r11;
5706 /* r11 is used by indirect sibcall return as well, set before the
5707 epilogue and used after the epilogue. ATM indirect sibcall
5708 shouldn't be used together with huge frame sizes in one
5709 function because of the frame_size check in sibcall.c. */
5710 gcc_assert (style);
5711 r11 = gen_rtx_REG (DImode, R11_REG);
5712 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5713 if (style < 0)
5714 RTX_FRAME_RELATED_P (insn) = 1;
5715 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5716 offset));
5717 }
5718 if (style < 0)
5719 RTX_FRAME_RELATED_P (insn) = 1;
5720 }
5721
5722 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5723
5724 static rtx
5725 ix86_internal_arg_pointer (void)
5726 {
5727 bool has_force_align_arg_pointer =
5728 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5729 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5730 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5731 && DECL_NAME (current_function_decl)
5732 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5733 && DECL_FILE_SCOPE_P (current_function_decl))
5734 || ix86_force_align_arg_pointer
5735 || has_force_align_arg_pointer)
5736 {
5737 /* Nested functions can't realign the stack due to a register
5738 conflict. */
5739 if (DECL_CONTEXT (current_function_decl)
5740 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5741 {
5742 if (ix86_force_align_arg_pointer)
5743 warning (0, "-mstackrealign ignored for nested functions");
5744 if (has_force_align_arg_pointer)
5745 error ("%s not supported for nested functions",
5746 ix86_force_align_arg_pointer_string);
5747 return virtual_incoming_args_rtx;
5748 }
5749 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5750 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5751 }
5752 else
5753 return virtual_incoming_args_rtx;
5754 }
5755
5756 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5757 This is called from dwarf2out.c to emit call frame instructions
5758 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5759 static void
5760 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5761 {
5762 rtx unspec = SET_SRC (pattern);
5763 gcc_assert (GET_CODE (unspec) == UNSPEC);
5764
5765 switch (index)
5766 {
5767 case UNSPEC_REG_SAVE:
5768 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5769 SET_DEST (pattern));
5770 break;
5771 case UNSPEC_DEF_CFA:
5772 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5773 INTVAL (XVECEXP (unspec, 0, 0)));
5774 break;
5775 default:
5776 gcc_unreachable ();
5777 }
5778 }
5779
5780 /* Expand the prologue into a bunch of separate insns. */
5781
5782 void
5783 ix86_expand_prologue (void)
5784 {
5785 rtx insn;
5786 bool pic_reg_used;
5787 struct ix86_frame frame;
5788 HOST_WIDE_INT allocate;
5789
5790 ix86_compute_frame_layout (&frame);
5791
5792 if (cfun->machine->force_align_arg_pointer)
5793 {
5794 rtx x, y;
5795
5796 /* Grab the argument pointer. */
5797 x = plus_constant (stack_pointer_rtx, 4);
5798 y = cfun->machine->force_align_arg_pointer;
5799 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5800 RTX_FRAME_RELATED_P (insn) = 1;
5801
5802 /* The unwind info consists of two parts: install the fafp as the cfa,
5803 and record the fafp as the "save register" of the stack pointer.
5804 The latter is there so that the unwinder can see where it should
5805 restore the stack pointer across the and insn. */
5806 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5807 x = gen_rtx_SET (VOIDmode, y, x);
5808 RTX_FRAME_RELATED_P (x) = 1;
5809 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5810 UNSPEC_REG_SAVE);
5811 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5812 RTX_FRAME_RELATED_P (y) = 1;
5813 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5814 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5815 REG_NOTES (insn) = x;
5816
5817 /* Align the stack. */
5818 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5819 GEN_INT (-16)));
5820
5821 /* And here we cheat like madmen with the unwind info. We force the
5822 cfa register back to sp+4, which is exactly what it was at the
5823 start of the function. Re-pushing the return address results in
5824 the return at the same spot relative to the cfa, and thus is
5825 correct wrt the unwind info. */
5826 x = cfun->machine->force_align_arg_pointer;
5827 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5828 insn = emit_insn (gen_push (x));
5829 RTX_FRAME_RELATED_P (insn) = 1;
5830
5831 x = GEN_INT (4);
5832 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5833 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5834 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5835 REG_NOTES (insn) = x;
5836 }
5837
5838 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5839 slower on all targets. Also sdb doesn't like it. */
5840
5841 if (frame_pointer_needed)
5842 {
5843 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5844 RTX_FRAME_RELATED_P (insn) = 1;
5845
5846 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5847 RTX_FRAME_RELATED_P (insn) = 1;
5848 }
5849
5850 allocate = frame.to_allocate;
5851
5852 if (!frame.save_regs_using_mov)
5853 ix86_emit_save_regs ();
5854 else
5855 allocate += frame.nregs * UNITS_PER_WORD;
5856
5857 /* When using the red zone we may start register saving before allocating
5858 the stack frame, saving one cycle of the prologue. */
5859 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5860 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5861 : stack_pointer_rtx,
5862 -frame.nregs * UNITS_PER_WORD);
5863
5864 if (allocate == 0)
5865 ;
5866 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5867 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5868 GEN_INT (-allocate), -1);
5869 else
5870 {
5871 /* Only valid for Win32. */
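/* A sketch of what happens below: the allocation size is placed in %eax and
   allocate_stack_worker invokes the target's stack probe helper, which
   touches the new pages in order and adjusts %esp; %eax is saved and
   restored around this if it is live at function start.  */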
5872 rtx eax = gen_rtx_REG (SImode, 0);
5873 bool eax_live = ix86_eax_live_at_start_p ();
5874 rtx t;
5875
5876 gcc_assert (!TARGET_64BIT);
5877
5878 if (eax_live)
5879 {
5880 emit_insn (gen_push (eax));
5881 allocate -= 4;
5882 }
5883
5884 emit_move_insn (eax, GEN_INT (allocate));
5885
5886 insn = emit_insn (gen_allocate_stack_worker (eax));
5887 RTX_FRAME_RELATED_P (insn) = 1;
5888 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5889 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5890 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5891 t, REG_NOTES (insn));
5892
5893 if (eax_live)
5894 {
5895 if (frame_pointer_needed)
5896 t = plus_constant (hard_frame_pointer_rtx,
5897 allocate
5898 - frame.to_allocate
5899 - frame.nregs * UNITS_PER_WORD);
5900 else
5901 t = plus_constant (stack_pointer_rtx, allocate);
5902 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5903 }
5904 }
5905
5906 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5907 {
5908 if (!frame_pointer_needed || !frame.to_allocate)
5909 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5910 else
5911 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5912 -frame.nregs * UNITS_PER_WORD);
5913 }
5914
5915 pic_reg_used = false;
5916 if (pic_offset_table_rtx
5917 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5918 || current_function_profile))
5919 {
5920 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5921
5922 if (alt_pic_reg_used != INVALID_REGNUM)
5923 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5924
5925 pic_reg_used = true;
5926 }
5927
5928 if (pic_reg_used)
5929 {
5930 if (TARGET_64BIT)
5931 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5932 else
5933 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5934
5935 /* Even with accurate pre-reload life analysis, we can wind up
5936 deleting all references to the pic register after reload.
5937 Consider if cross-jumping unifies two sides of a branch
5938 controlled by a comparison vs the only read from a global.
5939 In which case, allow the set_got to be deleted, though we're
5940 too late to do anything about the ebx save in the prologue. */
5941 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5942 }
5943
5944 /* Prevent function calls from being scheduled before the call to mcount.
5945 In the pic_reg_used case, make sure that the got load isn't deleted. */
5946 if (current_function_profile)
5947 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5948 }
5949
5950 /* Emit code to restore saved registers using MOV insns. First register
5951 is restored from POINTER + OFFSET. */
5952 static void
5953 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5954 int maybe_eh_return)
5955 {
5956 int regno;
5957 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5958
5959 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5960 if (ix86_save_reg (regno, maybe_eh_return))
5961 {
5962 /* Ensure that adjust_address won't be forced to produce a pointer
5963 out of the range allowed by the x86-64 instruction set. */
5964 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5965 {
5966 rtx r11;
5967
5968 r11 = gen_rtx_REG (DImode, R11_REG);
5969 emit_move_insn (r11, GEN_INT (offset));
5970 emit_insn (gen_adddi3 (r11, r11, pointer));
5971 base_address = gen_rtx_MEM (Pmode, r11);
5972 offset = 0;
5973 }
5974 emit_move_insn (gen_rtx_REG (Pmode, regno),
5975 adjust_address (base_address, Pmode, offset));
5976 offset += UNITS_PER_WORD;
5977 }
5978 }
5979
5980 /* Restore function stack, frame, and registers. */
5981
5982 void
5983 ix86_expand_epilogue (int style)
5984 {
5985 int regno;
5986 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5987 struct ix86_frame frame;
5988 HOST_WIDE_INT offset;
5989
5990 ix86_compute_frame_layout (&frame);
5991
5992 /* Calculate start of saved registers relative to ebp. Special care
5993 must be taken for the normal return case of a function using
5994 eh_return: the eax and edx registers are marked as saved, but not
5995 restored along this path. */
5996 offset = frame.nregs;
5997 if (current_function_calls_eh_return && style != 2)
5998 offset -= 2;
5999 offset *= -UNITS_PER_WORD;
6000
6001 /* If we're only restoring one register and sp is not valid then
6002 using a move instruction to restore the register is less work
6003 than reloading sp and popping the register.
6004
6005 The default code results in a stack adjustment using an add/lea instruction,
6006 while this code results in a LEAVE instruction (or discrete equivalent),
6007 so it is profitable in some other cases as well, especially when there
6008 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6009 is set and there is exactly one register to pop. This heuristic may need
6010 some tuning in the future. */
6011 if ((!sp_valid && frame.nregs <= 1)
6012 || (TARGET_EPILOGUE_USING_MOVE
6013 && cfun->machine->use_fast_prologue_epilogue
6014 && (frame.nregs > 1 || frame.to_allocate))
6015 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6016 || (frame_pointer_needed && TARGET_USE_LEAVE
6017 && cfun->machine->use_fast_prologue_epilogue
6018 && frame.nregs == 1)
6019 || current_function_calls_eh_return)
6020 {
6021 /* Restore registers. We can use ebp or esp to address the memory
6022 locations. If both are available, default to ebp, since offsets
6023 are known to be small. The only exception is esp pointing directly
6024 to the end of the block of saved registers, where we may simplify
6025 the addressing mode. */
6026
6027 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6028 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6029 frame.to_allocate, style == 2);
6030 else
6031 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6032 offset, style == 2);
6033
6034 /* eh_return epilogues need %ecx added to the stack pointer. */
6035 if (style == 2)
6036 {
6037 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6038
6039 if (frame_pointer_needed)
6040 {
6041 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6042 tmp = plus_constant (tmp, UNITS_PER_WORD);
6043 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6044
6045 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6046 emit_move_insn (hard_frame_pointer_rtx, tmp);
6047
6048 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6049 const0_rtx, style);
6050 }
6051 else
6052 {
6053 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6054 tmp = plus_constant (tmp, (frame.to_allocate
6055 + frame.nregs * UNITS_PER_WORD));
6056 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6057 }
6058 }
6059 else if (!frame_pointer_needed)
6060 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6061 GEN_INT (frame.to_allocate
6062 + frame.nregs * UNITS_PER_WORD),
6063 style);
6064 /* If not an i386, mov & pop is faster than "leave". */
6065 else if (TARGET_USE_LEAVE || optimize_size
6066 || !cfun->machine->use_fast_prologue_epilogue)
6067 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6068 else
6069 {
6070 pro_epilogue_adjust_stack (stack_pointer_rtx,
6071 hard_frame_pointer_rtx,
6072 const0_rtx, style);
6073 if (TARGET_64BIT)
6074 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6075 else
6076 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6077 }
6078 }
6079 else
6080 {
6081 /* First step is to deallocate the stack frame so that we can
6082 pop the registers. */
6083 if (!sp_valid)
6084 {
6085 gcc_assert (frame_pointer_needed);
6086 pro_epilogue_adjust_stack (stack_pointer_rtx,
6087 hard_frame_pointer_rtx,
6088 GEN_INT (offset), style);
6089 }
6090 else if (frame.to_allocate)
6091 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6092 GEN_INT (frame.to_allocate), style);
6093
6094 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6095 if (ix86_save_reg (regno, false))
6096 {
6097 if (TARGET_64BIT)
6098 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6099 else
6100 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6101 }
6102 if (frame_pointer_needed)
6103 {
6104 /* Leave results in shorter dependency chains on CPUs that are
6105 able to grok it fast. */
6106 if (TARGET_USE_LEAVE)
6107 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6108 else if (TARGET_64BIT)
6109 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6110 else
6111 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6112 }
6113 }
6114
6115 if (cfun->machine->force_align_arg_pointer)
6116 {
6117 emit_insn (gen_addsi3 (stack_pointer_rtx,
6118 cfun->machine->force_align_arg_pointer,
6119 GEN_INT (-4)));
6120 }
6121
6122 /* Sibcall epilogues don't want a return instruction. */
6123 if (style == 0)
6124 return;
6125
6126 if (current_function_pops_args && current_function_args_size)
6127 {
6128 rtx popc = GEN_INT (current_function_pops_args);
6129
6130 /* The i386 can only pop 64K bytes. If asked to pop more, pop the
6131 return address, do an explicit add, and jump indirectly to the
6132 caller. */
6133
6134 if (current_function_pops_args >= 65536)
6135 {
6136 rtx ecx = gen_rtx_REG (SImode, 2);
6137
6138 /* There is no "pascal" calling convention in the 64-bit ABI. */
6139 gcc_assert (!TARGET_64BIT);
6140
6141 emit_insn (gen_popsi1 (ecx));
6142 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6143 emit_jump_insn (gen_return_indirect_internal (ecx));
6144 }
6145 else
6146 emit_jump_insn (gen_return_pop_internal (popc));
6147 }
6148 else
6149 emit_jump_insn (gen_return_internal ());
6150 }
6151
6152 /* Reset from the function's potential modifications. */
6153
6154 static void
6155 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6156 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6157 {
6158 if (pic_offset_table_rtx)
6159 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6160 #if TARGET_MACHO
6161 /* Mach-O doesn't support labels at the end of objects, so if
6162 it looks like we might want one, insert a NOP. */
6163 {
6164 rtx insn = get_last_insn ();
6165 while (insn
6166 && NOTE_P (insn)
6167 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6168 insn = PREV_INSN (insn);
6169 if (insn
6170 && (LABEL_P (insn)
6171 || (NOTE_P (insn)
6172 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6173 fputs ("\tnop\n", file);
6174 }
6175 #endif
6176
6177 }
6178 \f
6179 /* Extract the parts of an RTL expression that is a valid memory address
6180 for an instruction. Return 0 if the structure of the address is
6181 grossly off. Return -1 if the address contains ASHIFT, so it is not
6182 strictly valid but is still used for computing the length of an lea insn. */
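/* For example, the canonical address

	(plus (plus (mult (reg A) (const_int 4)) (reg B)) (const_int 12))

   decomposes into base = (reg B), index = (reg A), scale = 4, disp = 12,
   i.e. the operand of a "12(%B,%A,4)" memory reference.  */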
6183
6184 int
6185 ix86_decompose_address (rtx addr, struct ix86_address *out)
6186 {
6187 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6188 rtx base_reg, index_reg;
6189 HOST_WIDE_INT scale = 1;
6190 rtx scale_rtx = NULL_RTX;
6191 int retval = 1;
6192 enum ix86_address_seg seg = SEG_DEFAULT;
6193
6194 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6195 base = addr;
6196 else if (GET_CODE (addr) == PLUS)
6197 {
6198 rtx addends[4], op;
6199 int n = 0, i;
6200
6201 op = addr;
6202 do
6203 {
6204 if (n >= 4)
6205 return 0;
6206 addends[n++] = XEXP (op, 1);
6207 op = XEXP (op, 0);
6208 }
6209 while (GET_CODE (op) == PLUS);
6210 if (n >= 4)
6211 return 0;
6212 addends[n] = op;
6213
6214 for (i = n; i >= 0; --i)
6215 {
6216 op = addends[i];
6217 switch (GET_CODE (op))
6218 {
6219 case MULT:
6220 if (index)
6221 return 0;
6222 index = XEXP (op, 0);
6223 scale_rtx = XEXP (op, 1);
6224 break;
6225
6226 case UNSPEC:
6227 if (XINT (op, 1) == UNSPEC_TP
6228 && TARGET_TLS_DIRECT_SEG_REFS
6229 && seg == SEG_DEFAULT)
6230 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6231 else
6232 return 0;
6233 break;
6234
6235 case REG:
6236 case SUBREG:
6237 if (!base)
6238 base = op;
6239 else if (!index)
6240 index = op;
6241 else
6242 return 0;
6243 break;
6244
6245 case CONST:
6246 case CONST_INT:
6247 case SYMBOL_REF:
6248 case LABEL_REF:
6249 if (disp)
6250 return 0;
6251 disp = op;
6252 break;
6253
6254 default:
6255 return 0;
6256 }
6257 }
6258 }
6259 else if (GET_CODE (addr) == MULT)
6260 {
6261 index = XEXP (addr, 0); /* index*scale */
6262 scale_rtx = XEXP (addr, 1);
6263 }
6264 else if (GET_CODE (addr) == ASHIFT)
6265 {
6266 rtx tmp;
6267
6268 /* We're called for lea too, which implements ashift on occasion. */
6269 index = XEXP (addr, 0);
6270 tmp = XEXP (addr, 1);
6271 if (!CONST_INT_P (tmp))
6272 return 0;
6273 scale = INTVAL (tmp);
6274 if ((unsigned HOST_WIDE_INT) scale > 3)
6275 return 0;
6276 scale = 1 << scale;
6277 retval = -1;
6278 }
6279 else
6280 disp = addr; /* displacement */
6281
6282 /* Extract the integral value of scale. */
6283 if (scale_rtx)
6284 {
6285 if (!CONST_INT_P (scale_rtx))
6286 return 0;
6287 scale = INTVAL (scale_rtx);
6288 }
6289
6290 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6291 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6292
6293 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6294 if (base_reg && index_reg && scale == 1
6295 && (index_reg == arg_pointer_rtx
6296 || index_reg == frame_pointer_rtx
6297 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6298 {
6299 rtx tmp;
6300 tmp = base, base = index, index = tmp;
6301 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6302 }
6303
6304 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6305 if ((base_reg == hard_frame_pointer_rtx
6306 || base_reg == frame_pointer_rtx
6307 || base_reg == arg_pointer_rtx) && !disp)
6308 disp = const0_rtx;
6309
6310 /* Special case: on K6, [%esi] causes the instruction to be vector decoded.
6311 Avoid this by transforming it to [%esi+0]. */
6312 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6313 && base_reg && !index_reg && !disp
6314 && REG_P (base_reg)
6315 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6316 disp = const0_rtx;
6317
6318 /* Special case: encode reg+reg instead of reg*2. */
6319 if (!base && index && scale && scale == 2)
6320 base = index, base_reg = index_reg, scale = 1;
6321
6322 /* Special case: scaling cannot be encoded without base or displacement. */
6323 if (!base && !disp && index && scale != 1)
6324 disp = const0_rtx;
6325
6326 out->base = base;
6327 out->index = index;
6328 out->disp = disp;
6329 out->scale = scale;
6330 out->seg = seg;
6331
6332 return retval;
6333 }
6334 \f
6335 /* Return the cost of the memory address X.
6336 For i386, it is better to use a complex address than let gcc copy
6337 the address into a reg and make a new pseudo. But not if the address
6338 requires two regs - that would mean more pseudos with longer
6339 lifetimes. */
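/* For instance, with hard registers (plus (reg) (const_int 8)) costs 0
   while a bare (reg) costs 1, so folding the displacement into the address
   is preferred; addresses involving pseudos cost one more.  */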
6340 static int
6341 ix86_address_cost (rtx x)
6342 {
6343 struct ix86_address parts;
6344 int cost = 1;
6345 int ok = ix86_decompose_address (x, &parts);
6346
6347 gcc_assert (ok);
6348
6349 if (parts.base && GET_CODE (parts.base) == SUBREG)
6350 parts.base = SUBREG_REG (parts.base);
6351 if (parts.index && GET_CODE (parts.index) == SUBREG)
6352 parts.index = SUBREG_REG (parts.index);
6353
6354 /* More complex memory references are better. */
6355 if (parts.disp && parts.disp != const0_rtx)
6356 cost--;
6357 if (parts.seg != SEG_DEFAULT)
6358 cost--;
6359
6360 /* Attempt to minimize number of registers in the address. */
6361 if ((parts.base
6362 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6363 || (parts.index
6364 && (!REG_P (parts.index)
6365 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6366 cost++;
6367
6368 if (parts.base
6369 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6370 && parts.index
6371 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6372 && parts.base != parts.index)
6373 cost++;
6374
6375 /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6376 since its predecode logic can't detect the length of such instructions
6377 and they degenerate to vector decoding. Increase the cost of such
6378 addresses here. The penalty is minimally 2 cycles. It may be worthwhile
6379 to split such addresses or even refuse them altogether.
6380
6381 The following addressing modes are affected:
6382 [base+scale*index]
6383 [scale*index+disp]
6384 [base+index]
6385
6386 The first and last cases may be avoidable by explicitly coding the zero
6387 into the memory address, but I don't have an AMD-K6 machine handy to
6388 check this theory. */
6389
6390 if (TARGET_K6
6391 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6392 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6393 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6394 cost += 10;
6395
6396 return cost;
6397 }
6398 \f
6399 /* If X is a machine specific address (i.e. a symbol or label being
6400 referenced as a displacement from the GOT implemented using an
6401 UNSPEC), then return the base term. Otherwise return X. */
6402
6403 rtx
6404 ix86_find_base_term (rtx x)
6405 {
6406 rtx term;
6407
6408 if (TARGET_64BIT)
6409 {
6410 if (GET_CODE (x) != CONST)
6411 return x;
6412 term = XEXP (x, 0);
6413 if (GET_CODE (term) == PLUS
6414 && (CONST_INT_P (XEXP (term, 1))
6415 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6416 term = XEXP (term, 0);
6417 if (GET_CODE (term) != UNSPEC
6418 || XINT (term, 1) != UNSPEC_GOTPCREL)
6419 return x;
6420
6421 term = XVECEXP (term, 0, 0);
6422
6423 if (GET_CODE (term) != SYMBOL_REF
6424 && GET_CODE (term) != LABEL_REF)
6425 return x;
6426
6427 return term;
6428 }
6429
6430 term = ix86_delegitimize_address (x);
6431
6432 if (GET_CODE (term) != SYMBOL_REF
6433 && GET_CODE (term) != LABEL_REF)
6434 return x;
6435
6436 return term;
6437 }
6438
6439 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6440 this is used to form addresses to local data when -fPIC is in
6441 use. */
6442
6443 static bool
6444 darwin_local_data_pic (rtx disp)
6445 {
6446 if (GET_CODE (disp) == MINUS)
6447 {
6448 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6449 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6450 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6451 {
6452 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6453 if (! strcmp (sym_name, "<pic base>"))
6454 return true;
6455 }
6456 }
6457
6458 return false;
6459 }
6460 \f
6461 /* Determine if a given RTX is a valid constant. We already know this
6462 satisfies CONSTANT_P. */
6463
6464 bool
6465 legitimate_constant_p (rtx x)
6466 {
6467 switch (GET_CODE (x))
6468 {
6469 case CONST:
6470 x = XEXP (x, 0);
6471
6472 if (GET_CODE (x) == PLUS)
6473 {
6474 if (!CONST_INT_P (XEXP (x, 1)))
6475 return false;
6476 x = XEXP (x, 0);
6477 }
6478
6479 if (TARGET_MACHO && darwin_local_data_pic (x))
6480 return true;
6481
6482 /* Only some unspecs are valid as "constants". */
6483 if (GET_CODE (x) == UNSPEC)
6484 switch (XINT (x, 1))
6485 {
6486 case UNSPEC_GOTOFF:
6487 return TARGET_64BIT;
6488 case UNSPEC_TPOFF:
6489 case UNSPEC_NTPOFF:
6490 x = XVECEXP (x, 0, 0);
6491 return (GET_CODE (x) == SYMBOL_REF
6492 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6493 case UNSPEC_DTPOFF:
6494 x = XVECEXP (x, 0, 0);
6495 return (GET_CODE (x) == SYMBOL_REF
6496 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6497 default:
6498 return false;
6499 }
6500
6501 /* We must have drilled down to a symbol. */
6502 if (GET_CODE (x) == LABEL_REF)
6503 return true;
6504 if (GET_CODE (x) != SYMBOL_REF)
6505 return false;
6506 /* FALLTHRU */
6507
6508 case SYMBOL_REF:
6509 /* TLS symbols are never valid. */
6510 if (SYMBOL_REF_TLS_MODEL (x))
6511 return false;
6512 break;
6513
6514 case CONST_DOUBLE:
6515 if (GET_MODE (x) == TImode
6516 && x != CONST0_RTX (TImode)
6517 && !TARGET_64BIT)
6518 return false;
6519 break;
6520
6521 case CONST_VECTOR:
6522 if (x == CONST0_RTX (GET_MODE (x)))
6523 return true;
6524 return false;
6525
6526 default:
6527 break;
6528 }
6529
6530 /* Otherwise we handle everything else in the move patterns. */
6531 return true;
6532 }
6533
6534 /* Determine if it's legal to put X into the constant pool. This
6535 is not possible for the address of thread-local symbols, which
6536 is checked above. */
6537
6538 static bool
6539 ix86_cannot_force_const_mem (rtx x)
6540 {
6541 /* We can always put integral constants and vectors in memory. */
6542 switch (GET_CODE (x))
6543 {
6544 case CONST_INT:
6545 case CONST_DOUBLE:
6546 case CONST_VECTOR:
6547 return false;
6548
6549 default:
6550 break;
6551 }
6552 return !legitimate_constant_p (x);
6553 }
6554
6555 /* Determine if a given RTX is a valid constant address. */
6556
6557 bool
6558 constant_address_p (rtx x)
6559 {
6560 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6561 }
6562
6563 /* Nonzero if the constant value X is a legitimate general operand
6564 when generating PIC code. It is given that flag_pic is on and
6565 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6566
6567 bool
6568 legitimate_pic_operand_p (rtx x)
6569 {
6570 rtx inner;
6571
6572 switch (GET_CODE (x))
6573 {
6574 case CONST:
6575 inner = XEXP (x, 0);
6576 if (GET_CODE (inner) == PLUS
6577 && CONST_INT_P (XEXP (inner, 1)))
6578 inner = XEXP (inner, 0);
6579
6580 /* Only some unspecs are valid as "constants". */
6581 if (GET_CODE (inner) == UNSPEC)
6582 switch (XINT (inner, 1))
6583 {
6584 case UNSPEC_GOTOFF:
6585 return TARGET_64BIT;
6586 case UNSPEC_TPOFF:
6587 x = XVECEXP (inner, 0, 0);
6588 return (GET_CODE (x) == SYMBOL_REF
6589 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6590 default:
6591 return false;
6592 }
6593 /* FALLTHRU */
6594
6595 case SYMBOL_REF:
6596 case LABEL_REF:
6597 return legitimate_pic_address_disp_p (x);
6598
6599 default:
6600 return true;
6601 }
6602 }
6603
6604 /* Determine if a given CONST RTX is a valid memory displacement
6605 in PIC mode. */
6606
6607 int
6608 legitimate_pic_address_disp_p (rtx disp)
6609 {
6610 bool saw_plus;
6611
6612 /* In 64bit mode we can allow direct addresses of symbols and labels
6613 when they are not dynamic symbols. */
6614 if (TARGET_64BIT)
6615 {
6616 rtx op0 = disp, op1;
6617
6618 switch (GET_CODE (disp))
6619 {
6620 case LABEL_REF:
6621 return true;
6622
6623 case CONST:
6624 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6625 break;
6626 op0 = XEXP (XEXP (disp, 0), 0);
6627 op1 = XEXP (XEXP (disp, 0), 1);
6628 if (!CONST_INT_P (op1)
6629 || INTVAL (op1) >= 16*1024*1024
6630 || INTVAL (op1) < -16*1024*1024)
6631 break;
6632 if (GET_CODE (op0) == LABEL_REF)
6633 return true;
6634 if (GET_CODE (op0) != SYMBOL_REF)
6635 break;
6636 /* FALLTHRU */
6637
6638 case SYMBOL_REF:
6639 /* TLS references should always be enclosed in UNSPEC. */
6640 if (SYMBOL_REF_TLS_MODEL (op0))
6641 return false;
6642 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0))
6643 return true;
6644 break;
6645
6646 default:
6647 break;
6648 }
6649 }
6650 if (GET_CODE (disp) != CONST)
6651 return 0;
6652 disp = XEXP (disp, 0);
6653
6654 if (TARGET_64BIT)
6655 {
6656 /* It is not safe to allow PLUS expressions here, since they could
6657 exceed the allowed distance of GOT references. We should not need them anyway. */
6658 if (GET_CODE (disp) != UNSPEC
6659 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6660 && XINT (disp, 1) != UNSPEC_GOTOFF))
6661 return 0;
6662
6663 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6664 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6665 return 0;
6666 return 1;
6667 }
6668
6669 saw_plus = false;
6670 if (GET_CODE (disp) == PLUS)
6671 {
6672 if (!CONST_INT_P (XEXP (disp, 1)))
6673 return 0;
6674 disp = XEXP (disp, 0);
6675 saw_plus = true;
6676 }
6677
6678 if (TARGET_MACHO && darwin_local_data_pic (disp))
6679 return 1;
6680
6681 if (GET_CODE (disp) != UNSPEC)
6682 return 0;
6683
6684 switch (XINT (disp, 1))
6685 {
6686 case UNSPEC_GOT:
6687 if (saw_plus)
6688 return false;
6689 return GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF;
6690 case UNSPEC_GOTOFF:
6691 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6692 While the ABI also specifies a 32bit relocation, we do not produce
6693 it in the small PIC model at all. */
6694 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6695 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6696 && !TARGET_64BIT)
6697 return local_symbolic_operand (XVECEXP (disp, 0, 0), Pmode);
6698 return false;
6699 case UNSPEC_GOTTPOFF:
6700 case UNSPEC_GOTNTPOFF:
6701 case UNSPEC_INDNTPOFF:
6702 if (saw_plus)
6703 return false;
6704 disp = XVECEXP (disp, 0, 0);
6705 return (GET_CODE (disp) == SYMBOL_REF
6706 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6707 case UNSPEC_NTPOFF:
6708 disp = XVECEXP (disp, 0, 0);
6709 return (GET_CODE (disp) == SYMBOL_REF
6710 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6711 case UNSPEC_DTPOFF:
6712 disp = XVECEXP (disp, 0, 0);
6713 return (GET_CODE (disp) == SYMBOL_REF
6714 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6715 }
6716
6717 return 0;
6718 }
6719
6720 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6721 memory address for an instruction. The MODE argument is the machine mode
6722 for the MEM expression that wants to use this address.
6723
6724 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6725 convert common non-canonical forms to canonical form so that they will
6726 be recognized. */
6727
6728 int
6729 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6730 {
6731 struct ix86_address parts;
6732 rtx base, index, disp;
6733 HOST_WIDE_INT scale;
6734 const char *reason = NULL;
6735 rtx reason_rtx = NULL_RTX;
6736
6737 if (TARGET_DEBUG_ADDR)
6738 {
6739 fprintf (stderr,
6740 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6741 GET_MODE_NAME (mode), strict);
6742 debug_rtx (addr);
6743 }
6744
6745 if (ix86_decompose_address (addr, &parts) <= 0)
6746 {
6747 reason = "decomposition failed";
6748 goto report_error;
6749 }
6750
6751 base = parts.base;
6752 index = parts.index;
6753 disp = parts.disp;
6754 scale = parts.scale;
6755
6756 /* Validate base register.
6757
6758 Don't allow SUBREG's that span more than a word here. It can lead to spill
6759 failures when the base is one word out of a two word structure, which is
6760 represented internally as a DImode int. */
6761
6762 if (base)
6763 {
6764 rtx reg;
6765 reason_rtx = base;
6766
6767 if (REG_P (base))
6768 reg = base;
6769 else if (GET_CODE (base) == SUBREG
6770 && REG_P (SUBREG_REG (base))
6771 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6772 <= UNITS_PER_WORD)
6773 reg = SUBREG_REG (base);
6774 else
6775 {
6776 reason = "base is not a register";
6777 goto report_error;
6778 }
6779
6780 if (GET_MODE (base) != Pmode)
6781 {
6782 reason = "base is not in Pmode";
6783 goto report_error;
6784 }
6785
6786 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6787 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6788 {
6789 reason = "base is not valid";
6790 goto report_error;
6791 }
6792 }
6793
6794 /* Validate index register.
6795
6796 Don't allow SUBREG's that span more than a word here -- same as above. */
6797
6798 if (index)
6799 {
6800 rtx reg;
6801 reason_rtx = index;
6802
6803 if (REG_P (index))
6804 reg = index;
6805 else if (GET_CODE (index) == SUBREG
6806 && REG_P (SUBREG_REG (index))
6807 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6808 <= UNITS_PER_WORD)
6809 reg = SUBREG_REG (index);
6810 else
6811 {
6812 reason = "index is not a register";
6813 goto report_error;
6814 }
6815
6816 if (GET_MODE (index) != Pmode)
6817 {
6818 reason = "index is not in Pmode";
6819 goto report_error;
6820 }
6821
6822 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6823 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6824 {
6825 reason = "index is not valid";
6826 goto report_error;
6827 }
6828 }
6829
6830 /* Validate scale factor. */
6831 if (scale != 1)
6832 {
6833 reason_rtx = GEN_INT (scale);
6834 if (!index)
6835 {
6836 reason = "scale without index";
6837 goto report_error;
6838 }
6839
6840 if (scale != 2 && scale != 4 && scale != 8)
6841 {
6842 reason = "scale is not a valid multiplier";
6843 goto report_error;
6844 }
6845 }
6846
6847 /* Validate displacement. */
6848 if (disp)
6849 {
6850 reason_rtx = disp;
6851
6852 if (GET_CODE (disp) == CONST
6853 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6854 switch (XINT (XEXP (disp, 0), 1))
6855 {
6856 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6857 used. While the ABI also specifies 32bit relocations, we don't
6858 produce them at all and use IP relative addressing instead. */
6859 case UNSPEC_GOT:
6860 case UNSPEC_GOTOFF:
6861 gcc_assert (flag_pic);
6862 if (!TARGET_64BIT)
6863 goto is_legitimate_pic;
6864 reason = "64bit address unspec";
6865 goto report_error;
6866
6867 case UNSPEC_GOTPCREL:
6868 gcc_assert (flag_pic);
6869 goto is_legitimate_pic;
6870
6871 case UNSPEC_GOTTPOFF:
6872 case UNSPEC_GOTNTPOFF:
6873 case UNSPEC_INDNTPOFF:
6874 case UNSPEC_NTPOFF:
6875 case UNSPEC_DTPOFF:
6876 break;
6877
6878 default:
6879 reason = "invalid address unspec";
6880 goto report_error;
6881 }
6882
6883 else if (SYMBOLIC_CONST (disp)
6884 && (flag_pic
6885 || (TARGET_MACHO
6886 #if TARGET_MACHO
6887 && MACHOPIC_INDIRECT
6888 && !machopic_operand_p (disp)
6889 #endif
6890 )))
6891 {
6892
6893 is_legitimate_pic:
6894 if (TARGET_64BIT && (index || base))
6895 {
6896 /* foo@dtpoff(%rX) is ok. */
6897 if (GET_CODE (disp) != CONST
6898 || GET_CODE (XEXP (disp, 0)) != PLUS
6899 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6900 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6901 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6902 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6903 {
6904 reason = "non-constant pic memory reference";
6905 goto report_error;
6906 }
6907 }
6908 else if (! legitimate_pic_address_disp_p (disp))
6909 {
6910 reason = "displacement is an invalid pic construct";
6911 goto report_error;
6912 }
6913
6914 /* This code used to verify that a symbolic pic displacement
6915 includes the pic_offset_table_rtx register.
6916
6917 While this is a good idea, unfortunately these constructs may
6918 be created by the "adds using lea" optimization for incorrect
6919 code like:
6920
6921 int a;
6922 int foo(int i)
6923 {
6924 return *(&a+i);
6925 }
6926
6927 This code is nonsensical, but results in addressing the
6928 GOT table with the pic_offset_table_rtx base. We can't
6929 just refuse it easily, since it gets matched by the
6930 "addsi3" pattern, which later gets split to an lea when the
6931 output register differs from the input. While this
6932 could be handled by a separate addsi pattern for this case
6933 that never results in an lea, disabling this test seems to
6934 be the easier and correct fix for the crash. */
6935 }
6936 else if (GET_CODE (disp) != LABEL_REF
6937 && !CONST_INT_P (disp)
6938 && (GET_CODE (disp) != CONST
6939 || !legitimate_constant_p (disp))
6940 && (GET_CODE (disp) != SYMBOL_REF
6941 || !legitimate_constant_p (disp)))
6942 {
6943 reason = "displacement is not constant";
6944 goto report_error;
6945 }
6946 else if (TARGET_64BIT
6947 && !x86_64_immediate_operand (disp, VOIDmode))
6948 {
6949 reason = "displacement is out of range";
6950 goto report_error;
6951 }
6952 }
6953
6954 /* Everything looks valid. */
6955 if (TARGET_DEBUG_ADDR)
6956 fprintf (stderr, "Success.\n");
6957 return TRUE;
6958
6959 report_error:
6960 if (TARGET_DEBUG_ADDR)
6961 {
6962 fprintf (stderr, "Error: %s\n", reason);
6963 debug_rtx (reason_rtx);
6964 }
6965 return FALSE;
6966 }
6967 \f
6968 /* Return a unique alias set for the GOT. */
6969
6970 static HOST_WIDE_INT
6971 ix86_GOT_alias_set (void)
6972 {
6973 static HOST_WIDE_INT set = -1;
6974 if (set == -1)
6975 set = new_alias_set ();
6976 return set;
6977 }
6978
6979 /* Return a legitimate reference for ORIG (an address) using the
6980 register REG. If REG is 0, a new pseudo is generated.
6981
6982 There are two types of references that must be handled:
6983
6984 1. Global data references must load the address from the GOT, via
6985 the PIC reg. An insn is emitted to do this load, and the reg is
6986 returned.
6987
6988 2. Static data references, constant pool addresses, and code labels
6989 compute the address as an offset from the GOT, whose base is in
6990 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6991 differentiate them from global data objects. The returned
6992 address is the PIC reg + an unspec constant.
6993
6994 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6995 reg also appears in the address. */
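/* Roughly, for a global symbol "foo" (placeholder name) case 1 ends up
   emitting a GOT load such as

       movl  foo@GOT(%ebx), %eax          # 32bit
       movq  foo@GOTPCREL(%rip), %rax     # 64bit

   whereas case 2 produces the cheaper GOT-relative form

       leal  foo@GOTOFF(%ebx), %eax

   The exact instructions depend on the surrounding code; this is only
   an illustration of the two shapes.  */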
6996
6997 static rtx
6998 legitimize_pic_address (rtx orig, rtx reg)
6999 {
7000 rtx addr = orig;
7001 rtx new = orig;
7002 rtx base;
7003
7004 #if TARGET_MACHO
7005 if (TARGET_MACHO && !TARGET_64BIT)
7006 {
7007 if (reg == 0)
7008 reg = gen_reg_rtx (Pmode);
7009 /* Use the generic Mach-O PIC machinery. */
7010 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7011 }
7012 #endif
7013
7014 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7015 new = addr;
7016 else if (TARGET_64BIT
7017 && ix86_cmodel != CM_SMALL_PIC
7018 && local_symbolic_operand (addr, Pmode))
7019 {
7020 rtx tmpreg;
7021 /* This symbol may be referenced via a displacement from the PIC
7022 base address (@GOTOFF). */
7023
7024 if (reload_in_progress)
7025 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7026 if (GET_CODE (addr) == CONST)
7027 addr = XEXP (addr, 0);
7028 if (GET_CODE (addr) == PLUS)
7029 {
7030 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7031 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7032 }
7033 else
7034 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7035 new = gen_rtx_CONST (Pmode, new);
7036 if (!reg)
7037 tmpreg = gen_reg_rtx (Pmode);
7038 else
7039 tmpreg = reg;
7040 emit_move_insn (tmpreg, new);
7041
7042 if (reg != 0)
7043 {
7044 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7045 tmpreg, 1, OPTAB_DIRECT);
7046 new = reg;
7047 }
7048 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7049 }
7050 else if (!TARGET_64BIT && local_symbolic_operand (addr, Pmode))
7051 {
7052 /* This symbol may be referenced via a displacement from the PIC
7053 base address (@GOTOFF). */
7054
7055 if (reload_in_progress)
7056 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7057 if (GET_CODE (addr) == CONST)
7058 addr = XEXP (addr, 0);
7059 if (GET_CODE (addr) == PLUS)
7060 {
7061 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7062 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7063 }
7064 else
7065 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7066 new = gen_rtx_CONST (Pmode, new);
7067 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7068
7069 if (reg != 0)
7070 {
7071 emit_move_insn (reg, new);
7072 new = reg;
7073 }
7074 }
7075 else if (GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7076 {
7077 if (TARGET_64BIT)
7078 {
7079 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7080 new = gen_rtx_CONST (Pmode, new);
7081 new = gen_const_mem (Pmode, new);
7082 set_mem_alias_set (new, ix86_GOT_alias_set ());
7083
7084 if (reg == 0)
7085 reg = gen_reg_rtx (Pmode);
7086 /* Use gen_movsi directly; otherwise the address is loaded
7087 into a register for CSE. We don't want to CSE these addresses;
7088 instead we CSE addresses from the GOT table, so skip this. */
7089 emit_insn (gen_movsi (reg, new));
7090 new = reg;
7091 }
7092 else
7093 {
7094 /* This symbol must be referenced via a load from the
7095 Global Offset Table (@GOT). */
7096
7097 if (reload_in_progress)
7098 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7099 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7100 new = gen_rtx_CONST (Pmode, new);
7101 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7102 new = gen_const_mem (Pmode, new);
7103 set_mem_alias_set (new, ix86_GOT_alias_set ());
7104
7105 if (reg == 0)
7106 reg = gen_reg_rtx (Pmode);
7107 emit_move_insn (reg, new);
7108 new = reg;
7109 }
7110 }
7111 else
7112 {
7113 if (CONST_INT_P (addr)
7114 && !x86_64_immediate_operand (addr, VOIDmode))
7115 {
7116 if (reg)
7117 {
7118 emit_move_insn (reg, addr);
7119 new = reg;
7120 }
7121 else
7122 new = force_reg (Pmode, addr);
7123 }
7124 else if (GET_CODE (addr) == CONST)
7125 {
7126 addr = XEXP (addr, 0);
7127
7128 /* We must match what we generated before. Assume the only
7129 unspecs that can get here are ours. Not that we could do
7130 anything with them anyway.... */
7131 if (GET_CODE (addr) == UNSPEC
7132 || (GET_CODE (addr) == PLUS
7133 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7134 return orig;
7135 gcc_assert (GET_CODE (addr) == PLUS);
7136 }
7137 if (GET_CODE (addr) == PLUS)
7138 {
7139 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7140
7141 /* Check first to see if this is a constant offset from a @GOTOFF
7142 symbol reference. */
7143 if (local_symbolic_operand (op0, Pmode)
7144 && CONST_INT_P (op1))
7145 {
7146 if (!TARGET_64BIT)
7147 {
7148 if (reload_in_progress)
7149 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7150 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7151 UNSPEC_GOTOFF);
7152 new = gen_rtx_PLUS (Pmode, new, op1);
7153 new = gen_rtx_CONST (Pmode, new);
7154 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7155
7156 if (reg != 0)
7157 {
7158 emit_move_insn (reg, new);
7159 new = reg;
7160 }
7161 }
7162 else
7163 {
7164 if (INTVAL (op1) < -16*1024*1024
7165 || INTVAL (op1) >= 16*1024*1024)
7166 {
7167 if (!x86_64_immediate_operand (op1, Pmode))
7168 op1 = force_reg (Pmode, op1);
7169 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7170 }
7171 }
7172 }
7173 else
7174 {
7175 base = legitimize_pic_address (XEXP (addr, 0), reg);
7176 new = legitimize_pic_address (XEXP (addr, 1),
7177 base == reg ? NULL_RTX : reg);
7178
7179 if (CONST_INT_P (new))
7180 new = plus_constant (base, INTVAL (new));
7181 else
7182 {
7183 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7184 {
7185 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7186 new = XEXP (new, 1);
7187 }
7188 new = gen_rtx_PLUS (Pmode, base, new);
7189 }
7190 }
7191 }
7192 }
7193 return new;
7194 }
7195 \f
7196 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7197
7198 static rtx
7199 get_thread_pointer (int to_reg)
7200 {
7201 rtx tp, reg, insn;
7202
7203 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7204 if (!to_reg)
7205 return tp;
7206
7207 reg = gen_reg_rtx (Pmode);
7208 insn = gen_rtx_SET (VOIDmode, reg, tp);
7209 insn = emit_insn (insn);
7210
7211 return reg;
7212 }
7213
7214 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7215 false if we expect this to be used for a memory address and true if
7216 we expect to load the address into a register. */
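/* For orientation only (the symbol name is a placeholder), the access
   models handled below roughly correspond to these idioms on GNU
   systems:

     global dynamic:  call __tls_get_addr with foo@tlsgd
     local dynamic:   call __tls_get_addr for the module base, then add
                      foo@dtpoff
     initial exec:    load foo@gottpoff from the GOT and add the thread
                      pointer (%gs:0 or %fs:0)
     local exec:      add foo@tpoff / foo@ntpoff directly to the thread
                      pointer

   The precise sequences differ between 32bit and 64bit and with
   TARGET_GNU2_TLS; see the cases below.  */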
7217
7218 static rtx
7219 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7220 {
7221 rtx dest, base, off, pic, tp;
7222 int type;
7223
7224 switch (model)
7225 {
7226 case TLS_MODEL_GLOBAL_DYNAMIC:
7227 dest = gen_reg_rtx (Pmode);
7228 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7229
7230 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7231 {
7232 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7233
7234 start_sequence ();
7235 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7236 insns = get_insns ();
7237 end_sequence ();
7238
7239 emit_libcall_block (insns, dest, rax, x);
7240 }
7241 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7242 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7243 else
7244 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7245
7246 if (TARGET_GNU2_TLS)
7247 {
7248 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7249
7250 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7251 }
7252 break;
7253
7254 case TLS_MODEL_LOCAL_DYNAMIC:
7255 base = gen_reg_rtx (Pmode);
7256 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7257
7258 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7259 {
7260 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7261
7262 start_sequence ();
7263 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7264 insns = get_insns ();
7265 end_sequence ();
7266
7267 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7268 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7269 emit_libcall_block (insns, base, rax, note);
7270 }
7271 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7272 emit_insn (gen_tls_local_dynamic_base_64 (base));
7273 else
7274 emit_insn (gen_tls_local_dynamic_base_32 (base));
7275
7276 if (TARGET_GNU2_TLS)
7277 {
7278 rtx x = ix86_tls_module_base ();
7279
7280 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7281 gen_rtx_MINUS (Pmode, x, tp));
7282 }
7283
7284 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7285 off = gen_rtx_CONST (Pmode, off);
7286
7287 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7288
7289 if (TARGET_GNU2_TLS)
7290 {
7291 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7292
7293 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7294 }
7295
7296 break;
7297
7298 case TLS_MODEL_INITIAL_EXEC:
7299 if (TARGET_64BIT)
7300 {
7301 pic = NULL;
7302 type = UNSPEC_GOTNTPOFF;
7303 }
7304 else if (flag_pic)
7305 {
7306 if (reload_in_progress)
7307 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7308 pic = pic_offset_table_rtx;
7309 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7310 }
7311 else if (!TARGET_ANY_GNU_TLS)
7312 {
7313 pic = gen_reg_rtx (Pmode);
7314 emit_insn (gen_set_got (pic));
7315 type = UNSPEC_GOTTPOFF;
7316 }
7317 else
7318 {
7319 pic = NULL;
7320 type = UNSPEC_INDNTPOFF;
7321 }
7322
7323 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7324 off = gen_rtx_CONST (Pmode, off);
7325 if (pic)
7326 off = gen_rtx_PLUS (Pmode, pic, off);
7327 off = gen_const_mem (Pmode, off);
7328 set_mem_alias_set (off, ix86_GOT_alias_set ());
7329
7330 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7331 {
7332 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7333 off = force_reg (Pmode, off);
7334 return gen_rtx_PLUS (Pmode, base, off);
7335 }
7336 else
7337 {
7338 base = get_thread_pointer (true);
7339 dest = gen_reg_rtx (Pmode);
7340 emit_insn (gen_subsi3 (dest, base, off));
7341 }
7342 break;
7343
7344 case TLS_MODEL_LOCAL_EXEC:
7345 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7346 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7347 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7348 off = gen_rtx_CONST (Pmode, off);
7349
7350 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7351 {
7352 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7353 return gen_rtx_PLUS (Pmode, base, off);
7354 }
7355 else
7356 {
7357 base = get_thread_pointer (true);
7358 dest = gen_reg_rtx (Pmode);
7359 emit_insn (gen_subsi3 (dest, base, off));
7360 }
7361 break;
7362
7363 default:
7364 gcc_unreachable ();
7365 }
7366
7367 return dest;
7368 }
7369
7370 /* Try machine-dependent ways of modifying an illegitimate address
7371 to be legitimate. If we find one, return the new, valid address.
7372 This macro is used in only one place: `memory_address' in explow.c.
7373
7374 OLDX is the address as it was before break_out_memory_refs was called.
7375 In some cases it is useful to look at this to decide what needs to be done.
7376
7377 MODE and WIN are passed so that this macro can use
7378 GO_IF_LEGITIMATE_ADDRESS.
7379
7380 It is always safe for this macro to do nothing. It exists to recognize
7381 opportunities to optimize the output.
7382
7383 For the 80386, we handle X+REG by loading X into a register R and
7384 using R+REG. R will go in a general reg and indexing will be used.
7385 However, if REG is a broken-out memory address or multiplication,
7386 nothing needs to be done because REG can certainly go in a general reg.
7387
7388 When -fpic is used, special handling is needed for symbolic references.
7389 See comments by legitimize_pic_address in i386.c for details. */
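/* For example, with -fpic a reference to table+%eax (where "table" is a
   placeholder for some locally defined symbol) is not left as a bare
   symbolic address; legitimize_pic_address rewrites it so that the final
   memory operand is GOT-relative, e.g.

       movl  table@GOTOFF(%ebx,%eax), %edx

   with %ebx holding the PIC base.  Non-PIC symbolic addresses are
   usually already legitimate and returned unchanged.  */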
7390
7391 rtx
7392 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7393 {
7394 int changed = 0;
7395 unsigned log;
7396
7397 if (TARGET_DEBUG_ADDR)
7398 {
7399 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7400 GET_MODE_NAME (mode));
7401 debug_rtx (x);
7402 }
7403
7404 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7405 if (log)
7406 return legitimize_tls_address (x, log, false);
7407 if (GET_CODE (x) == CONST
7408 && GET_CODE (XEXP (x, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7410 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7411 {
7412 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7413 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7414 }
7415
7416 if (flag_pic && SYMBOLIC_CONST (x))
7417 return legitimize_pic_address (x, 0);
7418
7419 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7420 if (GET_CODE (x) == ASHIFT
7421 && CONST_INT_P (XEXP (x, 1))
7422 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7423 {
7424 changed = 1;
7425 log = INTVAL (XEXP (x, 1));
7426 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7427 GEN_INT (1 << log));
7428 }
7429
7430 if (GET_CODE (x) == PLUS)
7431 {
7432 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7433
7434 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7435 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7436 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7437 {
7438 changed = 1;
7439 log = INTVAL (XEXP (XEXP (x, 0), 1));
7440 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7441 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7442 GEN_INT (1 << log));
7443 }
7444
7445 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7446 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7447 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7448 {
7449 changed = 1;
7450 log = INTVAL (XEXP (XEXP (x, 1), 1));
7451 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7452 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7453 GEN_INT (1 << log));
7454 }
7455
7456 /* Put multiply first if it isn't already. */
7457 if (GET_CODE (XEXP (x, 1)) == MULT)
7458 {
7459 rtx tmp = XEXP (x, 0);
7460 XEXP (x, 0) = XEXP (x, 1);
7461 XEXP (x, 1) = tmp;
7462 changed = 1;
7463 }
7464
7465 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7466 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7467 created by virtual register instantiation, register elimination, and
7468 similar optimizations. */
7469 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7470 {
7471 changed = 1;
7472 x = gen_rtx_PLUS (Pmode,
7473 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7474 XEXP (XEXP (x, 1), 0)),
7475 XEXP (XEXP (x, 1), 1));
7476 }
7477
7478 /* Canonicalize
7479 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7480 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7481 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7482 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7483 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7484 && CONSTANT_P (XEXP (x, 1)))
7485 {
7486 rtx constant;
7487 rtx other = NULL_RTX;
7488
7489 if (CONST_INT_P (XEXP (x, 1)))
7490 {
7491 constant = XEXP (x, 1);
7492 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7493 }
7494 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7495 {
7496 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7497 other = XEXP (x, 1);
7498 }
7499 else
7500 constant = 0;
7501
7502 if (constant)
7503 {
7504 changed = 1;
7505 x = gen_rtx_PLUS (Pmode,
7506 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7507 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7508 plus_constant (other, INTVAL (constant)));
7509 }
7510 }
7511
7512 if (changed && legitimate_address_p (mode, x, FALSE))
7513 return x;
7514
7515 if (GET_CODE (XEXP (x, 0)) == MULT)
7516 {
7517 changed = 1;
7518 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7519 }
7520
7521 if (GET_CODE (XEXP (x, 1)) == MULT)
7522 {
7523 changed = 1;
7524 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7525 }
7526
7527 if (changed
7528 && REG_P (XEXP (x, 1))
7529 && REG_P (XEXP (x, 0)))
7530 return x;
7531
7532 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7533 {
7534 changed = 1;
7535 x = legitimize_pic_address (x, 0);
7536 }
7537
7538 if (changed && legitimate_address_p (mode, x, FALSE))
7539 return x;
7540
7541 if (REG_P (XEXP (x, 0)))
7542 {
7543 rtx temp = gen_reg_rtx (Pmode);
7544 rtx val = force_operand (XEXP (x, 1), temp);
7545 if (val != temp)
7546 emit_move_insn (temp, val);
7547
7548 XEXP (x, 1) = temp;
7549 return x;
7550 }
7551
7552 else if (REG_P (XEXP (x, 1)))
7553 {
7554 rtx temp = gen_reg_rtx (Pmode);
7555 rtx val = force_operand (XEXP (x, 0), temp);
7556 if (val != temp)
7557 emit_move_insn (temp, val);
7558
7559 XEXP (x, 0) = temp;
7560 return x;
7561 }
7562 }
7563
7564 return x;
7565 }
7566 \f
7567 /* Print an integer constant expression in assembler syntax. Addition
7568 and subtraction are the only arithmetic that may appear in these
7569 expressions. FILE is the stdio stream to write to, X is the rtx, and
7570 CODE is the operand print code from the output string. */
7571
7572 static void
7573 output_pic_addr_const (FILE *file, rtx x, int code)
7574 {
7575 char buf[256];
7576
7577 switch (GET_CODE (x))
7578 {
7579 case PC:
7580 gcc_assert (flag_pic);
7581 putc ('.', file);
7582 break;
7583
7584 case SYMBOL_REF:
7585 if (! TARGET_MACHO || TARGET_64BIT)
7586 output_addr_const (file, x);
7587 else
7588 {
7589 const char *name = XSTR (x, 0);
7590
7591 /* Mark the decl as referenced so that cgraph will output the function. */
7592 if (SYMBOL_REF_DECL (x))
7593 mark_decl_referenced (SYMBOL_REF_DECL (x));
7594
7595 #if TARGET_MACHO
7596 if (MACHOPIC_INDIRECT
7597 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7598 name = machopic_indirection_name (x, /*stub_p=*/true);
7599 #endif
7600 assemble_name (file, name);
7601 }
7602 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7603 fputs ("@PLT", file);
7604 break;
7605
7606 case LABEL_REF:
7607 x = XEXP (x, 0);
7608 /* FALLTHRU */
7609 case CODE_LABEL:
7610 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7611 assemble_name (asm_out_file, buf);
7612 break;
7613
7614 case CONST_INT:
7615 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7616 break;
7617
7618 case CONST:
7619 /* This used to output parentheses around the expression,
7620 but that does not work on the 386 (either ATT or BSD assembler). */
7621 output_pic_addr_const (file, XEXP (x, 0), code);
7622 break;
7623
7624 case CONST_DOUBLE:
7625 if (GET_MODE (x) == VOIDmode)
7626 {
7627 /* We can use %d if the number is <32 bits and positive. */
7628 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7629 fprintf (file, "0x%lx%08lx",
7630 (unsigned long) CONST_DOUBLE_HIGH (x),
7631 (unsigned long) CONST_DOUBLE_LOW (x));
7632 else
7633 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7634 }
7635 else
7636 /* We can't handle floating point constants;
7637 PRINT_OPERAND must handle them. */
7638 output_operand_lossage ("floating constant misused");
7639 break;
7640
7641 case PLUS:
7642 /* Some assemblers need integer constants to appear first. */
7643 if (CONST_INT_P (XEXP (x, 0)))
7644 {
7645 output_pic_addr_const (file, XEXP (x, 0), code);
7646 putc ('+', file);
7647 output_pic_addr_const (file, XEXP (x, 1), code);
7648 }
7649 else
7650 {
7651 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7652 output_pic_addr_const (file, XEXP (x, 1), code);
7653 putc ('+', file);
7654 output_pic_addr_const (file, XEXP (x, 0), code);
7655 }
7656 break;
7657
7658 case MINUS:
7659 if (!TARGET_MACHO)
7660 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7661 output_pic_addr_const (file, XEXP (x, 0), code);
7662 putc ('-', file);
7663 output_pic_addr_const (file, XEXP (x, 1), code);
7664 if (!TARGET_MACHO)
7665 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7666 break;
7667
7668 case UNSPEC:
7669 gcc_assert (XVECLEN (x, 0) == 1);
7670 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7671 switch (XINT (x, 1))
7672 {
7673 case UNSPEC_GOT:
7674 fputs ("@GOT", file);
7675 break;
7676 case UNSPEC_GOTOFF:
7677 fputs ("@GOTOFF", file);
7678 break;
7679 case UNSPEC_GOTPCREL:
7680 fputs ("@GOTPCREL(%rip)", file);
7681 break;
7682 case UNSPEC_GOTTPOFF:
7683 /* FIXME: This might be @TPOFF in Sun ld too. */
7684 fputs ("@GOTTPOFF", file);
7685 break;
7686 case UNSPEC_TPOFF:
7687 fputs ("@TPOFF", file);
7688 break;
7689 case UNSPEC_NTPOFF:
7690 if (TARGET_64BIT)
7691 fputs ("@TPOFF", file);
7692 else
7693 fputs ("@NTPOFF", file);
7694 break;
7695 case UNSPEC_DTPOFF:
7696 fputs ("@DTPOFF", file);
7697 break;
7698 case UNSPEC_GOTNTPOFF:
7699 if (TARGET_64BIT)
7700 fputs ("@GOTTPOFF(%rip)", file);
7701 else
7702 fputs ("@GOTNTPOFF", file);
7703 break;
7704 case UNSPEC_INDNTPOFF:
7705 fputs ("@INDNTPOFF", file);
7706 break;
7707 default:
7708 output_operand_lossage ("invalid UNSPEC as operand");
7709 break;
7710 }
7711 break;
7712
7713 default:
7714 output_operand_lossage ("invalid expression as operand");
7715 }
7716 }
7717
7718 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7719 We need to emit DTP-relative relocations. */
7720
7721 static void
7722 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7723 {
7724 fputs (ASM_LONG, file);
7725 output_addr_const (file, x);
7726 fputs ("@DTPOFF", file);
7727 switch (size)
7728 {
7729 case 4:
7730 break;
7731 case 8:
7732 fputs (", 0", file);
7733 break;
7734 default:
7735 gcc_unreachable ();
7736 }
7737 }
7738
7739 /* In the name of slightly smaller debug output, and to cater to
7740 general assembler lossage, recognize PIC+GOTOFF and turn it back
7741 into a direct symbol reference.
7742
7743 On Darwin, this is necessary to avoid a crash, because Darwin
7744 has a different PIC label for each routine but the DWARF debugging
7745 information is not associated with any particular routine, so it's
7746 necessary to remove references to the PIC label from RTL stored by
7747 the DWARF output code. */
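/* For instance (placeholder symbol), the PIC expression

       (plus (reg:SI ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))

   is typically turned back into plain (symbol_ref "foo"), with any
   register or constant addend re-attached around the result.  */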
7748
7749 static rtx
7750 ix86_delegitimize_address (rtx orig_x)
7751 {
7752 rtx x = orig_x;
7753 /* reg_addend is NULL or a multiple of some register. */
7754 rtx reg_addend = NULL_RTX;
7755 /* const_addend is NULL or a const_int. */
7756 rtx const_addend = NULL_RTX;
7757 /* This is the result, or NULL. */
7758 rtx result = NULL_RTX;
7759
7760 if (MEM_P (x))
7761 x = XEXP (x, 0);
7762
7763 if (TARGET_64BIT)
7764 {
7765 if (GET_CODE (x) != CONST
7766 || GET_CODE (XEXP (x, 0)) != UNSPEC
7767 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7768 || !MEM_P (orig_x))
7769 return orig_x;
7770 return XVECEXP (XEXP (x, 0), 0, 0);
7771 }
7772
7773 if (GET_CODE (x) != PLUS
7774 || GET_CODE (XEXP (x, 1)) != CONST)
7775 return orig_x;
7776
7777 if (REG_P (XEXP (x, 0))
7778 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7779 /* %ebx + GOT/GOTOFF */
7780 ;
7781 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7782 {
7783 /* %ebx + %reg * scale + GOT/GOTOFF */
7784 reg_addend = XEXP (x, 0);
7785 if (REG_P (XEXP (reg_addend, 0))
7786 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7787 reg_addend = XEXP (reg_addend, 1);
7788 else if (REG_P (XEXP (reg_addend, 1))
7789 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7790 reg_addend = XEXP (reg_addend, 0);
7791 else
7792 return orig_x;
7793 if (!REG_P (reg_addend)
7794 && GET_CODE (reg_addend) != MULT
7795 && GET_CODE (reg_addend) != ASHIFT)
7796 return orig_x;
7797 }
7798 else
7799 return orig_x;
7800
7801 x = XEXP (XEXP (x, 1), 0);
7802 if (GET_CODE (x) == PLUS
7803 && CONST_INT_P (XEXP (x, 1)))
7804 {
7805 const_addend = XEXP (x, 1);
7806 x = XEXP (x, 0);
7807 }
7808
7809 if (GET_CODE (x) == UNSPEC
7810 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7811 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7812 result = XVECEXP (x, 0, 0);
7813
7814 if (TARGET_MACHO && darwin_local_data_pic (x)
7815 && !MEM_P (orig_x))
7816 result = XEXP (x, 0);
7817
7818 if (! result)
7819 return orig_x;
7820
7821 if (const_addend)
7822 result = gen_rtx_PLUS (Pmode, result, const_addend);
7823 if (reg_addend)
7824 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7825 return result;
7826 }
7827 \f
7828 static void
7829 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7830 int fp, FILE *file)
7831 {
7832 const char *suffix;
7833
7834 if (mode == CCFPmode || mode == CCFPUmode)
7835 {
7836 enum rtx_code second_code, bypass_code;
7837 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7838 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7839 code = ix86_fp_compare_code_to_integer (code);
7840 mode = CCmode;
7841 }
7842 if (reverse)
7843 code = reverse_condition (code);
7844
7845 switch (code)
7846 {
7847 case EQ:
7848 suffix = "e";
7849 break;
7850 case NE:
7851 suffix = "ne";
7852 break;
7853 case GT:
7854 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7855 suffix = "g";
7856 break;
7857 case GTU:
7858 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7859 Those same assemblers have the same but opposite lossage on cmov. */
7860 gcc_assert (mode == CCmode);
7861 suffix = fp ? "nbe" : "a";
7862 break;
7863 case LT:
7864 switch (mode)
7865 {
7866 case CCNOmode:
7867 case CCGOCmode:
7868 suffix = "s";
7869 break;
7870
7871 case CCmode:
7872 case CCGCmode:
7873 suffix = "l";
7874 break;
7875
7876 default:
7877 gcc_unreachable ();
7878 }
7879 break;
7880 case LTU:
7881 gcc_assert (mode == CCmode);
7882 suffix = "b";
7883 break;
7884 case GE:
7885 switch (mode)
7886 {
7887 case CCNOmode:
7888 case CCGOCmode:
7889 suffix = "ns";
7890 break;
7891
7892 case CCmode:
7893 case CCGCmode:
7894 suffix = "ge";
7895 break;
7896
7897 default:
7898 gcc_unreachable ();
7899 }
7900 break;
7901 case GEU:
7902 /* ??? As above. */
7903 gcc_assert (mode == CCmode);
7904 suffix = fp ? "nb" : "ae";
7905 break;
7906 case LE:
7907 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7908 suffix = "le";
7909 break;
7910 case LEU:
7911 gcc_assert (mode == CCmode);
7912 suffix = "be";
7913 break;
7914 case UNORDERED:
7915 suffix = fp ? "u" : "p";
7916 break;
7917 case ORDERED:
7918 suffix = fp ? "nu" : "np";
7919 break;
7920 default:
7921 gcc_unreachable ();
7922 }
7923 fputs (suffix, file);
7924 }
7925
7926 /* Print the name of register X to FILE based on its machine mode and number.
7927 If CODE is 'w', pretend the mode is HImode.
7928 If CODE is 'b', pretend the mode is QImode.
7929 If CODE is 'k', pretend the mode is SImode.
7930 If CODE is 'q', pretend the mode is DImode.
7931 If CODE is 'h', pretend the reg is the 'high' byte register.
7932 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
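/* For example, for hard register 0 (ax) the codes select:
       'b' -> %al    'h' -> %ah    'w' -> %ax
       'k' -> %eax   'q' -> %rax
   and for a stack register, 'y' prints "st(0)" rather than "st".
   (The register choice here is only an example.)  */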
7933
7934 void
7935 print_reg (rtx x, int code, FILE *file)
7936 {
7937 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7938 && REGNO (x) != FRAME_POINTER_REGNUM
7939 && REGNO (x) != FLAGS_REG
7940 && REGNO (x) != FPSR_REG
7941 && REGNO (x) != FPCR_REG);
7942
7943 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7944 putc ('%', file);
7945
7946 if (code == 'w' || MMX_REG_P (x))
7947 code = 2;
7948 else if (code == 'b')
7949 code = 1;
7950 else if (code == 'k')
7951 code = 4;
7952 else if (code == 'q')
7953 code = 8;
7954 else if (code == 'y')
7955 code = 3;
7956 else if (code == 'h')
7957 code = 0;
7958 else
7959 code = GET_MODE_SIZE (GET_MODE (x));
7960
7961 /* Irritatingly, AMD extended registers use a different naming convention
7962 from the normal registers. */
7963 if (REX_INT_REG_P (x))
7964 {
7965 gcc_assert (TARGET_64BIT);
7966 switch (code)
7967 {
7968 case 0:
7969 error ("extended registers have no high halves");
7970 break;
7971 case 1:
7972 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7973 break;
7974 case 2:
7975 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7976 break;
7977 case 4:
7978 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7979 break;
7980 case 8:
7981 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7982 break;
7983 default:
7984 error ("unsupported operand size for extended register");
7985 break;
7986 }
7987 return;
7988 }
7989 switch (code)
7990 {
7991 case 3:
7992 if (STACK_TOP_P (x))
7993 {
7994 fputs ("st(0)", file);
7995 break;
7996 }
7997 /* FALLTHRU */
7998 case 8:
7999 case 4:
8000 case 12:
8001 if (! ANY_FP_REG_P (x))
8002 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8003 /* FALLTHRU */
8004 case 16:
8005 case 2:
8006 normal:
8007 fputs (hi_reg_name[REGNO (x)], file);
8008 break;
8009 case 1:
8010 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8011 goto normal;
8012 fputs (qi_reg_name[REGNO (x)], file);
8013 break;
8014 case 0:
8015 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8016 goto normal;
8017 fputs (qi_high_reg_name[REGNO (x)], file);
8018 break;
8019 default:
8020 gcc_unreachable ();
8021 }
8022 }
8023
8024 /* Locate some local-dynamic symbol still in use by this function
8025 so that we can print its name in some tls_local_dynamic_base
8026 pattern. */
8027
8028 static const char *
8029 get_some_local_dynamic_name (void)
8030 {
8031 rtx insn;
8032
8033 if (cfun->machine->some_ld_name)
8034 return cfun->machine->some_ld_name;
8035
8036 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8037 if (INSN_P (insn)
8038 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8039 return cfun->machine->some_ld_name;
8040
8041 gcc_unreachable ();
8042 }
8043
8044 static int
8045 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8046 {
8047 rtx x = *px;
8048
8049 if (GET_CODE (x) == SYMBOL_REF
8050 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8051 {
8052 cfun->machine->some_ld_name = XSTR (x, 0);
8053 return 1;
8054 }
8055
8056 return 0;
8057 }
8058
8059 /* Meaning of CODE:
8060 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8061 C -- print opcode suffix for set/cmov insn.
8062 c -- like C, but print reversed condition
8063 F,f -- likewise, but for floating-point.
8064 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8065 otherwise nothing
8066 R -- print the prefix for register names.
8067 z -- print the opcode suffix for the size of the current operand.
8068 * -- print a star (in certain assembler syntax)
8069 A -- print an absolute memory reference.
8070 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8071 s -- print a shift double count, followed by the assembler's argument
8072 delimiter.
8073 b -- print the QImode name of the register for the indicated operand.
8074 %b0 would print %al if operands[0] is reg 0.
8075 w -- likewise, print the HImode name of the register.
8076 k -- likewise, print the SImode name of the register.
8077 q -- likewise, print the DImode name of the register.
8078 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8079 y -- print "st(0)" instead of "st" as a register.
8080 D -- print condition for SSE cmp instruction.
8081 P -- if PIC, print an @PLT suffix.
8082 X -- don't print any sort of PIC '@' suffix for a symbol.
8083 & -- print some in-use local-dynamic symbol name.
8084 H -- print a memory address offset by 8; used for sse high-parts
8085 */
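/* A small illustration (hypothetical insn template): with operands[0]
   being register ax and operands[1] a SImode memory operand,

       "mov%z1\t{%1, %k0|%k0, %1}"

   prints as "movl <mem>, %eax" in AT&T syntax -- %z1 supplies the 'l'
   suffix from the operand size and %k0 forces the SImode register
   name.  */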
8086
8087 void
8088 print_operand (FILE *file, rtx x, int code)
8089 {
8090 if (code)
8091 {
8092 switch (code)
8093 {
8094 case '*':
8095 if (ASSEMBLER_DIALECT == ASM_ATT)
8096 putc ('*', file);
8097 return;
8098
8099 case '&':
8100 assemble_name (file, get_some_local_dynamic_name ());
8101 return;
8102
8103 case 'A':
8104 switch (ASSEMBLER_DIALECT)
8105 {
8106 case ASM_ATT:
8107 putc ('*', file);
8108 break;
8109
8110 case ASM_INTEL:
8111 /* Intel syntax. For absolute addresses, registers should not
8112 be surrounded by brackets. */
8113 if (!REG_P (x))
8114 {
8115 putc ('[', file);
8116 PRINT_OPERAND (file, x, 0);
8117 putc (']', file);
8118 return;
8119 }
8120 break;
8121
8122 default:
8123 gcc_unreachable ();
8124 }
8125
8126 PRINT_OPERAND (file, x, 0);
8127 return;
8128
8129
8130 case 'L':
8131 if (ASSEMBLER_DIALECT == ASM_ATT)
8132 putc ('l', file);
8133 return;
8134
8135 case 'W':
8136 if (ASSEMBLER_DIALECT == ASM_ATT)
8137 putc ('w', file);
8138 return;
8139
8140 case 'B':
8141 if (ASSEMBLER_DIALECT == ASM_ATT)
8142 putc ('b', file);
8143 return;
8144
8145 case 'Q':
8146 if (ASSEMBLER_DIALECT == ASM_ATT)
8147 putc ('l', file);
8148 return;
8149
8150 case 'S':
8151 if (ASSEMBLER_DIALECT == ASM_ATT)
8152 putc ('s', file);
8153 return;
8154
8155 case 'T':
8156 if (ASSEMBLER_DIALECT == ASM_ATT)
8157 putc ('t', file);
8158 return;
8159
8160 case 'z':
8161 /* 387 opcodes don't get size suffixes if the operands are
8162 registers. */
8163 if (STACK_REG_P (x))
8164 return;
8165
8166 /* Likewise if using Intel opcodes. */
8167 if (ASSEMBLER_DIALECT == ASM_INTEL)
8168 return;
8169
8170 /* Derive the opcode suffix from the size of the operand. */
8171 switch (GET_MODE_SIZE (GET_MODE (x)))
8172 {
8173 case 1:
8174 putc ('b', file);
8175 return;
8176
8177 case 2:
8178 #ifdef HAVE_GAS_FILDS_FISTS
8179 putc ('s', file);
8180 #endif
8181 return;
8182
8183 case 4:
8184 if (GET_MODE (x) == SFmode)
8185 {
8186 putc ('s', file);
8187 return;
8188 }
8189 else
8190 putc ('l', file);
8191 return;
8192
8193 case 12:
8194 case 16:
8195 putc ('t', file);
8196 return;
8197
8198 case 8:
8199 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8200 {
8201 #ifdef GAS_MNEMONICS
8202 putc ('q', file);
8203 #else
8204 putc ('l', file);
8205 putc ('l', file);
8206 #endif
8207 }
8208 else
8209 putc ('l', file);
8210 return;
8211
8212 default:
8213 gcc_unreachable ();
8214 }
8215
8216 case 'b':
8217 case 'w':
8218 case 'k':
8219 case 'q':
8220 case 'h':
8221 case 'y':
8222 case 'X':
8223 case 'P':
8224 break;
8225
8226 case 's':
8227 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8228 {
8229 PRINT_OPERAND (file, x, 0);
8230 putc (',', file);
8231 }
8232 return;
8233
8234 case 'D':
8235 /* A little bit of brain damage here. The SSE compare instructions
8236 use completely different names for the comparisons than the
8237 fp conditional moves do. */
8238 switch (GET_CODE (x))
8239 {
8240 case EQ:
8241 case UNEQ:
8242 fputs ("eq", file);
8243 break;
8244 case LT:
8245 case UNLT:
8246 fputs ("lt", file);
8247 break;
8248 case LE:
8249 case UNLE:
8250 fputs ("le", file);
8251 break;
8252 case UNORDERED:
8253 fputs ("unord", file);
8254 break;
8255 case NE:
8256 case LTGT:
8257 fputs ("neq", file);
8258 break;
8259 case UNGE:
8260 case GE:
8261 fputs ("nlt", file);
8262 break;
8263 case UNGT:
8264 case GT:
8265 fputs ("nle", file);
8266 break;
8267 case ORDERED:
8268 fputs ("ord", file);
8269 break;
8270 default:
8271 gcc_unreachable ();
8272 }
8273 return;
8274 case 'O':
8275 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8276 if (ASSEMBLER_DIALECT == ASM_ATT)
8277 {
8278 switch (GET_MODE (x))
8279 {
8280 case HImode: putc ('w', file); break;
8281 case SImode:
8282 case SFmode: putc ('l', file); break;
8283 case DImode:
8284 case DFmode: putc ('q', file); break;
8285 default: gcc_unreachable ();
8286 }
8287 putc ('.', file);
8288 }
8289 #endif
8290 return;
8291 case 'C':
8292 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8293 return;
8294 case 'F':
8295 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8296 if (ASSEMBLER_DIALECT == ASM_ATT)
8297 putc ('.', file);
8298 #endif
8299 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8300 return;
8301
8302 /* Like above, but reverse condition */
8303 case 'c':
8304 /* Check to see if argument to %c is really a constant
8305 and not a condition code which needs to be reversed. */
8306 if (!COMPARISON_P (x))
8307 {
8308 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8309 return;
8310 }
8311 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8312 return;
8313 case 'f':
8314 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8315 if (ASSEMBLER_DIALECT == ASM_ATT)
8316 putc ('.', file);
8317 #endif
8318 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8319 return;
8320
8321 case 'H':
8322 /* It doesn't actually matter what mode we use here, as we're
8323 only going to use this for printing. */
8324 x = adjust_address_nv (x, DImode, 8);
8325 break;
8326
8327 case '+':
8328 {
8329 rtx x;
8330
8331 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8332 return;
8333
8334 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8335 if (x)
8336 {
8337 int pred_val = INTVAL (XEXP (x, 0));
8338
8339 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8340 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8341 {
8342 int taken = pred_val > REG_BR_PROB_BASE / 2;
8343 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8344
8345 /* Emit hints only in the case where the default branch
8346 prediction heuristics would fail. */
8347 if (taken != cputaken)
8348 {
8349 /* We use 3e (DS) prefix for taken branches and
8350 2e (CS) prefix for not taken branches. */
8351 if (taken)
8352 fputs ("ds ; ", file);
8353 else
8354 fputs ("cs ; ", file);
8355 }
8356 }
8357 }
8358 return;
8359 }
8360 default:
8361 output_operand_lossage ("invalid operand code '%c'", code);
8362 }
8363 }
8364
8365 if (REG_P (x))
8366 print_reg (x, code, file);
8367
8368 else if (MEM_P (x))
8369 {
8370 /* No `byte ptr' prefix for call instructions. */
8371 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8372 {
8373 const char * size;
8374 switch (GET_MODE_SIZE (GET_MODE (x)))
8375 {
8376 case 1: size = "BYTE"; break;
8377 case 2: size = "WORD"; break;
8378 case 4: size = "DWORD"; break;
8379 case 8: size = "QWORD"; break;
8380 case 12: size = "XWORD"; break;
8381 case 16: size = "XMMWORD"; break;
8382 default:
8383 gcc_unreachable ();
8384 }
8385
8386 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8387 if (code == 'b')
8388 size = "BYTE";
8389 else if (code == 'w')
8390 size = "WORD";
8391 else if (code == 'k')
8392 size = "DWORD";
8393
8394 fputs (size, file);
8395 fputs (" PTR ", file);
8396 }
8397
8398 x = XEXP (x, 0);
8399 /* Avoid (%rip) for call operands. */
8400 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8401 && !CONST_INT_P (x))
8402 output_addr_const (file, x);
8403 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8404 output_operand_lossage ("invalid constraints for operand");
8405 else
8406 output_address (x);
8407 }
8408
8409 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8410 {
8411 REAL_VALUE_TYPE r;
8412 long l;
8413
8414 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8415 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8416
8417 if (ASSEMBLER_DIALECT == ASM_ATT)
8418 putc ('$', file);
8419 fprintf (file, "0x%08lx", l);
8420 }
8421
8422 /* These float cases don't actually occur as immediate operands. */
8423 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8424 {
8425 char dstr[30];
8426
8427 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8428 fprintf (file, "%s", dstr);
8429 }
8430
8431 else if (GET_CODE (x) == CONST_DOUBLE
8432 && GET_MODE (x) == XFmode)
8433 {
8434 char dstr[30];
8435
8436 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8437 fprintf (file, "%s", dstr);
8438 }
8439
8440 else
8441 {
8442 /* We have patterns that allow zero sets of memory, for instance.
8443 In 64-bit mode, we should probably support all 8-byte vectors,
8444 since we can in fact encode that into an immediate. */
8445 if (GET_CODE (x) == CONST_VECTOR)
8446 {
8447 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8448 x = const0_rtx;
8449 }
8450
8451 if (code != 'P')
8452 {
8453 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8454 {
8455 if (ASSEMBLER_DIALECT == ASM_ATT)
8456 putc ('$', file);
8457 }
8458 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8459 || GET_CODE (x) == LABEL_REF)
8460 {
8461 if (ASSEMBLER_DIALECT == ASM_ATT)
8462 putc ('$', file);
8463 else
8464 fputs ("OFFSET FLAT:", file);
8465 }
8466 }
8467 if (CONST_INT_P (x))
8468 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8469 else if (flag_pic)
8470 output_pic_addr_const (file, x, code);
8471 else
8472 output_addr_const (file, x);
8473 }
8474 }
8475 \f
8476 /* Print a memory operand whose address is ADDR. */
8477
8478 void
8479 print_operand_address (FILE *file, rtx addr)
8480 {
8481 struct ix86_address parts;
8482 rtx base, index, disp;
8483 int scale;
8484 int ok = ix86_decompose_address (addr, &parts);
8485
8486 gcc_assert (ok);
8487
8488 base = parts.base;
8489 index = parts.index;
8490 disp = parts.disp;
8491 scale = parts.scale;
8492
8493 switch (parts.seg)
8494 {
8495 case SEG_DEFAULT:
8496 break;
8497 case SEG_FS:
8498 case SEG_GS:
8499 if (USER_LABEL_PREFIX[0] == 0)
8500 putc ('%', file);
8501 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8502 break;
8503 default:
8504 gcc_unreachable ();
8505 }
8506
8507 if (!base && !index)
8508 {
8509 /* Displacement only requires special attention. */
8510
8511 if (CONST_INT_P (disp))
8512 {
8513 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8514 {
8515 if (USER_LABEL_PREFIX[0] == 0)
8516 putc ('%', file);
8517 fputs ("ds:", file);
8518 }
8519 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8520 }
8521 else if (flag_pic)
8522 output_pic_addr_const (file, disp, 0);
8523 else
8524 output_addr_const (file, disp);
8525
8526 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8527 if (TARGET_64BIT)
8528 {
8529 if (GET_CODE (disp) == CONST
8530 && GET_CODE (XEXP (disp, 0)) == PLUS
8531 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8532 disp = XEXP (XEXP (disp, 0), 0);
8533 if (GET_CODE (disp) == LABEL_REF
8534 || (GET_CODE (disp) == SYMBOL_REF
8535 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8536 fputs ("(%rip)", file);
8537 }
8538 }
8539 else
8540 {
8541 if (ASSEMBLER_DIALECT == ASM_ATT)
8542 {
8543 if (disp)
8544 {
8545 if (flag_pic)
8546 output_pic_addr_const (file, disp, 0);
8547 else if (GET_CODE (disp) == LABEL_REF)
8548 output_asm_label (disp);
8549 else
8550 output_addr_const (file, disp);
8551 }
8552
8553 putc ('(', file);
8554 if (base)
8555 print_reg (base, 0, file);
8556 if (index)
8557 {
8558 putc (',', file);
8559 print_reg (index, 0, file);
8560 if (scale != 1)
8561 fprintf (file, ",%d", scale);
8562 }
8563 putc (')', file);
8564 }
8565 else
8566 {
8567 rtx offset = NULL_RTX;
8568
8569 if (disp)
8570 {
8571 /* Pull out the offset of a symbol; print any symbol itself. */
8572 if (GET_CODE (disp) == CONST
8573 && GET_CODE (XEXP (disp, 0)) == PLUS
8574 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8575 {
8576 offset = XEXP (XEXP (disp, 0), 1);
8577 disp = gen_rtx_CONST (VOIDmode,
8578 XEXP (XEXP (disp, 0), 0));
8579 }
8580
8581 if (flag_pic)
8582 output_pic_addr_const (file, disp, 0);
8583 else if (GET_CODE (disp) == LABEL_REF)
8584 output_asm_label (disp);
8585 else if (CONST_INT_P (disp))
8586 offset = disp;
8587 else
8588 output_addr_const (file, disp);
8589 }
8590
8591 putc ('[', file);
8592 if (base)
8593 {
8594 print_reg (base, 0, file);
8595 if (offset)
8596 {
8597 if (INTVAL (offset) >= 0)
8598 putc ('+', file);
8599 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8600 }
8601 }
8602 else if (offset)
8603 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8604 else
8605 putc ('0', file);
8606
8607 if (index)
8608 {
8609 putc ('+', file);
8610 print_reg (index, 0, file);
8611 if (scale != 1)
8612 fprintf (file, "*%d", scale);
8613 }
8614 putc (']', file);
8615 }
8616 }
8617 }
8618
8619 bool
8620 output_addr_const_extra (FILE *file, rtx x)
8621 {
8622 rtx op;
8623
8624 if (GET_CODE (x) != UNSPEC)
8625 return false;
8626
8627 op = XVECEXP (x, 0, 0);
8628 switch (XINT (x, 1))
8629 {
8630 case UNSPEC_GOTTPOFF:
8631 output_addr_const (file, op);
8632 /* FIXME: This might be @TPOFF in Sun ld. */
8633 fputs ("@GOTTPOFF", file);
8634 break;
8635 case UNSPEC_TPOFF:
8636 output_addr_const (file, op);
8637 fputs ("@TPOFF", file);
8638 break;
8639 case UNSPEC_NTPOFF:
8640 output_addr_const (file, op);
8641 if (TARGET_64BIT)
8642 fputs ("@TPOFF", file);
8643 else
8644 fputs ("@NTPOFF", file);
8645 break;
8646 case UNSPEC_DTPOFF:
8647 output_addr_const (file, op);
8648 fputs ("@DTPOFF", file);
8649 break;
8650 case UNSPEC_GOTNTPOFF:
8651 output_addr_const (file, op);
8652 if (TARGET_64BIT)
8653 fputs ("@GOTTPOFF(%rip)", file);
8654 else
8655 fputs ("@GOTNTPOFF", file);
8656 break;
8657 case UNSPEC_INDNTPOFF:
8658 output_addr_const (file, op);
8659 fputs ("@INDNTPOFF", file);
8660 break;
8661
8662 default:
8663 return false;
8664 }
8665
8666 return true;
8667 }
8668 \f
8669 /* Split one or more DImode RTL references into pairs of SImode
8670 references. The RTL can be REG, offsettable MEM, integer constant, or
8671 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8672 split and "num" is its length. lo_half and hi_half are output arrays
8673 that parallel "operands". */
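/* For example, splitting a single offsettable DImode memory operand OP
   yields

       lo_half[0] = adjust_address (op, SImode, 0);
       hi_half[0] = adjust_address (op, SImode, 4);

   i.e. the low word at the original address and the high word 4 bytes
   further on (little endian).  Registers and constants are split with
   simplify_gen_subreg instead.  */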
8674
8675 void
8676 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8677 {
8678 while (num--)
8679 {
8680 rtx op = operands[num];
8681
8682 /* simplify_subreg refuses to split volatile memory addresses,
8683 but we still have to handle them. */
8684 if (MEM_P (op))
8685 {
8686 lo_half[num] = adjust_address (op, SImode, 0);
8687 hi_half[num] = adjust_address (op, SImode, 4);
8688 }
8689 else
8690 {
8691 lo_half[num] = simplify_gen_subreg (SImode, op,
8692 GET_MODE (op) == VOIDmode
8693 ? DImode : GET_MODE (op), 0);
8694 hi_half[num] = simplify_gen_subreg (SImode, op,
8695 GET_MODE (op) == VOIDmode
8696 ? DImode : GET_MODE (op), 4);
8697 }
8698 }
8699 }
8700 /* Split one or more TImode RTL references into pairs of DImode
8701 references. The RTL can be REG, offsettable MEM, integer constant, or
8702 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8703 split and "num" is its length. lo_half and hi_half are output arrays
8704 that parallel "operands". */
8705
8706 void
8707 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8708 {
8709 while (num--)
8710 {
8711 rtx op = operands[num];
8712
8713 /* simplify_subreg refuses to split volatile memory addresses, but we
8714 still have to handle them. */
8715 if (MEM_P (op))
8716 {
8717 lo_half[num] = adjust_address (op, DImode, 0);
8718 hi_half[num] = adjust_address (op, DImode, 8);
8719 }
8720 else
8721 {
8722 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8723 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8724 }
8725 }
8726 }
8727 \f
8728 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8729 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8730 is the expression of the binary operation. The output may either be
8731 emitted here, or returned to the caller, like all output_* functions.
8732
8733 There is no guarantee that the operands are the same mode, as they
8734 might be within FLOAT or FLOAT_EXTEND expressions. */
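/* As a rough example, for an SFmode addition where operands[2] is a
   memory operand and operands[0] == operands[1] == %st(0), this routine
   returns a template along the lines of

       "fadd%z2\t%2"        which prints as "fadds <mem>"

   while the SSE path would instead return "addss\t{%2, %0|%0, %2}".
   The exact string depends on which operand is in memory, which stack
   register dies, and the SYSV386_COMPAT setting below.  */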
8735
8736 #ifndef SYSV386_COMPAT
8737 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8738 wants to fix the assemblers because that causes incompatibility
8739 with gcc. No-one wants to fix gcc because that causes
8740 incompatibility with assemblers... You can use the option of
8741 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8742 #define SYSV386_COMPAT 1
8743 #endif
8744
8745 const char *
8746 output_387_binary_op (rtx insn, rtx *operands)
8747 {
8748 static char buf[30];
8749 const char *p;
8750 const char *ssep;
8751 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8752
8753 #ifdef ENABLE_CHECKING
8754   /* Even if we do not want to check the inputs, this documents the input
8755      constraints, which helps in understanding the following code.  */
8756 if (STACK_REG_P (operands[0])
8757 && ((REG_P (operands[1])
8758 && REGNO (operands[0]) == REGNO (operands[1])
8759 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8760 || (REG_P (operands[2])
8761 && REGNO (operands[0]) == REGNO (operands[2])
8762 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8763 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8764 ; /* ok */
8765 else
8766 gcc_assert (is_sse);
8767 #endif
8768
8769 switch (GET_CODE (operands[3]))
8770 {
8771 case PLUS:
8772 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8773 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8774 p = "fiadd";
8775 else
8776 p = "fadd";
8777 ssep = "add";
8778 break;
8779
8780 case MINUS:
8781 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8782 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8783 p = "fisub";
8784 else
8785 p = "fsub";
8786 ssep = "sub";
8787 break;
8788
8789 case MULT:
8790 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8791 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8792 p = "fimul";
8793 else
8794 p = "fmul";
8795 ssep = "mul";
8796 break;
8797
8798 case DIV:
8799 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8800 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8801 p = "fidiv";
8802 else
8803 p = "fdiv";
8804 ssep = "div";
8805 break;
8806
8807 default:
8808 gcc_unreachable ();
8809 }
8810
8811 if (is_sse)
8812 {
8813 strcpy (buf, ssep);
8814 if (GET_MODE (operands[0]) == SFmode)
8815 strcat (buf, "ss\t{%2, %0|%0, %2}");
8816 else
8817 strcat (buf, "sd\t{%2, %0|%0, %2}");
8818 return buf;
8819 }
8820 strcpy (buf, p);
8821
8822 switch (GET_CODE (operands[3]))
8823 {
8824 case MULT:
8825 case PLUS:
8826 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8827 {
8828 rtx temp = operands[2];
8829 operands[2] = operands[1];
8830 operands[1] = temp;
8831 }
8832
8833       /* We now know that operands[0] == operands[1].  */
8834
8835 if (MEM_P (operands[2]))
8836 {
8837 p = "%z2\t%2";
8838 break;
8839 }
8840
8841 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8842 {
8843 if (STACK_TOP_P (operands[0]))
8844 /* How is it that we are storing to a dead operand[2]?
8845 Well, presumably operands[1] is dead too. We can't
8846 store the result to st(0) as st(0) gets popped on this
8847 instruction. Instead store to operands[2] (which I
8848 think has to be st(1)). st(1) will be popped later.
8849 gcc <= 2.8.1 didn't have this check and generated
8850 assembly code that the Unixware assembler rejected. */
8851 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8852 else
8853 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8854 break;
8855 }
8856
8857 if (STACK_TOP_P (operands[0]))
8858 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8859 else
8860 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8861 break;
8862
8863 case MINUS:
8864 case DIV:
8865 if (MEM_P (operands[1]))
8866 {
8867 p = "r%z1\t%1";
8868 break;
8869 }
8870
8871 if (MEM_P (operands[2]))
8872 {
8873 p = "%z2\t%2";
8874 break;
8875 }
8876
8877 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8878 {
8879 #if SYSV386_COMPAT
8880 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8881 derived assemblers, confusingly reverse the direction of
8882 the operation for fsub{r} and fdiv{r} when the
8883 destination register is not st(0). The Intel assembler
8884 doesn't have this brain damage. Read !SYSV386_COMPAT to
8885 figure out what the hardware really does. */
8886 if (STACK_TOP_P (operands[0]))
8887 p = "{p\t%0, %2|rp\t%2, %0}";
8888 else
8889 p = "{rp\t%2, %0|p\t%0, %2}";
8890 #else
8891 if (STACK_TOP_P (operands[0]))
8892 /* As above for fmul/fadd, we can't store to st(0). */
8893 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8894 else
8895 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8896 #endif
8897 break;
8898 }
8899
8900 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8901 {
8902 #if SYSV386_COMPAT
8903 if (STACK_TOP_P (operands[0]))
8904 p = "{rp\t%0, %1|p\t%1, %0}";
8905 else
8906 p = "{p\t%1, %0|rp\t%0, %1}";
8907 #else
8908 if (STACK_TOP_P (operands[0]))
8909 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8910 else
8911 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8912 #endif
8913 break;
8914 }
8915
8916 if (STACK_TOP_P (operands[0]))
8917 {
8918 if (STACK_TOP_P (operands[1]))
8919 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8920 else
8921 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8922 break;
8923 }
8924 else if (STACK_TOP_P (operands[1]))
8925 {
8926 #if SYSV386_COMPAT
8927 p = "{\t%1, %0|r\t%0, %1}";
8928 #else
8929 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8930 #endif
8931 }
8932 else
8933 {
8934 #if SYSV386_COMPAT
8935 p = "{r\t%2, %0|\t%0, %2}";
8936 #else
8937 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8938 #endif
8939 }
8940 break;
8941
8942 default:
8943 gcc_unreachable ();
8944 }
8945
8946 strcat (buf, p);
8947 return buf;
8948 }
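
/* Illustrative outputs of output_387_binary_op, taken from the templates
   above: an SFmode PLUS carried out in SSE registers returns
   "addss\t{%2, %0|%0, %2}", while an x87 PLUS whose second operand is
   memory returns "fadd%z2\t%2"; the "p" and "r" variants handle the cases
   where st(0) or the other stack register dies.  */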
8949
8950 /* Return needed mode for entity in optimize_mode_switching pass. */
8951
8952 int
8953 ix86_mode_needed (int entity, rtx insn)
8954 {
8955 enum attr_i387_cw mode;
8956
8957   /* The mode UNINITIALIZED is used to store the control word after a
8958      function call or ASM pattern.  The mode ANY specifies that the function
8959      has no requirements on the control word and makes no changes to the
8960      bits we are interested in.  */
8961
8962 if (CALL_P (insn)
8963 || (NONJUMP_INSN_P (insn)
8964 && (asm_noperands (PATTERN (insn)) >= 0
8965 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8966 return I387_CW_UNINITIALIZED;
8967
8968 if (recog_memoized (insn) < 0)
8969 return I387_CW_ANY;
8970
8971 mode = get_attr_i387_cw (insn);
8972
8973 switch (entity)
8974 {
8975 case I387_TRUNC:
8976 if (mode == I387_CW_TRUNC)
8977 return mode;
8978 break;
8979
8980 case I387_FLOOR:
8981 if (mode == I387_CW_FLOOR)
8982 return mode;
8983 break;
8984
8985 case I387_CEIL:
8986 if (mode == I387_CW_CEIL)
8987 return mode;
8988 break;
8989
8990 case I387_MASK_PM:
8991 if (mode == I387_CW_MASK_PM)
8992 return mode;
8993 break;
8994
8995 default:
8996 gcc_unreachable ();
8997 }
8998
8999 return I387_CW_ANY;
9000 }
9001
9002 /* Output code to initialize the control word copies used by the trunc?f?i
9003    and rounding patterns.  The current control word is saved first, then a
9004    copy adjusted for rounding mode MODE is stored in the stack slot for MODE.  */
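
/* For reference, the constants used below are x87 control word fields:
   bits 10-11 select the rounding mode -- 0x0000 round to nearest, 0x0400
   round down, 0x0800 round up, 0x0c00 truncate (round toward zero) -- and
   bit 5 (0x0020) masks the precision exception, which is what nearbyint()
   needs.  */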
9005
9006 void
9007 emit_i387_cw_initialization (int mode)
9008 {
9009 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9010 rtx new_mode;
9011
9012 int slot;
9013
9014 rtx reg = gen_reg_rtx (HImode);
9015
9016 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9017 emit_move_insn (reg, copy_rtx (stored_mode));
9018
9019 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9020 {
9021 switch (mode)
9022 {
9023 case I387_CW_TRUNC:
9024 /* round toward zero (truncate) */
9025 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9026 slot = SLOT_CW_TRUNC;
9027 break;
9028
9029 case I387_CW_FLOOR:
9030 /* round down toward -oo */
9031 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9032 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9033 slot = SLOT_CW_FLOOR;
9034 break;
9035
9036 case I387_CW_CEIL:
9037 /* round up toward +oo */
9038 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9039 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9040 slot = SLOT_CW_CEIL;
9041 break;
9042
9043 case I387_CW_MASK_PM:
9044 /* mask precision exception for nearbyint() */
9045 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9046 slot = SLOT_CW_MASK_PM;
9047 break;
9048
9049 default:
9050 gcc_unreachable ();
9051 }
9052 }
9053 else
9054 {
9055 switch (mode)
9056 {
9057 case I387_CW_TRUNC:
9058 /* round toward zero (truncate) */
9059 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9060 slot = SLOT_CW_TRUNC;
9061 break;
9062
9063 case I387_CW_FLOOR:
9064 /* round down toward -oo */
9065 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9066 slot = SLOT_CW_FLOOR;
9067 break;
9068
9069 case I387_CW_CEIL:
9070 /* round up toward +oo */
9071 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9072 slot = SLOT_CW_CEIL;
9073 break;
9074
9075 case I387_CW_MASK_PM:
9076 /* mask precision exception for nearbyint() */
9077 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9078 slot = SLOT_CW_MASK_PM;
9079 break;
9080
9081 default:
9082 gcc_unreachable ();
9083 }
9084 }
9085
9086 gcc_assert (slot < MAX_386_STACK_LOCALS);
9087
9088 new_mode = assign_386_stack_local (HImode, slot);
9089 emit_move_insn (new_mode, reg);
9090 }
9091
9092 /* Output code for INSN to convert a float to a signed int. OPERANDS
9093 are the insn operands. The output may be [HSD]Imode and the input
9094 operand may be [SDX]Fmode. */
9095
9096 const char *
9097 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9098 {
9099 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9100 int dimode_p = GET_MODE (operands[0]) == DImode;
9101 int round_mode = get_attr_i387_cw (insn);
9102
9103 /* Jump through a hoop or two for DImode, since the hardware has no
9104 non-popping instruction. We used to do this a different way, but
9105 that was somewhat fragile and broke with post-reload splitters. */
9106 if ((dimode_p || fisttp) && !stack_top_dies)
9107 output_asm_insn ("fld\t%y1", operands);
9108
9109 gcc_assert (STACK_TOP_P (operands[1]));
9110 gcc_assert (MEM_P (operands[0]));
9111
9112 if (fisttp)
9113 output_asm_insn ("fisttp%z0\t%0", operands);
9114 else
9115 {
9116 if (round_mode != I387_CW_ANY)
9117 output_asm_insn ("fldcw\t%3", operands);
9118 if (stack_top_dies || dimode_p)
9119 output_asm_insn ("fistp%z0\t%0", operands);
9120 else
9121 output_asm_insn ("fist%z0\t%0", operands);
9122 if (round_mode != I387_CW_ANY)
9123 output_asm_insn ("fldcw\t%2", operands);
9124 }
9125
9126 return "";
9127 }
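
/* The emitted sequence for the common x87 case is roughly

	fldcw	%3	# load control word with the requested rounding mode
	fistp%z0	%0	# or fist%z0 when the stack top survives
	fldcw	%2	# restore the original control word

   with an extra "fld %y1" beforehand when a popping convert is required
   (DImode or fisttp) but the stack top does not die.  */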
9128
9129 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9130 have the values zero or one, indicates the ffreep insn's operand
9131 from the OPERANDS array. */
9132
9133 static const char *
9134 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9135 {
9136 if (TARGET_USE_FFREEP)
9137 #if HAVE_AS_IX86_FFREEP
9138 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9139 #else
9140 {
9141 static char retval[] = ".word\t0xc_df";
9142 int regno = REGNO (operands[opno]);
9143
9144 gcc_assert (FP_REGNO_P (regno));
9145
9146 retval[9] = '0' + (regno - FIRST_STACK_REG);
9147 return retval;
9148 }
9149 #endif
9150
9151 return opno ? "fstp\t%y1" : "fstp\t%y0";
9152 }
9153
9154
9155 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9156 should be used. UNORDERED_P is true when fucom should be used. */
9157
9158 const char *
9159 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9160 {
9161 int stack_top_dies;
9162 rtx cmp_op0, cmp_op1;
9163 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9164
9165 if (eflags_p)
9166 {
9167 cmp_op0 = operands[0];
9168 cmp_op1 = operands[1];
9169 }
9170 else
9171 {
9172 cmp_op0 = operands[1];
9173 cmp_op1 = operands[2];
9174 }
9175
9176 if (is_sse)
9177 {
9178 if (GET_MODE (operands[0]) == SFmode)
9179 if (unordered_p)
9180 return "ucomiss\t{%1, %0|%0, %1}";
9181 else
9182 return "comiss\t{%1, %0|%0, %1}";
9183 else
9184 if (unordered_p)
9185 return "ucomisd\t{%1, %0|%0, %1}";
9186 else
9187 return "comisd\t{%1, %0|%0, %1}";
9188 }
9189
9190 gcc_assert (STACK_TOP_P (cmp_op0));
9191
9192 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9193
9194 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9195 {
9196 if (stack_top_dies)
9197 {
9198 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9199 return output_387_ffreep (operands, 1);
9200 }
9201 else
9202 return "ftst\n\tfnstsw\t%0";
9203 }
9204
9205 if (STACK_REG_P (cmp_op1)
9206 && stack_top_dies
9207 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9208 && REGNO (cmp_op1) != FIRST_STACK_REG)
9209 {
9210       /* If both the top of the 387 stack and the other operand (also a
9211          stack register) die, then this must be a `fcompp' float
9212          compare.  */
9213
9214 if (eflags_p)
9215 {
9216 /* There is no double popping fcomi variant. Fortunately,
9217 eflags is immune from the fstp's cc clobbering. */
9218 if (unordered_p)
9219 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9220 else
9221 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9222 return output_387_ffreep (operands, 0);
9223 }
9224 else
9225 {
9226 if (unordered_p)
9227 return "fucompp\n\tfnstsw\t%0";
9228 else
9229 return "fcompp\n\tfnstsw\t%0";
9230 }
9231 }
9232 else
9233 {
9234 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9235
9236 static const char * const alt[16] =
9237 {
9238 "fcom%z2\t%y2\n\tfnstsw\t%0",
9239 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9240 "fucom%z2\t%y2\n\tfnstsw\t%0",
9241 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9242
9243 "ficom%z2\t%y2\n\tfnstsw\t%0",
9244 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9245 NULL,
9246 NULL,
9247
9248 "fcomi\t{%y1, %0|%0, %y1}",
9249 "fcomip\t{%y1, %0|%0, %y1}",
9250 "fucomi\t{%y1, %0|%0, %y1}",
9251 "fucomip\t{%y1, %0|%0, %y1}",
9252
9253 NULL,
9254 NULL,
9255 NULL,
9256 NULL
9257 };
9258
9259 int mask;
9260 const char *ret;
9261
9262 mask = eflags_p << 3;
9263 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9264 mask |= unordered_p << 1;
9265 mask |= stack_top_dies;
9266
9267 gcc_assert (mask < 16);
9268 ret = alt[mask];
9269 gcc_assert (ret);
9270
9271 return ret;
9272 }
9273 }
9274
9275 void
9276 ix86_output_addr_vec_elt (FILE *file, int value)
9277 {
9278 const char *directive = ASM_LONG;
9279
9280 #ifdef ASM_QUAD
9281 if (TARGET_64BIT)
9282 directive = ASM_QUAD;
9283 #else
9284 gcc_assert (!TARGET_64BIT);
9285 #endif
9286
9287 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9288 }
9289
9290 void
9291 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9292 {
9293 if (TARGET_64BIT)
9294 fprintf (file, "%s%s%d-%s%d\n",
9295 ASM_LONG, LPREFIX, value, LPREFIX, rel);
9296 else if (HAVE_AS_GOTOFF_IN_DATA)
9297 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9298 #if TARGET_MACHO
9299 else if (TARGET_MACHO)
9300 {
9301 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9302 machopic_output_function_base_name (file);
9303 fprintf(file, "\n");
9304 }
9305 #endif
9306 else
9307 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9308 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9309 }
9310 \f
9311 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9312 for the target. */
9313
9314 void
9315 ix86_expand_clear (rtx dest)
9316 {
9317 rtx tmp;
9318
9319 /* We play register width games, which are only valid after reload. */
9320 gcc_assert (reload_completed);
9321
9322 /* Avoid HImode and its attendant prefix byte. */
9323 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9324 dest = gen_rtx_REG (SImode, REGNO (dest));
9325
9326 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9327
9328 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9329 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9330 {
9331 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9332 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9333 }
9334
9335 emit_insn (tmp);
9336 }
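
/* For example, clearing %eax becomes "xorl %eax, %eax" (2 bytes, clobbers
   the flags -- hence the CC clobber added above) when !TARGET_USE_MOV0 or
   optimize_size, and "movl $0, %eax" (5 bytes, flags untouched)
   otherwise.  */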
9337
9338 /* X is an unchanging MEM. If it is a constant pool reference, return
9339 the constant pool rtx, else NULL. */
9340
9341 rtx
9342 maybe_get_pool_constant (rtx x)
9343 {
9344 x = ix86_delegitimize_address (XEXP (x, 0));
9345
9346 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9347 return get_pool_constant (x);
9348
9349 return NULL_RTX;
9350 }
9351
9352 void
9353 ix86_expand_move (enum machine_mode mode, rtx operands[])
9354 {
9355 int strict = (reload_in_progress || reload_completed);
9356 rtx op0, op1;
9357 enum tls_model model;
9358
9359 op0 = operands[0];
9360 op1 = operands[1];
9361
9362 if (GET_CODE (op1) == SYMBOL_REF)
9363 {
9364 model = SYMBOL_REF_TLS_MODEL (op1);
9365 if (model)
9366 {
9367 op1 = legitimize_tls_address (op1, model, true);
9368 op1 = force_operand (op1, op0);
9369 if (op1 == op0)
9370 return;
9371 }
9372 }
9373 else if (GET_CODE (op1) == CONST
9374 && GET_CODE (XEXP (op1, 0)) == PLUS
9375 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9376 {
9377 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9378 if (model)
9379 {
9380 rtx addend = XEXP (XEXP (op1, 0), 1);
9381 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9382 op1 = force_operand (op1, NULL);
9383 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9384 op0, 1, OPTAB_DIRECT);
9385 if (op1 == op0)
9386 return;
9387 }
9388 }
9389
9390 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9391 {
9392 if (TARGET_MACHO && !TARGET_64BIT)
9393 {
9394 #if TARGET_MACHO
9395 if (MACHOPIC_PURE)
9396 {
9397 rtx temp = ((reload_in_progress
9398 || ((op0 && REG_P (op0))
9399 && mode == Pmode))
9400 ? op0 : gen_reg_rtx (Pmode));
9401 op1 = machopic_indirect_data_reference (op1, temp);
9402 op1 = machopic_legitimize_pic_address (op1, mode,
9403 temp == op1 ? 0 : temp);
9404 }
9405 else if (MACHOPIC_INDIRECT)
9406 op1 = machopic_indirect_data_reference (op1, 0);
9407 if (op0 == op1)
9408 return;
9409 #endif
9410 }
9411 else
9412 {
9413 if (MEM_P (op0))
9414 op1 = force_reg (Pmode, op1);
9415 else
9416 op1 = legitimize_address (op1, op1, Pmode);
9417 }
9418 }
9419 else
9420 {
9421 if (MEM_P (op0)
9422 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9423 || !push_operand (op0, mode))
9424 && MEM_P (op1))
9425 op1 = force_reg (mode, op1);
9426
9427 if (push_operand (op0, mode)
9428 && ! general_no_elim_operand (op1, mode))
9429 op1 = copy_to_mode_reg (mode, op1);
9430
9431       /* Force large constants in 64bit compilation into a register
9432 to get them CSEed. */
9433 if (TARGET_64BIT && mode == DImode
9434 && immediate_operand (op1, mode)
9435 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9436 && !register_operand (op0, mode)
9437 && optimize && !reload_completed && !reload_in_progress)
9438 op1 = copy_to_mode_reg (mode, op1);
9439
9440 if (FLOAT_MODE_P (mode))
9441 {
9442 /* If we are loading a floating point constant to a register,
9443 force the value to memory now, since we'll get better code
9444 	     out of the back end.  */
9445
9446 if (strict)
9447 ;
9448 else if (GET_CODE (op1) == CONST_DOUBLE)
9449 {
9450 op1 = validize_mem (force_const_mem (mode, op1));
9451 if (!register_operand (op0, mode))
9452 {
9453 rtx temp = gen_reg_rtx (mode);
9454 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9455 emit_move_insn (op0, temp);
9456 return;
9457 }
9458 }
9459 }
9460 }
9461
9462 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9463 }
9464
9465 void
9466 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9467 {
9468 rtx op0 = operands[0], op1 = operands[1];
9469
9470 /* Force constants other than zero into memory. We do not know how
9471 the instructions used to build constants modify the upper 64 bits
9472      of the register; once we have that information we may be able
9473 to handle some of them more efficiently. */
9474 if ((reload_in_progress | reload_completed) == 0
9475 && register_operand (op0, mode)
9476 && CONSTANT_P (op1)
9477 && standard_sse_constant_p (op1) <= 0)
9478 op1 = validize_mem (force_const_mem (mode, op1));
9479
9480 /* Make operand1 a register if it isn't already. */
9481 if (!no_new_pseudos
9482 && !register_operand (op0, mode)
9483 && !register_operand (op1, mode))
9484 {
9485 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9486 return;
9487 }
9488
9489 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9490 }
9491
9492 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9493 straight to ix86_expand_vector_move. */
9494
9495 void
9496 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9497 {
9498 rtx op0, op1, m;
9499
9500 op0 = operands[0];
9501 op1 = operands[1];
9502
9503 if (MEM_P (op1))
9504 {
9505 /* If we're optimizing for size, movups is the smallest. */
9506 if (optimize_size)
9507 {
9508 op0 = gen_lowpart (V4SFmode, op0);
9509 op1 = gen_lowpart (V4SFmode, op1);
9510 emit_insn (gen_sse_movups (op0, op1));
9511 return;
9512 }
9513
9514 /* ??? If we have typed data, then it would appear that using
9515 movdqu is the only way to get unaligned data loaded with
9516 integer type. */
9517 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9518 {
9519 op0 = gen_lowpart (V16QImode, op0);
9520 op1 = gen_lowpart (V16QImode, op1);
9521 emit_insn (gen_sse2_movdqu (op0, op1));
9522 return;
9523 }
9524
9525 if (TARGET_SSE2 && mode == V2DFmode)
9526 {
9527 rtx zero;
9528
9529 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9530 {
9531 op0 = gen_lowpart (V2DFmode, op0);
9532 op1 = gen_lowpart (V2DFmode, op1);
9533 emit_insn (gen_sse2_movupd (op0, op1));
9534 return;
9535 }
9536
9537 /* When SSE registers are split into halves, we can avoid
9538 writing to the top half twice. */
9539 if (TARGET_SSE_SPLIT_REGS)
9540 {
9541 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9542 zero = op0;
9543 }
9544 else
9545 {
9546 /* ??? Not sure about the best option for the Intel chips.
9547 The following would seem to satisfy; the register is
9548 entirely cleared, breaking the dependency chain. We
9549 then store to the upper half, with a dependency depth
9550 of one. A rumor has it that Intel recommends two movsd
9551 followed by an unpacklpd, but this is unconfirmed. And
9552 given that the dependency depth of the unpacklpd would
9553 still be one, I'm not sure why this would be better. */
9554 zero = CONST0_RTX (V2DFmode);
9555 }
9556
9557 m = adjust_address (op1, DFmode, 0);
9558 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9559 m = adjust_address (op1, DFmode, 8);
9560 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9561 }
9562 else
9563 {
9564 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9565 {
9566 op0 = gen_lowpart (V4SFmode, op0);
9567 op1 = gen_lowpart (V4SFmode, op1);
9568 emit_insn (gen_sse_movups (op0, op1));
9569 return;
9570 }
9571
9572 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9573 emit_move_insn (op0, CONST0_RTX (mode));
9574 else
9575 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9576
9577 if (mode != V4SFmode)
9578 op0 = gen_lowpart (V4SFmode, op0);
9579 m = adjust_address (op1, V2SFmode, 0);
9580 emit_insn (gen_sse_loadlps (op0, op0, m));
9581 m = adjust_address (op1, V2SFmode, 8);
9582 emit_insn (gen_sse_loadhps (op0, op0, m));
9583 }
9584 }
9585 else if (MEM_P (op0))
9586 {
9587 /* If we're optimizing for size, movups is the smallest. */
9588 if (optimize_size)
9589 {
9590 op0 = gen_lowpart (V4SFmode, op0);
9591 op1 = gen_lowpart (V4SFmode, op1);
9592 emit_insn (gen_sse_movups (op0, op1));
9593 return;
9594 }
9595
9596 /* ??? Similar to above, only less clear because of quote
9597 typeless stores unquote. */
9598 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9599 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9600 {
9601 op0 = gen_lowpart (V16QImode, op0);
9602 op1 = gen_lowpart (V16QImode, op1);
9603 emit_insn (gen_sse2_movdqu (op0, op1));
9604 return;
9605 }
9606
9607 if (TARGET_SSE2 && mode == V2DFmode)
9608 {
9609 m = adjust_address (op0, DFmode, 0);
9610 emit_insn (gen_sse2_storelpd (m, op1));
9611 m = adjust_address (op0, DFmode, 8);
9612 emit_insn (gen_sse2_storehpd (m, op1));
9613 }
9614 else
9615 {
9616 if (mode != V4SFmode)
9617 op1 = gen_lowpart (V4SFmode, op1);
9618 m = adjust_address (op0, V2SFmode, 0);
9619 emit_insn (gen_sse_storelps (m, op1));
9620 m = adjust_address (op0, V2SFmode, 8);
9621 emit_insn (gen_sse_storehps (m, op1));
9622 }
9623 }
9624 else
9625 gcc_unreachable ();
9626 }
9627
9628 /* Expand a push in MODE. This is some mode for which we do not support
9629 proper push instructions, at least from the registers that we expect
9630 the value to live in. */
9631
9632 void
9633 ix86_expand_push (enum machine_mode mode, rtx x)
9634 {
9635 rtx tmp;
9636
9637 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9638 GEN_INT (-GET_MODE_SIZE (mode)),
9639 stack_pointer_rtx, 1, OPTAB_DIRECT);
9640 if (tmp != stack_pointer_rtx)
9641 emit_move_insn (stack_pointer_rtx, tmp);
9642
9643 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9644 emit_move_insn (tmp, x);
9645 }
9646
9647 /* Helper function of ix86_fixup_binary_operands to canonicalize
9648 operand order. Returns true if the operands should be swapped. */
9649
9650 static bool
9651 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9652 rtx operands[])
9653 {
9654 rtx dst = operands[0];
9655 rtx src1 = operands[1];
9656 rtx src2 = operands[2];
9657
9658 /* If the operation is not commutative, we can't do anything. */
9659 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9660 return false;
9661
9662 /* Highest priority is that src1 should match dst. */
9663 if (rtx_equal_p (dst, src1))
9664 return false;
9665 if (rtx_equal_p (dst, src2))
9666 return true;
9667
9668 /* Next highest priority is that immediate constants come second. */
9669 if (immediate_operand (src2, mode))
9670 return false;
9671 if (immediate_operand (src1, mode))
9672 return true;
9673
9674 /* Lowest priority is that memory references should come second. */
9675 if (MEM_P (src2))
9676 return false;
9677 if (MEM_P (src1))
9678 return true;
9679
9680 return false;
9681 }
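
/* For instance, expanding "x = 5 + x" arrives here with src1 an immediate
   and src2 equal to dst; since dst matches src2, the operands are swapped
   to "x = x + 5", which also moves the constant into the second position
   and matches the two-address "addl $5, x" form the machine actually
   provides.  */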
9682
9683
9684 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9685 destination to use for the operation. If different from the true
9686 destination in operands[0], a copy operation will be required. */
9687
9688 rtx
9689 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9690 rtx operands[])
9691 {
9692 rtx dst = operands[0];
9693 rtx src1 = operands[1];
9694 rtx src2 = operands[2];
9695
9696 /* Canonicalize operand order. */
9697 if (ix86_swap_binary_operands_p (code, mode, operands))
9698 {
9699 rtx temp = src1;
9700 src1 = src2;
9701 src2 = temp;
9702 }
9703
9704 /* Both source operands cannot be in memory. */
9705 if (MEM_P (src1) && MEM_P (src2))
9706 {
9707 /* Optimization: Only read from memory once. */
9708 if (rtx_equal_p (src1, src2))
9709 {
9710 src2 = force_reg (mode, src2);
9711 src1 = src2;
9712 }
9713 else
9714 src2 = force_reg (mode, src2);
9715 }
9716
9717 /* If the destination is memory, and we do not have matching source
9718 operands, do things in registers. */
9719 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9720 dst = gen_reg_rtx (mode);
9721
9722 /* Source 1 cannot be a constant. */
9723 if (CONSTANT_P (src1))
9724 src1 = force_reg (mode, src1);
9725
9726 /* Source 1 cannot be a non-matching memory. */
9727 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9728 src1 = force_reg (mode, src1);
9729
9730 operands[1] = src1;
9731 operands[2] = src2;
9732 return dst;
9733 }
9734
9735 /* Similarly, but assume that the destination has already been
9736 set up properly. */
9737
9738 void
9739 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9740 enum machine_mode mode, rtx operands[])
9741 {
9742 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9743 gcc_assert (dst == operands[0]);
9744 }
9745
9746 /* Attempt to expand a binary operator. Make the expansion closer to the
9747    actual machine than just general_operand, which would allow 3 separate
9748 memory references (one output, two input) in a single insn. */
9749
9750 void
9751 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9752 rtx operands[])
9753 {
9754 rtx src1, src2, dst, op, clob;
9755
9756 dst = ix86_fixup_binary_operands (code, mode, operands);
9757 src1 = operands[1];
9758 src2 = operands[2];
9759
9760 /* Emit the instruction. */
9761
9762 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9763 if (reload_in_progress)
9764 {
9765 /* Reload doesn't know about the flags register, and doesn't know that
9766 it doesn't want to clobber it. We can only do this with PLUS. */
9767 gcc_assert (code == PLUS);
9768 emit_insn (op);
9769 }
9770 else
9771 {
9772 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9773 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9774 }
9775
9776 /* Fix up the destination if needed. */
9777 if (dst != operands[0])
9778 emit_move_insn (operands[0], dst);
9779 }
9780
9781 /* Return TRUE or FALSE depending on whether the binary operator meets the
9782 appropriate constraints. */
9783
9784 int
9785 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9786 rtx operands[3])
9787 {
9788 rtx dst = operands[0];
9789 rtx src1 = operands[1];
9790 rtx src2 = operands[2];
9791
9792 /* Both source operands cannot be in memory. */
9793 if (MEM_P (src1) && MEM_P (src2))
9794 return 0;
9795
9796 /* Canonicalize operand order for commutative operators. */
9797 if (ix86_swap_binary_operands_p (code, mode, operands))
9798 {
9799 rtx temp = src1;
9800 src1 = src2;
9801 src2 = temp;
9802 }
9803
9804 /* If the destination is memory, we must have a matching source operand. */
9805 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9806 return 0;
9807
9808 /* Source 1 cannot be a constant. */
9809 if (CONSTANT_P (src1))
9810 return 0;
9811
9812 /* Source 1 cannot be a non-matching memory. */
9813 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9814 return 0;
9815
9816 return 1;
9817 }
9818
9819 /* Attempt to expand a unary operator. Make the expansion closer to the
9820    actual machine than just general_operand, which would allow 2 separate
9821 memory references (one output, one input) in a single insn. */
9822
9823 void
9824 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9825 rtx operands[])
9826 {
9827 int matching_memory;
9828 rtx src, dst, op, clob;
9829
9830 dst = operands[0];
9831 src = operands[1];
9832
9833 /* If the destination is memory, and we do not have matching source
9834 operands, do things in registers. */
9835 matching_memory = 0;
9836 if (MEM_P (dst))
9837 {
9838 if (rtx_equal_p (dst, src))
9839 matching_memory = 1;
9840 else
9841 dst = gen_reg_rtx (mode);
9842 }
9843
9844 /* When source operand is memory, destination must match. */
9845 if (MEM_P (src) && !matching_memory)
9846 src = force_reg (mode, src);
9847
9848 /* Emit the instruction. */
9849
9850 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9851 if (reload_in_progress || code == NOT)
9852 {
9853 /* Reload doesn't know about the flags register, and doesn't know that
9854 it doesn't want to clobber it. */
9855 gcc_assert (code == NOT);
9856 emit_insn (op);
9857 }
9858 else
9859 {
9860 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9861 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9862 }
9863
9864 /* Fix up the destination if needed. */
9865 if (dst != operands[0])
9866 emit_move_insn (operands[0], dst);
9867 }
9868
9869 /* Return TRUE or FALSE depending on whether the unary operator meets the
9870 appropriate constraints. */
9871
9872 int
9873 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9874 enum machine_mode mode ATTRIBUTE_UNUSED,
9875 rtx operands[2] ATTRIBUTE_UNUSED)
9876 {
9877 /* If one of operands is memory, source and destination must match. */
9878 if ((MEM_P (operands[0])
9879 || MEM_P (operands[1]))
9880 && ! rtx_equal_p (operands[0], operands[1]))
9881 return FALSE;
9882 return TRUE;
9883 }
9884
9885 /* Post-reload splitter for converting an SF or DFmode value in an
9886 SSE register into an unsigned SImode. */
9887
9888 void
9889 ix86_split_convert_uns_si_sse (rtx operands[])
9890 {
9891 enum machine_mode vecmode;
9892 rtx value, large, zero_or_two31, input, two31, x;
9893
9894 large = operands[1];
9895 zero_or_two31 = operands[2];
9896 input = operands[3];
9897 two31 = operands[4];
9898 vecmode = GET_MODE (large);
9899 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9900
9901 /* Load up the value into the low element. We must ensure that the other
9902 elements are valid floats -- zero is the easiest such value. */
9903 if (MEM_P (input))
9904 {
9905 if (vecmode == V4SFmode)
9906 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9907 else
9908 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9909 }
9910 else
9911 {
9912 input = gen_rtx_REG (vecmode, REGNO (input));
9913 emit_move_insn (value, CONST0_RTX (vecmode));
9914 if (vecmode == V4SFmode)
9915 emit_insn (gen_sse_movss (value, value, input));
9916 else
9917 emit_insn (gen_sse2_movsd (value, value, input));
9918 }
9919
9920 emit_move_insn (large, two31);
9921 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9922
9923 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9924 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9925
9926 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9927 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9928
9929 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9930 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9931
9932 large = gen_rtx_REG (V4SImode, REGNO (large));
9933 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9934
9935 x = gen_rtx_REG (V4SImode, REGNO (value));
9936 if (vecmode == V4SFmode)
9937 emit_insn (gen_sse2_cvttps2dq (x, value));
9938 else
9939 emit_insn (gen_sse2_cvttpd2dq (x, value));
9940 value = x;
9941
9942 emit_insn (gen_xorv4si3 (value, value, large));
9943 }
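
/* A worked example: converting 3e9 (too large for a signed int) proceeds
   as large = (2^31 <= value) = all-ones, zero_or_two31 = 2^31, and
   value - zero_or_two31 = 852516352; the truncating cvttps2dq/cvttpd2dq
   then yields 852516352, and xoring with large << 31 = 0x80000000
   restores 3000000000 in the integer domain.  For inputs below 2^31 the
   mask is zero and the value converts unchanged.  */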
9944
9945 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9946 Expects the 64-bit DImode to be supplied in a pair of integral
9947 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9948 -mfpmath=sse, !optimize_size only. */
9949
9950 void
9951 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9952 {
9953 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9954 rtx int_xmm, fp_xmm;
9955 rtx biases, exponents;
9956 rtx x;
9957
9958 int_xmm = gen_reg_rtx (V4SImode);
9959 if (TARGET_INTER_UNIT_MOVES)
9960 emit_insn (gen_movdi_to_sse (int_xmm, input));
9961 else if (TARGET_SSE_SPLIT_REGS)
9962 {
9963 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9964 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9965 }
9966 else
9967 {
9968 x = gen_reg_rtx (V2DImode);
9969 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9970 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9971 }
9972
9973 x = gen_rtx_CONST_VECTOR (V4SImode,
9974 gen_rtvec (4, GEN_INT (0x43300000UL),
9975 GEN_INT (0x45300000UL),
9976 const0_rtx, const0_rtx));
9977 exponents = validize_mem (force_const_mem (V4SImode, x));
9978
9979 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9980 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9981
9982 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
9983 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9984 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9985 (0x1.0p84 + double(fp_value_hi_xmm)).
9986 Note these exponents differ by 32. */
9987
9988 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9989
9990 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9991 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9992 real_ldexp (&bias_lo_rvt, &dconst1, 52);
9993 real_ldexp (&bias_hi_rvt, &dconst1, 84);
9994 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
9995 x = const_double_from_real_value (bias_hi_rvt, DFmode);
9996 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
9997 biases = validize_mem (force_const_mem (V2DFmode, biases));
9998 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
9999
10000 /* Add the upper and lower DFmode values together. */
10001 if (TARGET_SSE3)
10002 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10003 else
10004 {
10005 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10006 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10007 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10008 }
10009
10010 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10011 }
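
/* A worked example: for the input hi = 1, lo = 2 the two biased doubles
   are 2^84 + 1*2^32 and 2^52 + 2; after subtracting the 0x1.0p84 and
   0x1.0p52 biases they become 4294967296.0 and 2.0, and the final add
   produces 4294967298.0, the exact value of the 64-bit integer
   0x100000002.  */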
10012
10013 /* Convert an unsigned SImode value into a DFmode. Only currently used
10014 for SSE, but applicable anywhere. */
10015
10016 void
10017 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10018 {
10019 REAL_VALUE_TYPE TWO31r;
10020 rtx x, fp;
10021
10022 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10023 NULL, 1, OPTAB_DIRECT);
10024
10025 fp = gen_reg_rtx (DFmode);
10026 emit_insn (gen_floatsidf2 (fp, x));
10027
10028 real_ldexp (&TWO31r, &dconst1, 31);
10029 x = const_double_from_real_value (TWO31r, DFmode);
10030
10031 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10032 if (x != target)
10033 emit_move_insn (target, x);
10034 }
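
/* A worked example: for input 0xffffffff the PLUS of INT_MIN wraps to
   0x7fffffff, floatsidf2 gives 2147483647.0, and adding 2^31 back yields
   4294967295.0; for a small input such as 5 the wrapped value is
   -2147483643, and -2147483643.0 + 2147483648.0 = 5.0.  */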
10035
10036 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10037 32-bit mode; otherwise we have a direct convert instruction. */
10038
10039 void
10040 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10041 {
10042 REAL_VALUE_TYPE TWO32r;
10043 rtx fp_lo, fp_hi, x;
10044
10045 fp_lo = gen_reg_rtx (DFmode);
10046 fp_hi = gen_reg_rtx (DFmode);
10047
10048 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10049
10050 real_ldexp (&TWO32r, &dconst1, 32);
10051 x = const_double_from_real_value (TWO32r, DFmode);
10052 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10053
10054 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10055
10056 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10057 0, OPTAB_DIRECT);
10058 if (x != target)
10059 emit_move_insn (target, x);
10060 }
10061
10062 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10063 For x86_32, -mfpmath=sse, !optimize_size only. */
10064 void
10065 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10066 {
10067 REAL_VALUE_TYPE ONE16r;
10068 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10069
10070 real_ldexp (&ONE16r, &dconst1, 16);
10071 x = const_double_from_real_value (ONE16r, SFmode);
10072 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10073 NULL, 0, OPTAB_DIRECT);
10074 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10075 NULL, 0, OPTAB_DIRECT);
10076 fp_hi = gen_reg_rtx (SFmode);
10077 fp_lo = gen_reg_rtx (SFmode);
10078 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10079 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10080 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10081 0, OPTAB_DIRECT);
10082 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10083 0, OPTAB_DIRECT);
10084 if (!rtx_equal_p (target, fp_hi))
10085 emit_move_insn (target, fp_hi);
10086 }
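
/* A worked example: for input 0x12345678 the low half is 0x5678 = 22136
   and the high half is 0x1234 = 4660; both convert exactly to SFmode, and
   4660.0 * 65536.0 + 22136.0 = 305419896.0 recovers the full unsigned
   value (the final addition may round, since SFmode cannot represent
   every 32-bit integer exactly).  */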
10087
10088 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10089 then replicate the value for all elements of the vector
10090 register. */
10091
10092 rtx
10093 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10094 {
10095 rtvec v;
10096 switch (mode)
10097 {
10098 case SFmode:
10099 if (vect)
10100 v = gen_rtvec (4, value, value, value, value);
10101 else
10102 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10103 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10104 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10105
10106 case DFmode:
10107 if (vect)
10108 v = gen_rtvec (2, value, value);
10109 else
10110 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10111 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10112
10113 default:
10114 gcc_unreachable ();
10115 }
10116 }
10117
10118 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10119 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10120 true, then replicate the mask for all elements of the vector register.
10121 If INVERT is true, then create a mask excluding the sign bit. */
10122
10123 rtx
10124 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10125 {
10126 enum machine_mode vec_mode;
10127 HOST_WIDE_INT hi, lo;
10128 int shift = 63;
10129 rtx v;
10130 rtx mask;
10131
10132 /* Find the sign bit, sign extended to 2*HWI. */
10133 if (mode == SFmode)
10134 lo = 0x80000000, hi = lo < 0;
10135 else if (HOST_BITS_PER_WIDE_INT >= 64)
10136 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10137 else
10138 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10139
10140 if (invert)
10141 lo = ~lo, hi = ~hi;
10142
10143 /* Force this value into the low part of a fp vector constant. */
10144 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10145 mask = gen_lowpart (mode, mask);
10146
10147 v = ix86_build_const_vector (mode, vect, mask);
10148 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10149 return force_reg (vec_mode, v);
10150 }
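
/* For example, for SFmode with VECT and INVERT false this returns a V4SF
   register whose low element is the bit pattern 0x80000000 (just the sign
   bit); with INVERT true the low element is 0x7fffffff, the mask used to
   implement fabs by masking the sign bit away.  */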
10151
10152 /* Generate code for floating point ABS or NEG. */
10153
10154 void
10155 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10156 rtx operands[])
10157 {
10158 rtx mask, set, use, clob, dst, src;
10159 bool matching_memory;
10160 bool use_sse = false;
10161 bool vector_mode = VECTOR_MODE_P (mode);
10162 enum machine_mode elt_mode = mode;
10163
10164 if (vector_mode)
10165 {
10166 elt_mode = GET_MODE_INNER (mode);
10167 use_sse = true;
10168 }
10169 else if (TARGET_SSE_MATH)
10170 use_sse = SSE_FLOAT_MODE_P (mode);
10171
10172 /* NEG and ABS performed with SSE use bitwise mask operations.
10173 Create the appropriate mask now. */
10174 if (use_sse)
10175 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10176 else
10177 mask = NULL_RTX;
10178
10179 dst = operands[0];
10180 src = operands[1];
10181
10182 /* If the destination is memory, and we don't have matching source
10183 operands or we're using the x87, do things in registers. */
10184 matching_memory = false;
10185 if (MEM_P (dst))
10186 {
10187 if (use_sse && rtx_equal_p (dst, src))
10188 matching_memory = true;
10189 else
10190 dst = gen_reg_rtx (mode);
10191 }
10192 if (MEM_P (src) && !matching_memory)
10193 src = force_reg (mode, src);
10194
10195 if (vector_mode)
10196 {
10197 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10198 set = gen_rtx_SET (VOIDmode, dst, set);
10199 emit_insn (set);
10200 }
10201 else
10202 {
10203 set = gen_rtx_fmt_e (code, mode, src);
10204 set = gen_rtx_SET (VOIDmode, dst, set);
10205 if (mask)
10206 {
10207 use = gen_rtx_USE (VOIDmode, mask);
10208 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10209 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10210 gen_rtvec (3, set, use, clob)));
10211 }
10212 else
10213 emit_insn (set);
10214 }
10215
10216 if (dst != operands[0])
10217 emit_move_insn (operands[0], dst);
10218 }
10219
10220 /* Expand a copysign operation. Special case operand 0 being a constant. */
10221
10222 void
10223 ix86_expand_copysign (rtx operands[])
10224 {
10225 enum machine_mode mode, vmode;
10226 rtx dest, op0, op1, mask, nmask;
10227
10228 dest = operands[0];
10229 op0 = operands[1];
10230 op1 = operands[2];
10231
10232 mode = GET_MODE (dest);
10233 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10234
10235 if (GET_CODE (op0) == CONST_DOUBLE)
10236 {
10237 rtvec v;
10238
10239 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10240 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10241
10242 if (op0 == CONST0_RTX (mode))
10243 op0 = CONST0_RTX (vmode);
10244 else
10245 {
10246 if (mode == SFmode)
10247 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10248 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10249 else
10250 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10251 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10252 }
10253
10254 mask = ix86_build_signbit_mask (mode, 0, 0);
10255
10256 if (mode == SFmode)
10257 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10258 else
10259 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10260 }
10261 else
10262 {
10263 nmask = ix86_build_signbit_mask (mode, 0, 1);
10264 mask = ix86_build_signbit_mask (mode, 0, 0);
10265
10266 if (mode == SFmode)
10267 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10268 else
10269 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10270 }
10271 }
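
/* In bit terms the expansion computes dest = (op1 & signbit) | fabs (op0):
   e.g. copysignf (3.0f, y) keeps only the sign bit of y (mask 0x80000000)
   and ors in 0x40400000, the encoding of 3.0f.  The const and var
   splitters below realize this with AND and IOR (plus a NOT for the
   variable-magnitude case).  */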
10272
10273 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10274 be a constant, and so has already been expanded into a vector constant. */
10275
10276 void
10277 ix86_split_copysign_const (rtx operands[])
10278 {
10279 enum machine_mode mode, vmode;
10280 rtx dest, op0, op1, mask, x;
10281
10282 dest = operands[0];
10283 op0 = operands[1];
10284 op1 = operands[2];
10285 mask = operands[3];
10286
10287 mode = GET_MODE (dest);
10288 vmode = GET_MODE (mask);
10289
10290 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10291 x = gen_rtx_AND (vmode, dest, mask);
10292 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10293
10294 if (op0 != CONST0_RTX (vmode))
10295 {
10296 x = gen_rtx_IOR (vmode, dest, op0);
10297 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10298 }
10299 }
10300
10301 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10302 so we have to do two masks. */
10303
10304 void
10305 ix86_split_copysign_var (rtx operands[])
10306 {
10307 enum machine_mode mode, vmode;
10308 rtx dest, scratch, op0, op1, mask, nmask, x;
10309
10310 dest = operands[0];
10311 scratch = operands[1];
10312 op0 = operands[2];
10313 op1 = operands[3];
10314 nmask = operands[4];
10315 mask = operands[5];
10316
10317 mode = GET_MODE (dest);
10318 vmode = GET_MODE (mask);
10319
10320 if (rtx_equal_p (op0, op1))
10321 {
10322 /* Shouldn't happen often (it's useless, obviously), but when it does
10323 we'd generate incorrect code if we continue below. */
10324 emit_move_insn (dest, op0);
10325 return;
10326 }
10327
10328 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10329 {
10330 gcc_assert (REGNO (op1) == REGNO (scratch));
10331
10332 x = gen_rtx_AND (vmode, scratch, mask);
10333 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10334
10335 dest = mask;
10336 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10337 x = gen_rtx_NOT (vmode, dest);
10338 x = gen_rtx_AND (vmode, x, op0);
10339 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10340 }
10341 else
10342 {
10343 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10344 {
10345 x = gen_rtx_AND (vmode, scratch, mask);
10346 }
10347 else /* alternative 2,4 */
10348 {
10349 gcc_assert (REGNO (mask) == REGNO (scratch));
10350 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10351 x = gen_rtx_AND (vmode, scratch, op1);
10352 }
10353 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10354
10355 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10356 {
10357 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10358 x = gen_rtx_AND (vmode, dest, nmask);
10359 }
10360 else /* alternative 3,4 */
10361 {
10362 gcc_assert (REGNO (nmask) == REGNO (dest));
10363 dest = nmask;
10364 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10365 x = gen_rtx_AND (vmode, dest, op0);
10366 }
10367 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10368 }
10369
10370 x = gen_rtx_IOR (vmode, dest, scratch);
10371 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10372 }
10373
10374 /* Return TRUE or FALSE depending on whether the first SET in INSN
10375    has source and destination with matching CC modes, and whether the
10376 CC mode is at least as constrained as REQ_MODE. */
10377
10378 int
10379 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10380 {
10381 rtx set;
10382 enum machine_mode set_mode;
10383
10384 set = PATTERN (insn);
10385 if (GET_CODE (set) == PARALLEL)
10386 set = XVECEXP (set, 0, 0);
10387 gcc_assert (GET_CODE (set) == SET);
10388 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10389
10390 set_mode = GET_MODE (SET_DEST (set));
10391 switch (set_mode)
10392 {
10393 case CCNOmode:
10394 if (req_mode != CCNOmode
10395 && (req_mode != CCmode
10396 || XEXP (SET_SRC (set), 1) != const0_rtx))
10397 return 0;
10398 break;
10399 case CCmode:
10400 if (req_mode == CCGCmode)
10401 return 0;
10402 /* FALLTHRU */
10403 case CCGCmode:
10404 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10405 return 0;
10406 /* FALLTHRU */
10407 case CCGOCmode:
10408 if (req_mode == CCZmode)
10409 return 0;
10410 /* FALLTHRU */
10411 case CCZmode:
10412 break;
10413
10414 default:
10415 gcc_unreachable ();
10416 }
10417
10418 return (GET_MODE (SET_SRC (set)) == set_mode);
10419 }
10420
10421 /* Generate insn patterns to do an integer compare of OPERANDS. */
10422
10423 static rtx
10424 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10425 {
10426 enum machine_mode cmpmode;
10427 rtx tmp, flags;
10428
10429 cmpmode = SELECT_CC_MODE (code, op0, op1);
10430 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10431
10432 /* This is very simple, but making the interface the same as in the
10433 FP case makes the rest of the code easier. */
10434 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10435 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10436
10437 /* Return the test that should be put into the flags user, i.e.
10438 the bcc, scc, or cmov instruction. */
10439 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10440 }
10441
10442 /* Figure out whether to use ordered or unordered fp comparisons.
10443 Return the appropriate mode to use. */
10444
10445 enum machine_mode
10446 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10447 {
10448 /* ??? In order to make all comparisons reversible, we do all comparisons
10449 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10450    all forms of trapping and nontrapping comparisons, we can make inequality
10451 comparisons trapping again, since it results in better code when using
10452 FCOM based compares. */
10453 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10454 }
10455
10456 enum machine_mode
10457 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10458 {
10459 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10460 return ix86_fp_compare_mode (code);
10461 switch (code)
10462 {
10463 /* Only zero flag is needed. */
10464 case EQ: /* ZF=0 */
10465 case NE: /* ZF!=0 */
10466 return CCZmode;
10467 /* Codes needing carry flag. */
10468 case GEU: /* CF=0 */
10469 case GTU: /* CF=0 & ZF=0 */
10470 case LTU: /* CF=1 */
10471 case LEU: /* CF=1 | ZF=1 */
10472 return CCmode;
10473 /* Codes possibly doable only with sign flag when
10474 comparing against zero. */
10475 case GE: /* SF=OF or SF=0 */
10476 case LT: /* SF<>OF or SF=1 */
10477 if (op1 == const0_rtx)
10478 return CCGOCmode;
10479 else
10480 /* For other cases Carry flag is not required. */
10481 return CCGCmode;
10482     /* Codes doable only with the sign flag when comparing
10483        against zero, but we lack a jump instruction for it,
10484        so we need to use relational tests against the overflow
10485        flag, which thus needs to be zero.  */
10486 case GT: /* ZF=0 & SF=OF */
10487 case LE: /* ZF=1 | SF<>OF */
10488 if (op1 == const0_rtx)
10489 return CCNOmode;
10490 else
10491 return CCGCmode;
10492     /* The strcmp pattern does (use flags), and combine may ask us for a
10493        proper mode.  */
10494 case USE:
10495 return CCmode;
10496 default:
10497 gcc_unreachable ();
10498 }
10499 }
10500
10501 /* Return the fixed registers used for condition codes. */
10502
10503 static bool
10504 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10505 {
10506 *p1 = FLAGS_REG;
10507 *p2 = FPSR_REG;
10508 return true;
10509 }
10510
10511 /* If two condition code modes are compatible, return a condition code
10512 mode which is compatible with both. Otherwise, return
10513 VOIDmode. */
10514
10515 static enum machine_mode
10516 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10517 {
10518 if (m1 == m2)
10519 return m1;
10520
10521 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10522 return VOIDmode;
10523
10524 if ((m1 == CCGCmode && m2 == CCGOCmode)
10525 || (m1 == CCGOCmode && m2 == CCGCmode))
10526 return CCGCmode;
10527
10528 switch (m1)
10529 {
10530 default:
10531 gcc_unreachable ();
10532
10533 case CCmode:
10534 case CCGCmode:
10535 case CCGOCmode:
10536 case CCNOmode:
10537 case CCZmode:
10538 switch (m2)
10539 {
10540 default:
10541 return VOIDmode;
10542
10543 case CCmode:
10544 case CCGCmode:
10545 case CCGOCmode:
10546 case CCNOmode:
10547 case CCZmode:
10548 return CCmode;
10549 }
10550
10551 case CCFPmode:
10552 case CCFPUmode:
10553 /* These are only compatible with themselves, which we already
10554 checked above. */
10555 return VOIDmode;
10556 }
10557 }
10558
10559 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10560
10561 int
10562 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10563 {
10564 enum rtx_code swapped_code = swap_condition (code);
10565 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10566 || (ix86_fp_comparison_cost (swapped_code)
10567 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10568 }
10569
10570 /* Swap, force into registers, or otherwise massage the two operands
10571 to a fp comparison. The operands are updated in place; the new
10572 comparison code is returned. */
10573
10574 static enum rtx_code
10575 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10576 {
10577 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10578 rtx op0 = *pop0, op1 = *pop1;
10579 enum machine_mode op_mode = GET_MODE (op0);
10580 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10581
10582 /* All of the unordered compare instructions only work on registers.
10583 The same is true of the fcomi compare instructions. The XFmode
10584 compare instructions require registers except when comparing
10585 against zero or when converting operand 1 from fixed point to
10586 floating point. */
10587
10588 if (!is_sse
10589 && (fpcmp_mode == CCFPUmode
10590 || (op_mode == XFmode
10591 && ! (standard_80387_constant_p (op0) == 1
10592 || standard_80387_constant_p (op1) == 1)
10593 && GET_CODE (op1) != FLOAT)
10594 || ix86_use_fcomi_compare (code)))
10595 {
10596 op0 = force_reg (op_mode, op0);
10597 op1 = force_reg (op_mode, op1);
10598 }
10599 else
10600 {
10601 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10602 things around if they appear profitable, otherwise force op0
10603 into a register. */
10604
10605 if (standard_80387_constant_p (op0) == 0
10606 || (MEM_P (op0)
10607 && ! (standard_80387_constant_p (op1) == 0
10608 || MEM_P (op1))))
10609 {
10610 rtx tmp;
10611 tmp = op0, op0 = op1, op1 = tmp;
10612 code = swap_condition (code);
10613 }
10614
10615 if (!REG_P (op0))
10616 op0 = force_reg (op_mode, op0);
10617
10618 if (CONSTANT_P (op1))
10619 {
10620 int tmp = standard_80387_constant_p (op1);
10621 if (tmp == 0)
10622 op1 = validize_mem (force_const_mem (op_mode, op1));
10623 else if (tmp == 1)
10624 {
10625 if (TARGET_CMOVE)
10626 op1 = force_reg (op_mode, op1);
10627 }
10628 else
10629 op1 = force_reg (op_mode, op1);
10630 }
10631 }
10632
10633 /* Try to rearrange the comparison to make it cheaper. */
10634 if (ix86_fp_comparison_cost (code)
10635 > ix86_fp_comparison_cost (swap_condition (code))
10636 && (REG_P (op1) || !no_new_pseudos))
10637 {
10638 rtx tmp;
10639 tmp = op0, op0 = op1, op1 = tmp;
10640 code = swap_condition (code);
10641 if (!REG_P (op0))
10642 op0 = force_reg (op_mode, op0);
10643 }
10644
10645 *pop0 = op0;
10646 *pop1 = op1;
10647 return code;
10648 }
10649
10650 /* Convert the comparison codes we use to represent FP comparisons to the
10651    integer code that will result in a proper branch.  Return UNKNOWN if no
10652    such code is available.  */
10653
10654 enum rtx_code
10655 ix86_fp_compare_code_to_integer (enum rtx_code code)
10656 {
10657 switch (code)
10658 {
10659 case GT:
10660 return GTU;
10661 case GE:
10662 return GEU;
10663 case ORDERED:
10664 case UNORDERED:
10665 return code;
10666 break;
10667 case UNEQ:
10668 return EQ;
10669 break;
10670 case UNLT:
10671 return LTU;
10672 break;
10673 case UNLE:
10674 return LEU;
10675 break;
10676 case LTGT:
10677 return NE;
10678 break;
10679 default:
10680 return UNKNOWN;
10681 }
10682 }
10683
10684 /* Split comparison code CODE into comparisons we can do using branch
10685    instructions.  BYPASS_CODE is the comparison code for a branch that will
10686    branch around FIRST_CODE and SECOND_CODE.  If one of the branches is not
10687    required, its value is set to UNKNOWN.
10688    We never require more than two branches.  */
10689
10690 void
10691 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10692 enum rtx_code *first_code,
10693 enum rtx_code *second_code)
10694 {
10695 *first_code = code;
10696 *bypass_code = UNKNOWN;
10697 *second_code = UNKNOWN;
10698
10699 /* The fcomi comparison sets flags as follows:
10700
10701 cmp ZF PF CF
10702 > 0 0 0
10703 < 0 0 1
10704 = 1 0 0
10705 un 1 1 1 */
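/* For example, a plain LT cannot be tested with a single fcomi branch
   under TARGET_IEEE_FP, because CF is also 1 when the operands are
   unordered; the LT case below therefore tests UNLT and adds a bypass
   branch on UNORDERED that jumps around the main test.  */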
10706
10707 switch (code)
10708 {
10709 case GT: /* GTU - CF=0 & ZF=0 */
10710 case GE: /* GEU - CF=0 */
10711 case ORDERED: /* PF=0 */
10712 case UNORDERED: /* PF=1 */
10713 case UNEQ: /* EQ - ZF=1 */
10714 case UNLT: /* LTU - CF=1 */
10715 case UNLE: /* LEU - CF=1 | ZF=1 */
10716 case LTGT: /* EQ - ZF=0 */
10717 break;
10718 case LT: /* LTU - CF=1 - fails on unordered */
10719 *first_code = UNLT;
10720 *bypass_code = UNORDERED;
10721 break;
10722 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10723 *first_code = UNLE;
10724 *bypass_code = UNORDERED;
10725 break;
10726 case EQ: /* EQ - ZF=1 - fails on unordered */
10727 *first_code = UNEQ;
10728 *bypass_code = UNORDERED;
10729 break;
10730 case NE: /* NE - ZF=0 - fails on unordered */
10731 *first_code = LTGT;
10732 *second_code = UNORDERED;
10733 break;
10734 case UNGE: /* GEU - CF=0 - fails on unordered */
10735 *first_code = GE;
10736 *second_code = UNORDERED;
10737 break;
10738 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10739 *first_code = GT;
10740 *second_code = UNORDERED;
10741 break;
10742 default:
10743 gcc_unreachable ();
10744 }
10745 if (!TARGET_IEEE_FP)
10746 {
10747 *second_code = UNKNOWN;
10748 *bypass_code = UNKNOWN;
10749 }
10750 }
10751
10752 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10753 All following functions use the number of instructions as their cost metric.
10754 In the future this should be tweaked to compute bytes for optimize_size and
10755 take into account the performance of various instructions on various CPUs. */
10756 static int
10757 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10758 {
10759 if (!TARGET_IEEE_FP)
10760 return 4;
10761 /* The cost of code output by ix86_expand_fp_compare. */
10762 switch (code)
10763 {
10764 case UNLE:
10765 case UNLT:
10766 case LTGT:
10767 case GT:
10768 case GE:
10769 case UNORDERED:
10770 case ORDERED:
10771 case UNEQ:
10772 return 4;
10774 case LT:
10775 case NE:
10776 case EQ:
10777 case UNGE:
10778 return 5;
10780 case LE:
10781 case UNGT:
10782 return 6;
10784 default:
10785 gcc_unreachable ();
10786 }
10787 }
10788
10789 /* Return cost of comparison done using fcomi operation.
10790 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10791 static int
10792 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10793 {
10794 enum rtx_code bypass_code, first_code, second_code;
10795 /* Return an arbitrarily high cost when the instruction is not supported - this
10796 prevents gcc from using it. */
10797 if (!TARGET_CMOVE)
10798 return 1024;
10799 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10800 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10801 }
10802
10803 /* Return cost of comparison done using sahf operation.
10804 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10805 static int
10806 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10807 {
10808 enum rtx_code bypass_code, first_code, second_code;
10809 /* Return an arbitrarily high cost when the instruction is not preferred - this
10810 keeps gcc from using it. */
10811 if (!TARGET_USE_SAHF && !optimize_size)
10812 return 1024;
10813 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10814 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10815 }
10816
10817 /* Compute cost of the comparison done using any method.
10818 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10819 static int
10820 ix86_fp_comparison_cost (enum rtx_code code)
10821 {
10822 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10823 int min;
10824
10825 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10826 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10827
10828 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10829 if (min > sahf_cost)
10830 min = sahf_cost;
10831 if (min > fcomi_cost)
10832 min = fcomi_cost;
10833 return min;
10834 }
10835
10836 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10837
10838 static rtx
10839 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10840 rtx *second_test, rtx *bypass_test)
10841 {
10842 enum machine_mode fpcmp_mode, intcmp_mode;
10843 rtx tmp, tmp2;
10844 int cost = ix86_fp_comparison_cost (code);
10845 enum rtx_code bypass_code, first_code, second_code;
10846
10847 fpcmp_mode = ix86_fp_compare_mode (code);
10848 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10849
10850 if (second_test)
10851 *second_test = NULL_RTX;
10852 if (bypass_test)
10853 *bypass_test = NULL_RTX;
10854
10855 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10856
10857 /* Do fcomi/sahf based test when profitable. */
10858 if ((bypass_code == UNKNOWN || bypass_test)
10859 && (second_code == UNKNOWN || second_test)
10860 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10861 {
10862 if (TARGET_CMOVE)
10863 {
10864 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10865 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10866 tmp);
10867 emit_insn (tmp);
10868 }
10869 else
10870 {
10871 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10872 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10873 if (!scratch)
10874 scratch = gen_reg_rtx (HImode);
10875 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10876 emit_insn (gen_x86_sahf_1 (scratch));
10877 }
10878
10879 /* The FP codes work out to act like unsigned. */
10880 intcmp_mode = fpcmp_mode;
10881 code = first_code;
10882 if (bypass_code != UNKNOWN)
10883 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10884 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10885 const0_rtx);
10886 if (second_code != UNKNOWN)
10887 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10888 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10889 const0_rtx);
10890 }
10891 else
10892 {
10893 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10894 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10895 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10896 if (!scratch)
10897 scratch = gen_reg_rtx (HImode);
10898 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10899
10900 /* In the unordered case, we have to check C2 for NaN's, which
10901 doesn't happen to work out to anything nice combination-wise.
10902 So do some bit twiddling on the value we've got in AH to come
10903 up with an appropriate set of condition codes. */
10904
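/* After the fnstsw above, the x87 condition bits C0, C2 and C3 sit in
   bits 0, 2 and 6 of AH (bits 8, 10 and 14 of the status word).  For an
   fcom-style compare C0 means "less than", C3 "equal" and C2 "unordered",
   which is what the masks below test: 0x01 = C0, 0x40 = C3, 0x04 = C2,
   0x45 = C0|C2|C3, 0x05 = C0|C2, 0x44 = C2|C3.  */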
10905 intcmp_mode = CCNOmode;
10906 switch (code)
10907 {
10908 case GT:
10909 case UNGT:
10910 if (code == GT || !TARGET_IEEE_FP)
10911 {
10912 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10913 code = EQ;
10914 }
10915 else
10916 {
10917 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10918 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10919 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10920 intcmp_mode = CCmode;
10921 code = GEU;
10922 }
10923 break;
10924 case LT:
10925 case UNLT:
10926 if (code == LT && TARGET_IEEE_FP)
10927 {
10928 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10929 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10930 intcmp_mode = CCmode;
10931 code = EQ;
10932 }
10933 else
10934 {
10935 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10936 code = NE;
10937 }
10938 break;
10939 case GE:
10940 case UNGE:
10941 if (code == GE || !TARGET_IEEE_FP)
10942 {
10943 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10944 code = EQ;
10945 }
10946 else
10947 {
10948 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10949 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10950 GEN_INT (0x01)));
10951 code = NE;
10952 }
10953 break;
10954 case LE:
10955 case UNLE:
10956 if (code == LE && TARGET_IEEE_FP)
10957 {
10958 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10959 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10960 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10961 intcmp_mode = CCmode;
10962 code = LTU;
10963 }
10964 else
10965 {
10966 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10967 code = NE;
10968 }
10969 break;
10970 case EQ:
10971 case UNEQ:
10972 if (code == EQ && TARGET_IEEE_FP)
10973 {
10974 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10975 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10976 intcmp_mode = CCmode;
10977 code = EQ;
10978 }
10979 else
10980 {
10981 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10982 code = NE;
10984 }
10985 break;
10986 case NE:
10987 case LTGT:
10988 if (code == NE && TARGET_IEEE_FP)
10989 {
10990 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10991 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10992 GEN_INT (0x40)));
10993 code = NE;
10994 }
10995 else
10996 {
10997 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10998 code = EQ;
10999 }
11000 break;
11001
11002 case UNORDERED:
11003 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11004 code = NE;
11005 break;
11006 case ORDERED:
11007 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11008 code = EQ;
11009 break;
11010
11011 default:
11012 gcc_unreachable ();
11013 }
11014 }
11015
11016 /* Return the test that should be put into the flags user, i.e.
11017 the bcc, scc, or cmov instruction. */
11018 return gen_rtx_fmt_ee (code, VOIDmode,
11019 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11020 const0_rtx);
11021 }
11022
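/* Expand a comparison of ix86_compare_op0 against ix86_compare_op1 using
   CODE and return the rtx that should be placed in the flags user, i.e.
   the bcc, scc or cmov instruction.  SECOND_TEST and BYPASS_TEST are
   filled in the same way as in ix86_expand_fp_compare.  */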
11023 rtx
11024 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11025 {
11026 rtx op0, op1, ret;
11027 op0 = ix86_compare_op0;
11028 op1 = ix86_compare_op1;
11029
11030 if (second_test)
11031 *second_test = NULL_RTX;
11032 if (bypass_test)
11033 *bypass_test = NULL_RTX;
11034
11035 if (ix86_compare_emitted)
11036 {
11037 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11038 ix86_compare_emitted = NULL_RTX;
11039 }
11040 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11041 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11042 second_test, bypass_test);
11043 else
11044 ret = ix86_expand_int_compare (code, op0, op1);
11045
11046 return ret;
11047 }
11048
11049 /* Return true if the CODE will result in nontrivial jump sequence. */
11050 bool
11051 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11052 {
11053 enum rtx_code bypass_code, first_code, second_code;
11054 if (!TARGET_CMOVE)
11055 return true;
11056 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11057 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11058 }
11059
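/* Expand a conditional branch: jump to LABEL when comparison CODE holds
   for ix86_compare_op0 against ix86_compare_op1.  Double-word compares
   are lowered to multiple compare-and-branch sequences.  */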
11060 void
11061 ix86_expand_branch (enum rtx_code code, rtx label)
11062 {
11063 rtx tmp;
11064
11065 /* If we have emitted a compare insn, go straight to simple.
11066 ix86_expand_compare won't emit anything if ix86_compare_emitted
11067 is non-NULL. */
11068 if (ix86_compare_emitted)
11069 goto simple;
11070
11071 switch (GET_MODE (ix86_compare_op0))
11072 {
11073 case QImode:
11074 case HImode:
11075 case SImode:
11076 simple:
11077 tmp = ix86_expand_compare (code, NULL, NULL);
11078 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11079 gen_rtx_LABEL_REF (VOIDmode, label),
11080 pc_rtx);
11081 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11082 return;
11083
11084 case SFmode:
11085 case DFmode:
11086 case XFmode:
11087 {
11088 rtvec vec;
11089 int use_fcomi;
11090 enum rtx_code bypass_code, first_code, second_code;
11091
11092 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11093 &ix86_compare_op1);
11094
11095 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11096
11097 /* Check whether we will use the natural sequence with one jump. If
11098 so, we can expand the jump early. Otherwise delay expansion by
11099 creating a compound insn so as not to confuse the optimizers. */
11100 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11101 && TARGET_CMOVE)
11102 {
11103 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11104 gen_rtx_LABEL_REF (VOIDmode, label),
11105 pc_rtx, NULL_RTX, NULL_RTX);
11106 }
11107 else
11108 {
11109 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11110 ix86_compare_op0, ix86_compare_op1);
11111 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11112 gen_rtx_LABEL_REF (VOIDmode, label),
11113 pc_rtx);
11114 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11115
11116 use_fcomi = ix86_use_fcomi_compare (code);
11117 vec = rtvec_alloc (3 + !use_fcomi);
11118 RTVEC_ELT (vec, 0) = tmp;
11119 RTVEC_ELT (vec, 1)
11120 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11121 RTVEC_ELT (vec, 2)
11122 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11123 if (! use_fcomi)
11124 RTVEC_ELT (vec, 3)
11125 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11126
11127 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11128 }
11129 return;
11130 }
11131
11132 case DImode:
11133 if (TARGET_64BIT)
11134 goto simple;
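/* FALLTHRU */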
11135 case TImode:
11136 /* Expand DImode branch into multiple compare+branch. */
11137 {
11138 rtx lo[2], hi[2], label2;
11139 enum rtx_code code1, code2, code3;
11140 enum machine_mode submode;
11141
11142 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11143 {
11144 tmp = ix86_compare_op0;
11145 ix86_compare_op0 = ix86_compare_op1;
11146 ix86_compare_op1 = tmp;
11147 code = swap_condition (code);
11148 }
11149 if (GET_MODE (ix86_compare_op0) == DImode)
11150 {
11151 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11152 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11153 submode = SImode;
11154 }
11155 else
11156 {
11157 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11158 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11159 submode = DImode;
11160 }
11161
11162 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11163 avoid two branches. This costs one extra insn, so disable when
11164 optimizing for size. */
11165
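/* E.g. a DImode equality test then becomes roughly
       xorl  hi1, hi0
       xorl  lo1, lo0
       orl   lo0, hi0
       jz/jnz label
   i.e. a single conditional branch instead of two.  */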
11166 if ((code == EQ || code == NE)
11167 && (!optimize_size
11168 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11169 {
11170 rtx xor0, xor1;
11171
11172 xor1 = hi[0];
11173 if (hi[1] != const0_rtx)
11174 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11175 NULL_RTX, 0, OPTAB_WIDEN);
11176
11177 xor0 = lo[0];
11178 if (lo[1] != const0_rtx)
11179 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11180 NULL_RTX, 0, OPTAB_WIDEN);
11181
11182 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11183 NULL_RTX, 0, OPTAB_WIDEN);
11184
11185 ix86_compare_op0 = tmp;
11186 ix86_compare_op1 = const0_rtx;
11187 ix86_expand_branch (code, label);
11188 return;
11189 }
11190
11191 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11192 comparison, op1 is a constant, and the low word is zero, then we can just
11193 examine the high word. */
11194
11195 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11196 switch (code)
11197 {
11198 case LT: case LTU: case GE: case GEU:
11199 ix86_compare_op0 = hi[0];
11200 ix86_compare_op1 = hi[1];
11201 ix86_expand_branch (code, label);
11202 return;
11203 default:
11204 break;
11205 }
11206
11207 /* Otherwise, we need two or three jumps. */
11208
11209 label2 = gen_label_rtx ();
11210
11211 code1 = code;
11212 code2 = swap_condition (code);
11213 code3 = unsigned_condition (code);
11214
11215 switch (code)
11216 {
11217 case LT: case GT: case LTU: case GTU:
11218 break;
11219
11220 case LE: code1 = LT; code2 = GT; break;
11221 case GE: code1 = GT; code2 = LT; break;
11222 case LEU: code1 = LTU; code2 = GTU; break;
11223 case GEU: code1 = GTU; code2 = LTU; break;
11224
11225 case EQ: code1 = UNKNOWN; code2 = NE; break;
11226 case NE: code2 = UNKNOWN; break;
11227
11228 default:
11229 gcc_unreachable ();
11230 }
11231
11232 /*
11233 * a < b =>
11234 * if (hi(a) < hi(b)) goto true;
11235 * if (hi(a) > hi(b)) goto false;
11236 * if (lo(a) < lo(b)) goto true;
11237 * false:
11238 */
11239
11240 ix86_compare_op0 = hi[0];
11241 ix86_compare_op1 = hi[1];
11242
11243 if (code1 != UNKNOWN)
11244 ix86_expand_branch (code1, label);
11245 if (code2 != UNKNOWN)
11246 ix86_expand_branch (code2, label2);
11247
11248 ix86_compare_op0 = lo[0];
11249 ix86_compare_op1 = lo[1];
11250 ix86_expand_branch (code3, label);
11251
11252 if (code2 != UNKNOWN)
11253 emit_label (label2);
11254 return;
11255 }
11256
11257 default:
11258 gcc_unreachable ();
11259 }
11260 }
11261
11262 /* Split branch based on floating point condition. */
11263 void
11264 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11265 rtx target1, rtx target2, rtx tmp, rtx pushed)
11266 {
11267 rtx second, bypass;
11268 rtx label = NULL_RTX;
11269 rtx condition;
11270 int bypass_probability = -1, second_probability = -1, probability = -1;
11271 rtx i;
11272
11273 if (target2 != pc_rtx)
11274 {
11275 rtx tmp = target2;
11276 code = reverse_condition_maybe_unordered (code);
11277 target2 = target1;
11278 target1 = tmp;
11279 }
11280
11281 condition = ix86_expand_fp_compare (code, op1, op2,
11282 tmp, &second, &bypass);
11283
11284 /* Remove pushed operand from stack. */
11285 if (pushed)
11286 ix86_free_from_memory (GET_MODE (pushed));
11287
11288 if (split_branch_probability >= 0)
11289 {
11290 /* Distribute the probabilities across the jumps.
11291 Assume that BYPASS and SECOND always test
11292 for UNORDERED. */
11293 probability = split_branch_probability;
11294
11295 /* A value of 1 is low enough that there is no need for the probability
11296 to be updated. Later we may run some experiments and see
11297 if unordered values are more frequent in practice. */
11298 if (bypass)
11299 bypass_probability = 1;
11300 if (second)
11301 second_probability = 1;
11302 }
11303 if (bypass != NULL_RTX)
11304 {
11305 label = gen_label_rtx ();
11306 i = emit_jump_insn (gen_rtx_SET
11307 (VOIDmode, pc_rtx,
11308 gen_rtx_IF_THEN_ELSE (VOIDmode,
11309 bypass,
11310 gen_rtx_LABEL_REF (VOIDmode,
11311 label),
11312 pc_rtx)));
11313 if (bypass_probability >= 0)
11314 REG_NOTES (i)
11315 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11316 GEN_INT (bypass_probability),
11317 REG_NOTES (i));
11318 }
11319 i = emit_jump_insn (gen_rtx_SET
11320 (VOIDmode, pc_rtx,
11321 gen_rtx_IF_THEN_ELSE (VOIDmode,
11322 condition, target1, target2)));
11323 if (probability >= 0)
11324 REG_NOTES (i)
11325 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11326 GEN_INT (probability),
11327 REG_NOTES (i));
11328 if (second != NULL_RTX)
11329 {
11330 i = emit_jump_insn (gen_rtx_SET
11331 (VOIDmode, pc_rtx,
11332 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11333 target2)));
11334 if (second_probability >= 0)
11335 REG_NOTES (i)
11336 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11337 GEN_INT (second_probability),
11338 REG_NOTES (i));
11339 }
11340 if (label != NULL_RTX)
11341 emit_label (label);
11342 }
11343
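/* Expand a setcc: store in DEST (a QImode register) the result of
   comparison CODE on the current compare operands.  For IEEE fp compares
   that need two flag tests, the partial results are combined with and/or.
   Return 1 when done, 0 to make the caller FAIL (double-word modes).  */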
11344 int
11345 ix86_expand_setcc (enum rtx_code code, rtx dest)
11346 {
11347 rtx ret, tmp, tmpreg, equiv;
11348 rtx second_test, bypass_test;
11349
11350 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11351 return 0; /* FAIL */
11352
11353 gcc_assert (GET_MODE (dest) == QImode);
11354
11355 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11356 PUT_MODE (ret, QImode);
11357
11358 tmp = dest;
11359 tmpreg = dest;
11360
11361 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11362 if (bypass_test || second_test)
11363 {
11364 rtx test = second_test;
11365 int bypass = 0;
11366 rtx tmp2 = gen_reg_rtx (QImode);
11367 if (bypass_test)
11368 {
11369 gcc_assert (!second_test);
11370 test = bypass_test;
11371 bypass = 1;
11372 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11373 }
11374 PUT_MODE (test, QImode);
11375 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11376
11377 if (bypass)
11378 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11379 else
11380 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11381 }
11382
11383 /* Attach a REG_EQUAL note describing the comparison result. */
11384 if (ix86_compare_op0 && ix86_compare_op1)
11385 {
11386 equiv = simplify_gen_relational (code, QImode,
11387 GET_MODE (ix86_compare_op0),
11388 ix86_compare_op0, ix86_compare_op1);
11389 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11390 }
11391
11392 return 1; /* DONE */
11393 }
11394
11395 /* Expand comparison setting or clearing carry flag. Return true when
11396 successful and set pop for the operation. */
11397 static bool
11398 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11399 {
11400 enum machine_mode mode =
11401 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11402
11403 /* Do not handle double-word compares, which go through a special path.
11404 FP compares are only partially handled below. */
11405 if (mode == (TARGET_64BIT ? TImode : DImode))
11406 return false;
11407 if (FLOAT_MODE_P (mode))
11408 {
11409 rtx second_test = NULL, bypass_test = NULL;
11410 rtx compare_op, compare_seq;
11411
11412 /* Shortcut: the following common codes never translate into carry flag compares. */
11413 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11414 || code == ORDERED || code == UNORDERED)
11415 return false;
11416
11417 /* These comparisons require the zero flag; swap the operands so they no longer do. */
11418 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11419 && !TARGET_IEEE_FP)
11420 {
11421 rtx tmp = op0;
11422 op0 = op1;
11423 op1 = tmp;
11424 code = swap_condition (code);
11425 }
11426
11427 /* Try to expand the comparison and verify that we end up with a carry
11428 flag based comparison. This fails to be true only when we decide to
11429 expand the comparison using arithmetic, which is not a common scenario. */
11430 start_sequence ();
11431 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11432 &second_test, &bypass_test);
11433 compare_seq = get_insns ();
11434 end_sequence ();
11435
11436 if (second_test || bypass_test)
11437 return false;
11438 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11439 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11440 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11441 else
11442 code = GET_CODE (compare_op);
11443 if (code != LTU && code != GEU)
11444 return false;
11445 emit_insn (compare_seq);
11446 *pop = compare_op;
11447 return true;
11448 }
11449 if (!INTEGRAL_MODE_P (mode))
11450 return false;
11451 switch (code)
11452 {
11453 case LTU:
11454 case GEU:
11455 break;
11456
11457 /* Convert a==0 into (unsigned)a<1. */
11458 case EQ:
11459 case NE:
11460 if (op1 != const0_rtx)
11461 return false;
11462 op1 = const1_rtx;
11463 code = (code == EQ ? LTU : GEU);
11464 break;
11465
11466 /* Convert a>b into b<a or a>=b+1. */
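/* For instance, with op1 == 5 the compare (unsigned) a > 5 becomes
   (unsigned) a >= 6, and GEU is tested directly from the carry flag
   (CF == 0).  */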
11467 case GTU:
11468 case LEU:
11469 if (CONST_INT_P (op1))
11470 {
11471 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11472 /* Bail out on overflow. We still can swap operands but that
11473 would force loading of the constant into register. */
11474 if (op1 == const0_rtx
11475 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11476 return false;
11477 code = (code == GTU ? GEU : LTU);
11478 }
11479 else
11480 {
11481 rtx tmp = op1;
11482 op1 = op0;
11483 op0 = tmp;
11484 code = (code == GTU ? LTU : GEU);
11485 }
11486 break;
11487
11488 /* Convert a>=0 into (unsigned)a<0x80000000. */
11489 case LT:
11490 case GE:
11491 if (mode == DImode || op1 != const0_rtx)
11492 return false;
11493 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11494 code = (code == LT ? GEU : LTU);
11495 break;
11496 case LE:
11497 case GT:
11498 if (mode == DImode || op1 != constm1_rtx)
11499 return false;
11500 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11501 code = (code == LE ? GEU : LTU);
11502 break;
11503
11504 default:
11505 return false;
11506 }
11507 /* Swapping operands may cause a constant to appear as the first operand. */
11508 if (!nonimmediate_operand (op0, VOIDmode))
11509 {
11510 if (no_new_pseudos)
11511 return false;
11512 op0 = force_reg (mode, op0);
11513 }
11514 ix86_compare_op0 = op0;
11515 ix86_compare_op1 = op1;
11516 *pop = ix86_expand_compare (code, NULL, NULL);
11517 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11518 return true;
11519 }
11520
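/* Expand an integer conditional move: operands[0] is the destination,
   operands[1] the comparison, operands[2] and operands[3] the values for
   the true and false arms.  Return 1 when all insns have been emitted,
   0 to make the caller FAIL.  */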
11521 int
11522 ix86_expand_int_movcc (rtx operands[])
11523 {
11524 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11525 rtx compare_seq, compare_op;
11526 rtx second_test, bypass_test;
11527 enum machine_mode mode = GET_MODE (operands[0]);
11528 bool sign_bit_compare_p = false;
11529
11530 start_sequence ();
11531 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11532 compare_seq = get_insns ();
11533 end_sequence ();
11534
11535 compare_code = GET_CODE (compare_op);
11536
11537 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11538 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11539 sign_bit_compare_p = true;
11540
11541 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11542 HImode insns, we'd be swallowed in word prefix ops. */
11543
11544 if ((mode != HImode || TARGET_FAST_PREFIX)
11545 && (mode != (TARGET_64BIT ? TImode : DImode))
11546 && CONST_INT_P (operands[2])
11547 && CONST_INT_P (operands[3]))
11548 {
11549 rtx out = operands[0];
11550 HOST_WIDE_INT ct = INTVAL (operands[2]);
11551 HOST_WIDE_INT cf = INTVAL (operands[3]);
11552 HOST_WIDE_INT diff;
11553
11554 diff = ct - cf;
11555 /* Sign bit compares are better done using shifts than by using
11556 sbb. */
11557 if (sign_bit_compare_p
11558 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11559 ix86_compare_op1, &compare_op))
11560 {
11561 /* Detect overlap between destination and compare sources. */
11562 rtx tmp = out;
11563
11564 if (!sign_bit_compare_p)
11565 {
11566 bool fpcmp = false;
11567
11568 compare_code = GET_CODE (compare_op);
11569
11570 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11571 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11572 {
11573 fpcmp = true;
11574 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11575 }
11576
11577 /* To simplify the rest of the code, restrict to the GEU case. */
11578 if (compare_code == LTU)
11579 {
11580 HOST_WIDE_INT tmp = ct;
11581 ct = cf;
11582 cf = tmp;
11583 compare_code = reverse_condition (compare_code);
11584 code = reverse_condition (code);
11585 }
11586 else
11587 {
11588 if (fpcmp)
11589 PUT_CODE (compare_op,
11590 reverse_condition_maybe_unordered
11591 (GET_CODE (compare_op)));
11592 else
11593 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11594 }
11595 diff = ct - cf;
11596
11597 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11598 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11599 tmp = gen_reg_rtx (mode);
11600
11601 if (mode == DImode)
11602 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11603 else
11604 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11605 }
11606 else
11607 {
11608 if (code == GT || code == GE)
11609 code = reverse_condition (code);
11610 else
11611 {
11612 HOST_WIDE_INT tmp = ct;
11613 ct = cf;
11614 cf = tmp;
11615 diff = ct - cf;
11616 }
11617 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11618 ix86_compare_op1, VOIDmode, 0, -1);
11619 }
11620
11621 if (diff == 1)
11622 {
11623 /*
11624 * cmpl op0,op1
11625 * sbbl dest,dest
11626 * [addl dest, ct]
11627 *
11628 * Size 5 - 8.
11629 */
11630 if (ct)
11631 tmp = expand_simple_binop (mode, PLUS,
11632 tmp, GEN_INT (ct),
11633 copy_rtx (tmp), 1, OPTAB_DIRECT);
11634 }
11635 else if (cf == -1)
11636 {
11637 /*
11638 * cmpl op0,op1
11639 * sbbl dest,dest
11640 * orl $ct, dest
11641 *
11642 * Size 8.
11643 */
11644 tmp = expand_simple_binop (mode, IOR,
11645 tmp, GEN_INT (ct),
11646 copy_rtx (tmp), 1, OPTAB_DIRECT);
11647 }
11648 else if (diff == -1 && ct)
11649 {
11650 /*
11651 * cmpl op0,op1
11652 * sbbl dest,dest
11653 * notl dest
11654 * [addl dest, cf]
11655 *
11656 * Size 8 - 11.
11657 */
11658 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11659 if (cf)
11660 tmp = expand_simple_binop (mode, PLUS,
11661 copy_rtx (tmp), GEN_INT (cf),
11662 copy_rtx (tmp), 1, OPTAB_DIRECT);
11663 }
11664 else
11665 {
11666 /*
11667 * cmpl op0,op1
11668 * sbbl dest,dest
11669 * [notl dest]
11670 * andl cf - ct, dest
11671 * [addl dest, ct]
11672 *
11673 * Size 8 - 11.
11674 */
11675
11676 if (cf == 0)
11677 {
11678 cf = ct;
11679 ct = 0;
11680 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11681 }
11682
11683 tmp = expand_simple_binop (mode, AND,
11684 copy_rtx (tmp),
11685 gen_int_mode (cf - ct, mode),
11686 copy_rtx (tmp), 1, OPTAB_DIRECT);
11687 if (ct)
11688 tmp = expand_simple_binop (mode, PLUS,
11689 copy_rtx (tmp), GEN_INT (ct),
11690 copy_rtx (tmp), 1, OPTAB_DIRECT);
11691 }
11692
11693 if (!rtx_equal_p (tmp, out))
11694 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11695
11696 return 1; /* DONE */
11697 }
11698
11699 if (diff < 0)
11700 {
11701 HOST_WIDE_INT tmp;
11702 tmp = ct, ct = cf, cf = tmp;
11703 diff = -diff;
11704 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11705 {
11706 /* We may be reversing an unordered compare to a normal compare, which
11707 is not valid in general (we may convert a non-trapping condition
11708 to a trapping one); however, on i386 we currently emit all
11709 comparisons unordered. */
11710 compare_code = reverse_condition_maybe_unordered (compare_code);
11711 code = reverse_condition_maybe_unordered (code);
11712 }
11713 else
11714 {
11715 compare_code = reverse_condition (compare_code);
11716 code = reverse_condition (code);
11717 }
11718 }
11719
11720 compare_code = UNKNOWN;
11721 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11722 && CONST_INT_P (ix86_compare_op1))
11723 {
11724 if (ix86_compare_op1 == const0_rtx
11725 && (code == LT || code == GE))
11726 compare_code = code;
11727 else if (ix86_compare_op1 == constm1_rtx)
11728 {
11729 if (code == LE)
11730 compare_code = LT;
11731 else if (code == GT)
11732 compare_code = GE;
11733 }
11734 }
11735
11736 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11737 if (compare_code != UNKNOWN
11738 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11739 && (cf == -1 || ct == -1))
11740 {
11741 /* If lea code below could be used, only optimize
11742 if it results in a 2 insn sequence. */
11743
11744 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11745 || diff == 3 || diff == 5 || diff == 9)
11746 || (compare_code == LT && ct == -1)
11747 || (compare_code == GE && cf == -1))
11748 {
11749 /*
11750 * notl op1 (if necessary)
11751 * sarl $31, op1
11752 * orl cf, op1
11753 */
11754 if (ct != -1)
11755 {
11756 cf = ct;
11757 ct = -1;
11758 code = reverse_condition (code);
11759 }
11760
11761 out = emit_store_flag (out, code, ix86_compare_op0,
11762 ix86_compare_op1, VOIDmode, 0, -1);
11763
11764 out = expand_simple_binop (mode, IOR,
11765 out, GEN_INT (cf),
11766 out, 1, OPTAB_DIRECT);
11767 if (out != operands[0])
11768 emit_move_insn (operands[0], out);
11769
11770 return 1; /* DONE */
11771 }
11772 }
11773
11774
11775 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11776 || diff == 3 || diff == 5 || diff == 9)
11777 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11778 && (mode != DImode
11779 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11780 {
11781 /*
11782 * xorl dest,dest
11783 * cmpl op1,op2
11784 * setcc dest
11785 * lea cf(dest*(ct-cf)),dest
11786 *
11787 * Size 14.
11788 *
11789 * This also catches the degenerate setcc-only case.
11790 */
11791
11792 rtx tmp;
11793 int nops;
11794
11795 out = emit_store_flag (out, code, ix86_compare_op0,
11796 ix86_compare_op1, VOIDmode, 0, 1);
11797
11798 nops = 0;
11799 /* On x86_64 the lea instruction operates on Pmode, so we need
11800 to get the arithmetic done in the proper mode to match. */
11801 if (diff == 1)
11802 tmp = copy_rtx (out);
11803 else
11804 {
11805 rtx out1;
11806 out1 = copy_rtx (out);
11807 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11808 nops++;
11809 if (diff & 1)
11810 {
11811 tmp = gen_rtx_PLUS (mode, tmp, out1);
11812 nops++;
11813 }
11814 }
11815 if (cf != 0)
11816 {
11817 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11818 nops++;
11819 }
11820 if (!rtx_equal_p (tmp, out))
11821 {
11822 if (nops == 1)
11823 out = force_operand (tmp, copy_rtx (out));
11824 else
11825 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11826 }
11827 if (!rtx_equal_p (out, operands[0]))
11828 emit_move_insn (operands[0], copy_rtx (out));
11829
11830 return 1; /* DONE */
11831 }
11832
11833 /*
11834 * General case: Jumpful:
11835 * xorl dest,dest cmpl op1, op2
11836 * cmpl op1, op2 movl ct, dest
11837 * setcc dest jcc 1f
11838 * decl dest movl cf, dest
11839 * andl (cf-ct),dest 1:
11840 * addl ct,dest
11841 *
11842 * Size 20. Size 14.
11843 *
11844 * This is reasonably steep, but branch mispredict costs are
11845 * high on modern cpus, so consider failing only if optimizing
11846 * for space.
11847 */
11848
11849 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11850 && BRANCH_COST >= 2)
11851 {
11852 if (cf == 0)
11853 {
11854 cf = ct;
11855 ct = 0;
11856 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11857 /* We may be reversing an unordered compare to a normal compare,
11858 which is not valid in general (we may convert a non-trapping
11859 condition to a trapping one); however, on i386 we currently
11860 emit all comparisons unordered. */
11861 code = reverse_condition_maybe_unordered (code);
11862 else
11863 {
11864 code = reverse_condition (code);
11865 if (compare_code != UNKNOWN)
11866 compare_code = reverse_condition (compare_code);
11867 }
11868 }
11869
11870 if (compare_code != UNKNOWN)
11871 {
11872 /* notl op1 (if needed)
11873 sarl $31, op1
11874 andl (cf-ct), op1
11875 addl ct, op1
11876
11877 For x < 0 (resp. x <= -1) there will be no notl,
11878 so if possible swap the constants to get rid of the
11879 complement.
11880 True/false will be -1/0 while code below (store flag
11881 followed by decrement) is 0/-1, so the constants need
11882 to be exchanged once more. */
11883
11884 if (compare_code == GE || !cf)
11885 {
11886 code = reverse_condition (code);
11887 compare_code = LT;
11888 }
11889 else
11890 {
11891 HOST_WIDE_INT tmp = cf;
11892 cf = ct;
11893 ct = tmp;
11894 }
11895
11896 out = emit_store_flag (out, code, ix86_compare_op0,
11897 ix86_compare_op1, VOIDmode, 0, -1);
11898 }
11899 else
11900 {
11901 out = emit_store_flag (out, code, ix86_compare_op0,
11902 ix86_compare_op1, VOIDmode, 0, 1);
11903
11904 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11905 copy_rtx (out), 1, OPTAB_DIRECT);
11906 }
11907
11908 out = expand_simple_binop (mode, AND, copy_rtx (out),
11909 gen_int_mode (cf - ct, mode),
11910 copy_rtx (out), 1, OPTAB_DIRECT);
11911 if (ct)
11912 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11913 copy_rtx (out), 1, OPTAB_DIRECT);
11914 if (!rtx_equal_p (out, operands[0]))
11915 emit_move_insn (operands[0], copy_rtx (out));
11916
11917 return 1; /* DONE */
11918 }
11919 }
11920
11921 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11922 {
11923 /* Try a few things more with specific constants and a variable. */
11924
11925 optab op;
11926 rtx var, orig_out, out, tmp;
11927
11928 if (BRANCH_COST <= 2)
11929 return 0; /* FAIL */
11930
11931 /* If one of the two operands is an interesting constant, load a
11932 constant with the above and mask it in with a logical operation. */
11933
11934 if (CONST_INT_P (operands[2]))
11935 {
11936 var = operands[3];
11937 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11938 operands[3] = constm1_rtx, op = and_optab;
11939 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11940 operands[3] = const0_rtx, op = ior_optab;
11941 else
11942 return 0; /* FAIL */
11943 }
11944 else if (CONST_INT_P (operands[3]))
11945 {
11946 var = operands[2];
11947 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11948 operands[2] = constm1_rtx, op = and_optab;
11949 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11950 operands[2] = const0_rtx, op = ior_optab;
11951 else
11952 return 0; /* FAIL */
11953 }
11954 else
11955 return 0; /* FAIL */
11956
11957 orig_out = operands[0];
11958 tmp = gen_reg_rtx (mode);
11959 operands[0] = tmp;
11960
11961 /* Recurse to get the constant loaded. */
11962 if (ix86_expand_int_movcc (operands) == 0)
11963 return 0; /* FAIL */
11964
11965 /* Mask in the interesting variable. */
11966 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11967 OPTAB_WIDEN);
11968 if (!rtx_equal_p (out, orig_out))
11969 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11970
11971 return 1; /* DONE */
11972 }
11973
11974 /*
11975 * For comparison with above,
11976 *
11977 * movl cf,dest
11978 * movl ct,tmp
11979 * cmpl op1,op2
11980 * cmovcc tmp,dest
11981 *
11982 * Size 15.
11983 */
11984
11985 if (! nonimmediate_operand (operands[2], mode))
11986 operands[2] = force_reg (mode, operands[2]);
11987 if (! nonimmediate_operand (operands[3], mode))
11988 operands[3] = force_reg (mode, operands[3]);
11989
11990 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11991 {
11992 rtx tmp = gen_reg_rtx (mode);
11993 emit_move_insn (tmp, operands[3]);
11994 operands[3] = tmp;
11995 }
11996 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11997 {
11998 rtx tmp = gen_reg_rtx (mode);
11999 emit_move_insn (tmp, operands[2]);
12000 operands[2] = tmp;
12001 }
12002
12003 if (! register_operand (operands[2], VOIDmode)
12004 && (mode == QImode
12005 || ! register_operand (operands[3], VOIDmode)))
12006 operands[2] = force_reg (mode, operands[2]);
12007
12008 if (mode == QImode
12009 && ! register_operand (operands[3], VOIDmode))
12010 operands[3] = force_reg (mode, operands[3]);
12011
12012 emit_insn (compare_seq);
12013 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12014 gen_rtx_IF_THEN_ELSE (mode,
12015 compare_op, operands[2],
12016 operands[3])));
12017 if (bypass_test)
12018 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12019 gen_rtx_IF_THEN_ELSE (mode,
12020 bypass_test,
12021 copy_rtx (operands[3]),
12022 copy_rtx (operands[0]))));
12023 if (second_test)
12024 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12025 gen_rtx_IF_THEN_ELSE (mode,
12026 second_test,
12027 copy_rtx (operands[2]),
12028 copy_rtx (operands[0]))));
12029
12030 return 1; /* DONE */
12031 }
12032
12033 /* Swap, force into registers, or otherwise massage the two operands
12034 to an sse comparison with a mask result. Thus we differ a bit from
12035 ix86_prepare_fp_compare_args which expects to produce a flags result.
12036
12037 The DEST operand exists to help determine whether to commute commutative
12038 operators. The POP0/POP1 operands are updated in place. The new
12039 comparison code is returned, or UNKNOWN if not implementable. */
12040
12041 static enum rtx_code
12042 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12043 rtx *pop0, rtx *pop1)
12044 {
12045 rtx tmp;
12046
12047 switch (code)
12048 {
12049 case LTGT:
12050 case UNEQ:
12051 /* We have no LTGT as an operator. We could implement it with
12052 NE & ORDERED, but this requires an extra temporary. It's
12053 not clear that it's worth it. */
12054 return UNKNOWN;
12055
12056 case LT:
12057 case LE:
12058 case UNGT:
12059 case UNGE:
12060 /* These are supported directly. */
12061 break;
12062
12063 case EQ:
12064 case NE:
12065 case UNORDERED:
12066 case ORDERED:
12067 /* For commutative operators, try to canonicalize the destination
12068 operand to be first in the comparison - this helps reload to
12069 avoid extra moves. */
12070 if (!dest || !rtx_equal_p (dest, *pop1))
12071 break;
12072 /* FALLTHRU */
12073
12074 case GE:
12075 case GT:
12076 case UNLE:
12077 case UNLT:
12078 /* These are not supported directly. Swap the comparison operands
12079 to transform into something that is supported. */
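/* E.g. a >= b becomes b <= a, which maps onto cmpleps/cmplepd.  */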
12080 tmp = *pop0;
12081 *pop0 = *pop1;
12082 *pop1 = tmp;
12083 code = swap_condition (code);
12084 break;
12085
12086 default:
12087 gcc_unreachable ();
12088 }
12089
12090 return code;
12091 }
12092
12093 /* Detect conditional moves that exactly match min/max operational
12094 semantics. Note that this is IEEE safe, as long as we don't
12095 interchange the operands.
12096
12097 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12098 and TRUE if the operation is successful and instructions are emitted. */
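/* The form recognized is a < b ? a : b (min) or its mirror image (max).
   The operand order matters: the hardware min/max instructions return
   their second operand when the comparison is unordered, which lines up
   with what the C conditional yields, so the transformation is IEEE safe
   provided the operand order is preserved.  */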
12099
12100 static bool
12101 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12102 rtx cmp_op1, rtx if_true, rtx if_false)
12103 {
12104 enum machine_mode mode;
12105 bool is_min;
12106 rtx tmp;
12107
12108 if (code == LT)
12109 ;
12110 else if (code == UNGE)
12111 {
12112 tmp = if_true;
12113 if_true = if_false;
12114 if_false = tmp;
12115 }
12116 else
12117 return false;
12118
12119 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12120 is_min = true;
12121 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12122 is_min = false;
12123 else
12124 return false;
12125
12126 mode = GET_MODE (dest);
12127
12128 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12129 but MODE may be a vector mode and thus not appropriate. */
12130 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12131 {
12132 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12133 rtvec v;
12134
12135 if_true = force_reg (mode, if_true);
12136 v = gen_rtvec (2, if_true, if_false);
12137 tmp = gen_rtx_UNSPEC (mode, v, u);
12138 }
12139 else
12140 {
12141 code = is_min ? SMIN : SMAX;
12142 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12143 }
12144
12145 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12146 return true;
12147 }
12148
12149 /* Expand an sse vector comparison. Return the register with the result. */
12150
12151 static rtx
12152 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12153 rtx op_true, rtx op_false)
12154 {
12155 enum machine_mode mode = GET_MODE (dest);
12156 rtx x;
12157
12158 cmp_op0 = force_reg (mode, cmp_op0);
12159 if (!nonimmediate_operand (cmp_op1, mode))
12160 cmp_op1 = force_reg (mode, cmp_op1);
12161
12162 if (optimize
12163 || reg_overlap_mentioned_p (dest, op_true)
12164 || reg_overlap_mentioned_p (dest, op_false))
12165 dest = gen_reg_rtx (mode);
12166
12167 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12168 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12169
12170 return dest;
12171 }
12172
12173 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12174 operations. This is used for both scalar and vector conditional moves. */
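/* The general case computes DEST = (CMP & OP_TRUE) | (~CMP & OP_FALSE),
   relying on CMP being an all-ones/all-zeros mask per element; the
   special cases below drop the AND or ANDN half when one arm is zero.  */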
12175
12176 static void
12177 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12178 {
12179 enum machine_mode mode = GET_MODE (dest);
12180 rtx t2, t3, x;
12181
12182 if (op_false == CONST0_RTX (mode))
12183 {
12184 op_true = force_reg (mode, op_true);
12185 x = gen_rtx_AND (mode, cmp, op_true);
12186 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12187 }
12188 else if (op_true == CONST0_RTX (mode))
12189 {
12190 op_false = force_reg (mode, op_false);
12191 x = gen_rtx_NOT (mode, cmp);
12192 x = gen_rtx_AND (mode, x, op_false);
12193 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12194 }
12195 else
12196 {
12197 op_true = force_reg (mode, op_true);
12198 op_false = force_reg (mode, op_false);
12199
12200 t2 = gen_reg_rtx (mode);
12201 if (optimize)
12202 t3 = gen_reg_rtx (mode);
12203 else
12204 t3 = dest;
12205
12206 x = gen_rtx_AND (mode, op_true, cmp);
12207 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12208
12209 x = gen_rtx_NOT (mode, cmp);
12210 x = gen_rtx_AND (mode, x, op_false);
12211 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12212
12213 x = gen_rtx_IOR (mode, t3, t2);
12214 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12215 }
12216 }
12217
12218 /* Expand a floating-point conditional move. Return true if successful. */
12219
12220 int
12221 ix86_expand_fp_movcc (rtx operands[])
12222 {
12223 enum machine_mode mode = GET_MODE (operands[0]);
12224 enum rtx_code code = GET_CODE (operands[1]);
12225 rtx tmp, compare_op, second_test, bypass_test;
12226
12227 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12228 {
12229 enum machine_mode cmode;
12230
12231 /* Since we've no cmove for sse registers, don't force bad register
12232 allocation just to gain access to it. Deny movcc when the
12233 comparison mode doesn't match the move mode. */
12234 cmode = GET_MODE (ix86_compare_op0);
12235 if (cmode == VOIDmode)
12236 cmode = GET_MODE (ix86_compare_op1);
12237 if (cmode != mode)
12238 return 0;
12239
12240 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12241 &ix86_compare_op0,
12242 &ix86_compare_op1);
12243 if (code == UNKNOWN)
12244 return 0;
12245
12246 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12247 ix86_compare_op1, operands[2],
12248 operands[3]))
12249 return 1;
12250
12251 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12252 ix86_compare_op1, operands[2], operands[3]);
12253 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12254 return 1;
12255 }
12256
12257 /* The floating point conditional move instructions don't directly
12258 support conditions resulting from a signed integer comparison. */
12259
12260 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12261
12265 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12266 {
12267 gcc_assert (!second_test && !bypass_test);
12268 tmp = gen_reg_rtx (QImode);
12269 ix86_expand_setcc (code, tmp);
12270 code = NE;
12271 ix86_compare_op0 = tmp;
12272 ix86_compare_op1 = const0_rtx;
12273 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12274 }
12275 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12276 {
12277 tmp = gen_reg_rtx (mode);
12278 emit_move_insn (tmp, operands[3]);
12279 operands[3] = tmp;
12280 }
12281 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12282 {
12283 tmp = gen_reg_rtx (mode);
12284 emit_move_insn (tmp, operands[2]);
12285 operands[2] = tmp;
12286 }
12287
12288 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12289 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12290 operands[2], operands[3])));
12291 if (bypass_test)
12292 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12293 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12294 operands[3], operands[0])));
12295 if (second_test)
12296 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12297 gen_rtx_IF_THEN_ELSE (mode, second_test,
12298 operands[2], operands[0])));
12299
12300 return 1;
12301 }
12302
12303 /* Expand a floating-point vector conditional move; a vcond operation
12304 rather than a movcc operation. */
12305
12306 bool
12307 ix86_expand_fp_vcond (rtx operands[])
12308 {
12309 enum rtx_code code = GET_CODE (operands[3]);
12310 rtx cmp;
12311
12312 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12313 &operands[4], &operands[5]);
12314 if (code == UNKNOWN)
12315 return false;
12316
12317 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12318 operands[5], operands[1], operands[2]))
12319 return true;
12320
12321 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12322 operands[1], operands[2]);
12323 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12324 return true;
12325 }
12326
12327 /* Expand a signed integral vector conditional move. */
12328
12329 bool
12330 ix86_expand_int_vcond (rtx operands[])
12331 {
12332 enum machine_mode mode = GET_MODE (operands[0]);
12333 enum rtx_code code = GET_CODE (operands[3]);
12334 bool negate = false;
12335 rtx x, cop0, cop1;
12336
12337 cop0 = operands[4];
12338 cop1 = operands[5];
12339
12340 /* Canonicalize the comparison to EQ, GT, GTU. */
12341 switch (code)
12342 {
12343 case EQ:
12344 case GT:
12345 case GTU:
12346 break;
12347
12348 case NE:
12349 case LE:
12350 case LEU:
12351 code = reverse_condition (code);
12352 negate = true;
12353 break;
12354
12355 case GE:
12356 case GEU:
12357 code = reverse_condition (code);
12358 negate = true;
12359 /* FALLTHRU */
12360
12361 case LT:
12362 case LTU:
12363 code = swap_condition (code);
12364 x = cop0, cop0 = cop1, cop1 = x;
12365 break;
12366
12367 default:
12368 gcc_unreachable ();
12369 }
12370
12371 /* Unsigned parallel compare is not supported by the hardware. Play some
12372 tricks to turn this into a signed comparison against 0. */
12373 if (code == GTU)
12374 {
12375 cop0 = force_reg (mode, cop0);
12376
12377 switch (mode)
12378 {
12379 case V4SImode:
12380 {
12381 rtx t1, t2, mask;
12382
12383 /* Perform a parallel modulo subtraction. */
12384 t1 = gen_reg_rtx (mode);
12385 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12386
12387 /* Extract the original sign bit of op0. */
12388 mask = GEN_INT (-0x80000000);
12389 mask = gen_rtx_CONST_VECTOR (mode,
12390 gen_rtvec (4, mask, mask, mask, mask));
12391 mask = force_reg (mode, mask);
12392 t2 = gen_reg_rtx (mode);
12393 emit_insn (gen_andv4si3 (t2, cop0, mask));
12394
12395 /* XOR it back into the result of the subtraction. This results
12396 in the sign bit set iff we saw unsigned underflow. */
12397 x = gen_reg_rtx (mode);
12398 emit_insn (gen_xorv4si3 (x, t1, t2));
12399
12400 code = GT;
12401 }
12402 break;
12403
12404 case V16QImode:
12405 case V8HImode:
12406 /* Perform a parallel unsigned saturating subtraction. */
12407 x = gen_reg_rtx (mode);
12408 emit_insn (gen_rtx_SET (VOIDmode, x,
12409 gen_rtx_US_MINUS (mode, cop0, cop1)));
12410
12411 code = EQ;
12412 negate = !negate;
12413 break;
12414
12415 default:
12416 gcc_unreachable ();
12417 }
12418
12419 cop0 = x;
12420 cop1 = CONST0_RTX (mode);
12421 }
12422
12423 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12424 operands[1+negate], operands[2-negate]);
12425
12426 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12427 operands[2-negate]);
12428 return true;
12429 }
12430
12431 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12432 true if we should do zero extension, else sign extension. HIGH_P is
12433 true if we want the N/2 high elements, else the low elements. */
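/* The widening is done by interleaving with a second vector that supplies
   the high halves: zero for zero extension, or the result of a GT compare
   against zero (all-ones in the negative lanes) for sign extension.  */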
12434
12435 void
12436 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12437 {
12438 enum machine_mode imode = GET_MODE (operands[1]);
12439 rtx (*unpack)(rtx, rtx, rtx);
12440 rtx se, dest;
12441
12442 switch (imode)
12443 {
12444 case V16QImode:
12445 if (high_p)
12446 unpack = gen_vec_interleave_highv16qi;
12447 else
12448 unpack = gen_vec_interleave_lowv16qi;
12449 break;
12450 case V8HImode:
12451 if (high_p)
12452 unpack = gen_vec_interleave_highv8hi;
12453 else
12454 unpack = gen_vec_interleave_lowv8hi;
12455 break;
12456 case V4SImode:
12457 if (high_p)
12458 unpack = gen_vec_interleave_highv4si;
12459 else
12460 unpack = gen_vec_interleave_lowv4si;
12461 break;
12462 default:
12463 gcc_unreachable ();
12464 }
12465
12466 dest = gen_lowpart (imode, operands[0]);
12467
12468 if (unsigned_p)
12469 se = force_reg (imode, CONST0_RTX (imode));
12470 else
12471 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12472 operands[1], pc_rtx, pc_rtx);
12473
12474 emit_insn (unpack (dest, operands[1], se));
12475 }
12476
12477 /* Expand conditional increment or decrement using adc/sbb instructions.
12478 The default case using setcc followed by the conditional move can be
12479 done by generic code. */
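/* For example, x = y + (a < b) with an unsigned compare can become roughly
       cmpl  b, a
       adcl  $0, y
   i.e. the carry produced by the compare is folded straight into the
   addition; the decrement case uses sbb in the same way.  */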
12480 int
12481 ix86_expand_int_addcc (rtx operands[])
12482 {
12483 enum rtx_code code = GET_CODE (operands[1]);
12484 rtx compare_op;
12485 rtx val = const0_rtx;
12486 bool fpcmp = false;
12487 enum machine_mode mode = GET_MODE (operands[0]);
12488
12489 if (operands[3] != const1_rtx
12490 && operands[3] != constm1_rtx)
12491 return 0;
12492 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12493 ix86_compare_op1, &compare_op))
12494 return 0;
12495 code = GET_CODE (compare_op);
12496
12497 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12498 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12499 {
12500 fpcmp = true;
12501 code = ix86_fp_compare_code_to_integer (code);
12502 }
12503
12504 if (code != LTU)
12505 {
12506 val = constm1_rtx;
12507 if (fpcmp)
12508 PUT_CODE (compare_op,
12509 reverse_condition_maybe_unordered
12510 (GET_CODE (compare_op)));
12511 else
12512 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12513 }
12514 PUT_MODE (compare_op, mode);
12515
12516 /* Construct either adc or sbb insn. */
12517 if ((code == LTU) == (operands[3] == constm1_rtx))
12518 {
12519 switch (GET_MODE (operands[0]))
12520 {
12521 case QImode:
12522 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12523 break;
12524 case HImode:
12525 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12526 break;
12527 case SImode:
12528 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12529 break;
12530 case DImode:
12531 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12532 break;
12533 default:
12534 gcc_unreachable ();
12535 }
12536 }
12537 else
12538 {
12539 switch (GET_MODE (operands[0]))
12540 {
12541 case QImode:
12542 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12543 break;
12544 case HImode:
12545 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12546 break;
12547 case SImode:
12548 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12549 break;
12550 case DImode:
12551 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12552 break;
12553 default:
12554 gcc_unreachable ();
12555 }
12556 }
12557 return 1; /* DONE */
12558 }
12559
12560
12561 /* Split OPERAND into word-sized parts stored in PARTS. Similar to split_di,
12562 but works for floating point parameters and non-offsettable memories.
12563 For pushes, it returns just stack offsets; the values will be saved
12564 in the right order. Maximally three parts are generated. */
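/* E.g. on a 32-bit target a DFmode operand yields two SImode parts and an
   XFmode operand three; on x86-64 an XFmode operand yields a DImode part
   for the significand plus an SImode part for the sign/exponent word.  */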
12565
12566 static int
12567 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12568 {
12569 int size;
12570
12571 if (!TARGET_64BIT)
12572 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12573 else
12574 size = (GET_MODE_SIZE (mode) + 4) / 8;
12575
12576 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12577 gcc_assert (size >= 2 && size <= 3);
12578
12579 /* Optimize constant pool references to immediates. This is used by fp
12580 moves, which force all constants to memory to allow combining. */
12581 if (MEM_P (operand) && MEM_READONLY_P (operand))
12582 {
12583 rtx tmp = maybe_get_pool_constant (operand);
12584 if (tmp)
12585 operand = tmp;
12586 }
12587
12588 if (MEM_P (operand) && !offsettable_memref_p (operand))
12589 {
12590 /* The only non-offsettable memories we handle are pushes. */
12591 int ok = push_operand (operand, VOIDmode);
12592
12593 gcc_assert (ok);
12594
12595 operand = copy_rtx (operand);
12596 PUT_MODE (operand, Pmode);
12597 parts[0] = parts[1] = parts[2] = operand;
12598 return size;
12599 }
12600
12601 if (GET_CODE (operand) == CONST_VECTOR)
12602 {
12603 enum machine_mode imode = int_mode_for_mode (mode);
12604 /* Caution: if we looked through a constant pool memory above,
12605 the operand may actually have a different mode now. That's
12606 ok, since we want to pun this all the way back to an integer. */
12607 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12608 gcc_assert (operand != NULL);
12609 mode = imode;
12610 }
12611
12612 if (!TARGET_64BIT)
12613 {
12614 if (mode == DImode)
12615 split_di (&operand, 1, &parts[0], &parts[1]);
12616 else
12617 {
12618 if (REG_P (operand))
12619 {
12620 gcc_assert (reload_completed);
12621 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12622 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12623 if (size == 3)
12624 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12625 }
12626 else if (offsettable_memref_p (operand))
12627 {
12628 operand = adjust_address (operand, SImode, 0);
12629 parts[0] = operand;
12630 parts[1] = adjust_address (operand, SImode, 4);
12631 if (size == 3)
12632 parts[2] = adjust_address (operand, SImode, 8);
12633 }
12634 else if (GET_CODE (operand) == CONST_DOUBLE)
12635 {
12636 REAL_VALUE_TYPE r;
12637 long l[4];
12638
12639 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12640 switch (mode)
12641 {
12642 case XFmode:
12643 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12644 parts[2] = gen_int_mode (l[2], SImode);
12645 break;
12646 case DFmode:
12647 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12648 break;
12649 default:
12650 gcc_unreachable ();
12651 }
12652 parts[1] = gen_int_mode (l[1], SImode);
12653 parts[0] = gen_int_mode (l[0], SImode);
12654 }
12655 else
12656 gcc_unreachable ();
12657 }
12658 }
12659 else
12660 {
12661 if (mode == TImode)
12662 split_ti (&operand, 1, &parts[0], &parts[1]);
12663 if (mode == XFmode || mode == TFmode)
12664 {
12665 enum machine_mode upper_mode = mode == XFmode ? SImode : DImode;
12666 if (REG_P (operand))
12667 {
12668 gcc_assert (reload_completed);
12669 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12670 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12671 }
12672 else if (offsettable_memref_p (operand))
12673 {
12674 operand = adjust_address (operand, DImode, 0);
12675 parts[0] = operand;
12676 parts[1] = adjust_address (operand, upper_mode, 8);
12677 }
12678 else if (GET_CODE (operand) == CONST_DOUBLE)
12679 {
12680 REAL_VALUE_TYPE r;
12681 long l[4];
12682
12683 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12684 real_to_target (l, &r, mode);
12685
12686 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12687 if (HOST_BITS_PER_WIDE_INT >= 64)
12688 parts[0]
12689 = gen_int_mode
12690 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12691 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12692 DImode);
12693 else
12694 parts[0] = immed_double_const (l[0], l[1], DImode);
12695
12696 if (upper_mode == SImode)
12697 parts[1] = gen_int_mode (l[2], SImode);
12698 else if (HOST_BITS_PER_WIDE_INT >= 64)
12699 parts[1]
12700 = gen_int_mode
12701 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12702 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12703 DImode);
12704 else
12705 parts[1] = immed_double_const (l[2], l[3], DImode);
12706 }
12707 else
12708 gcc_unreachable ();
12709 }
12710 }
12711
12712 return size;
12713 }
12714
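/* As a rough illustration of the splitting above: on a 32-bit target a
   DFmode value yields two SImode parts (low word in parts[0], high word
   in parts[1]), while an XFmode value yields three SImode parts.  On a
   64-bit target an XFmode or TFmode value yields two parts: a DImode low
   part and an SImode (XFmode) or DImode (TFmode) high part.  */
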
12715 /* Emit insns to perform a move or push of DI, DF, and XF values.
12716 Operands 2-4 receive the destination parts in the correct order;
12717 operands 5-7 receive the corresponding source parts. */
12719
12720 void
12721 ix86_split_long_move (rtx operands[])
12722 {
12723 rtx part[2][3];
12724 int nparts;
12725 int push = 0;
12726 int collisions = 0;
12727 enum machine_mode mode = GET_MODE (operands[0]);
12728
12729 /* The DFmode expanders may ask us to move a double.
12730 For a 64-bit target this is a single move. By hiding that fact
12731 here we simplify the i386.md splitters. */
12732 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12733 {
12734 /* Optimize constant pool reference to immediates. This is used by
12735 fp moves, which force all constants to memory to allow combining. */
12736
12737 if (MEM_P (operands[1])
12738 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12739 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12740 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12741 if (push_operand (operands[0], VOIDmode))
12742 {
12743 operands[0] = copy_rtx (operands[0]);
12744 PUT_MODE (operands[0], Pmode);
12745 }
12746 else
12747 operands[0] = gen_lowpart (DImode, operands[0]);
12748 operands[1] = gen_lowpart (DImode, operands[1]);
12749 emit_move_insn (operands[0], operands[1]);
12750 return;
12751 }
12752
12753 /* The only non-offsettable memory we handle is push. */
12754 if (push_operand (operands[0], VOIDmode))
12755 push = 1;
12756 else
12757 gcc_assert (!MEM_P (operands[0])
12758 || offsettable_memref_p (operands[0]));
12759
12760 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12761 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12762
12763 /* When emitting a push, take care of source operands on the stack. */
12764 if (push && MEM_P (operands[1])
12765 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12766 {
12767 if (nparts == 3)
12768 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12769 XEXP (part[1][2], 0));
12770 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12771 XEXP (part[1][1], 0));
12772 }
12773
12774 /* We need to do the copy in the right order in case an address register
12775 of the source overlaps the destination. */
12776 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12777 {
12778 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12779 collisions++;
12780 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12781 collisions++;
12782 if (nparts == 3
12783 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12784 collisions++;
12785
12786 /* Collision in the middle part can be handled by reordering. */
12787 if (collisions == 1 && nparts == 3
12788 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12789 {
12790 rtx tmp;
12791 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12792 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12793 }
12794
12795 /* If there are more collisions, we can't handle it by reordering.
12796 Do an lea to the last part and use only one colliding move. */
12797 else if (collisions > 1)
12798 {
12799 rtx base;
12800
12801 collisions = 1;
12802
12803 base = part[0][nparts - 1];
12804
12805 /* Handle the case when the last part isn't valid for lea.
12806 Happens in 64-bit mode storing the 12-byte XFmode. */
12807 if (GET_MODE (base) != Pmode)
12808 base = gen_rtx_REG (Pmode, REGNO (base));
12809
12810 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12811 part[1][0] = replace_equiv_address (part[1][0], base);
12812 part[1][1] = replace_equiv_address (part[1][1],
12813 plus_constant (base, UNITS_PER_WORD));
12814 if (nparts == 3)
12815 part[1][2] = replace_equiv_address (part[1][2],
12816 plus_constant (base, 8));
12817 }
12818 }
12819
12820 if (push)
12821 {
12822 if (!TARGET_64BIT)
12823 {
12824 if (nparts == 3)
12825 {
12826 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12827 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12828 emit_move_insn (part[0][2], part[1][2]);
12829 }
12830 }
12831 else
12832 {
12833 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12834 a register, it is OK - we will just use the larger counterpart. We also
12835 retype memory - these come from an attempt to avoid the REX prefix when
12836 moving the second half of a TFmode value. */
12837 if (GET_MODE (part[1][1]) == SImode)
12838 {
12839 switch (GET_CODE (part[1][1]))
12840 {
12841 case MEM:
12842 part[1][1] = adjust_address (part[1][1], DImode, 0);
12843 break;
12844
12845 case REG:
12846 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12847 break;
12848
12849 default:
12850 gcc_unreachable ();
12851 }
12852
12853 if (GET_MODE (part[1][0]) == SImode)
12854 part[1][0] = part[1][1];
12855 }
12856 }
12857 emit_move_insn (part[0][1], part[1][1]);
12858 emit_move_insn (part[0][0], part[1][0]);
12859 return;
12860 }
12861
12862 /* Choose the correct order so we do not overwrite the source before it is copied. */
12863 if ((REG_P (part[0][0])
12864 && REG_P (part[1][1])
12865 && (REGNO (part[0][0]) == REGNO (part[1][1])
12866 || (nparts == 3
12867 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12868 || (collisions > 0
12869 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12870 {
12871 if (nparts == 3)
12872 {
12873 operands[2] = part[0][2];
12874 operands[3] = part[0][1];
12875 operands[4] = part[0][0];
12876 operands[5] = part[1][2];
12877 operands[6] = part[1][1];
12878 operands[7] = part[1][0];
12879 }
12880 else
12881 {
12882 operands[2] = part[0][1];
12883 operands[3] = part[0][0];
12884 operands[5] = part[1][1];
12885 operands[6] = part[1][0];
12886 }
12887 }
12888 else
12889 {
12890 if (nparts == 3)
12891 {
12892 operands[2] = part[0][0];
12893 operands[3] = part[0][1];
12894 operands[4] = part[0][2];
12895 operands[5] = part[1][0];
12896 operands[6] = part[1][1];
12897 operands[7] = part[1][2];
12898 }
12899 else
12900 {
12901 operands[2] = part[0][0];
12902 operands[3] = part[0][1];
12903 operands[5] = part[1][0];
12904 operands[6] = part[1][1];
12905 }
12906 }
12907
12908 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12909 if (optimize_size)
12910 {
12911 if (CONST_INT_P (operands[5])
12912 && operands[5] != const0_rtx
12913 && REG_P (operands[2]))
12914 {
12915 if (CONST_INT_P (operands[6])
12916 && INTVAL (operands[6]) == INTVAL (operands[5]))
12917 operands[6] = operands[2];
12918
12919 if (nparts == 3
12920 && CONST_INT_P (operands[7])
12921 && INTVAL (operands[7]) == INTVAL (operands[5]))
12922 operands[7] = operands[2];
12923 }
12924
12925 if (nparts == 3
12926 && CONST_INT_P (operands[6])
12927 && operands[6] != const0_rtx
12928 && REG_P (operands[3])
12929 && CONST_INT_P (operands[7])
12930 && INTVAL (operands[7]) == INTVAL (operands[6]))
12931 operands[7] = operands[3];
12932 }
12933
12934 emit_move_insn (operands[2], operands[5]);
12935 emit_move_insn (operands[3], operands[6]);
12936 if (nparts == 3)
12937 emit_move_insn (operands[4], operands[7]);
12938
12939 return;
12940 }
12941
12942 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12943 left shift by a constant, either using a single shift or
12944 a sequence of add instructions. */
12945
12946 static void
12947 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12948 {
12949 if (count == 1)
12950 {
12951 emit_insn ((mode == DImode
12952 ? gen_addsi3
12953 : gen_adddi3) (operand, operand, operand));
12954 }
12955 else if (!optimize_size
12956 && count * ix86_cost->add <= ix86_cost->shift_const)
12957 {
12958 int i;
12959 for (i = 0; i < count; i++)
12960 {
12961 emit_insn ((mode == DImode
12962 ? gen_addsi3
12963 : gen_adddi3) (operand, operand, operand));
12964 }
12965 }
12966 else
12967 emit_insn ((mode == DImode
12968 ? gen_ashlsi3
12969 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12970 }
12971
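/* Rough illustration of the helper above: a count of 1 always becomes a
   single add (x = x + x); when not optimizing for size and COUNT adds
   cost no more than one constant shift, a short sequence of adds is
   emitted instead; otherwise a single left shift by COUNT is used.  */
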
12972 void
12973 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12974 {
12975 rtx low[2], high[2];
12976 int count;
12977 const int single_width = mode == DImode ? 32 : 64;
12978
12979 if (CONST_INT_P (operands[2]))
12980 {
12981 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12982 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12983
12984 if (count >= single_width)
12985 {
12986 emit_move_insn (high[0], low[1]);
12987 emit_move_insn (low[0], const0_rtx);
12988
12989 if (count > single_width)
12990 ix86_expand_ashl_const (high[0], count - single_width, mode);
12991 }
12992 else
12993 {
12994 if (!rtx_equal_p (operands[0], operands[1]))
12995 emit_move_insn (operands[0], operands[1]);
12996 emit_insn ((mode == DImode
12997 ? gen_x86_shld_1
12998 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12999 ix86_expand_ashl_const (low[0], count, mode);
13000 }
13001 return;
13002 }
13003
13004 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13005
13006 if (operands[1] == const1_rtx)
13007 {
13008 /* Assuming we've chosen QImode-capable registers, then 1 << N
13009 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13010 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13011 {
13012 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13013
13014 ix86_expand_clear (low[0]);
13015 ix86_expand_clear (high[0]);
13016 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13017
13018 d = gen_lowpart (QImode, low[0]);
13019 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13020 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13021 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13022
13023 d = gen_lowpart (QImode, high[0]);
13024 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13025 s = gen_rtx_NE (QImode, flags, const0_rtx);
13026 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13027 }
13028
13029 /* Otherwise, we can get the same results by manually performing
13030 a bit extract operation on bit 5/6, and then performing the two
13031 shifts. The two methods of getting 0/1 into low/high are exactly
13032 the same size. Avoiding the shift in the bit extract case helps
13033 pentium4 a bit; no one else seems to care much either way. */
13034 else
13035 {
13036 rtx x;
13037
13038 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13039 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13040 else
13041 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13042 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13043
13044 emit_insn ((mode == DImode
13045 ? gen_lshrsi3
13046 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13047 emit_insn ((mode == DImode
13048 ? gen_andsi3
13049 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13050 emit_move_insn (low[0], high[0]);
13051 emit_insn ((mode == DImode
13052 ? gen_xorsi3
13053 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13054 }
13055
13056 emit_insn ((mode == DImode
13057 ? gen_ashlsi3
13058 : gen_ashldi3) (low[0], low[0], operands[2]));
13059 emit_insn ((mode == DImode
13060 ? gen_ashlsi3
13061 : gen_ashldi3) (high[0], high[0], operands[2]));
13062 return;
13063 }
13064
13065 if (operands[1] == constm1_rtx)
13066 {
13067 /* For -1 << N, we can avoid the shld instruction, because we
13068 know that we're shifting 0...31/63 ones into a -1. */
13069 emit_move_insn (low[0], constm1_rtx);
13070 if (optimize_size)
13071 emit_move_insn (high[0], low[0]);
13072 else
13073 emit_move_insn (high[0], constm1_rtx);
13074 }
13075 else
13076 {
13077 if (!rtx_equal_p (operands[0], operands[1]))
13078 emit_move_insn (operands[0], operands[1]);
13079
13080 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13081 emit_insn ((mode == DImode
13082 ? gen_x86_shld_1
13083 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13084 }
13085
13086 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13087
13088 if (TARGET_CMOVE && scratch)
13089 {
13090 ix86_expand_clear (scratch);
13091 emit_insn ((mode == DImode
13092 ? gen_x86_shift_adj_1
13093 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13094 }
13095 else
13096 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13097 }
13098
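/* Sketch of what ix86_split_ashl emits, assuming DImode on a 32-bit
   target: for a constant count of 40 the high word receives the low
   source word, the low word is cleared, and the high word is then
   shifted left by 8; for a constant count below 32 an shld of the high
   word is followed by a shift (or add sequence) of the low word; for a
   variable count the shld/shl pair is followed by a conditional fix-up
   (cmove with a scratch register, or a branch) for counts of 32 and
   above.  */
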
13099 void
13100 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13101 {
13102 rtx low[2], high[2];
13103 int count;
13104 const int single_width = mode == DImode ? 32 : 64;
13105
13106 if (CONST_INT_P (operands[2]))
13107 {
13108 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13109 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13110
13111 if (count == single_width * 2 - 1)
13112 {
13113 emit_move_insn (high[0], high[1]);
13114 emit_insn ((mode == DImode
13115 ? gen_ashrsi3
13116 : gen_ashrdi3) (high[0], high[0],
13117 GEN_INT (single_width - 1)));
13118 emit_move_insn (low[0], high[0]);
13119
13120 }
13121 else if (count >= single_width)
13122 {
13123 emit_move_insn (low[0], high[1]);
13124 emit_move_insn (high[0], low[0]);
13125 emit_insn ((mode == DImode
13126 ? gen_ashrsi3
13127 : gen_ashrdi3) (high[0], high[0],
13128 GEN_INT (single_width - 1)));
13129 if (count > single_width)
13130 emit_insn ((mode == DImode
13131 ? gen_ashrsi3
13132 : gen_ashrdi3) (low[0], low[0],
13133 GEN_INT (count - single_width)));
13134 }
13135 else
13136 {
13137 if (!rtx_equal_p (operands[0], operands[1]))
13138 emit_move_insn (operands[0], operands[1]);
13139 emit_insn ((mode == DImode
13140 ? gen_x86_shrd_1
13141 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13142 emit_insn ((mode == DImode
13143 ? gen_ashrsi3
13144 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13145 }
13146 }
13147 else
13148 {
13149 if (!rtx_equal_p (operands[0], operands[1]))
13150 emit_move_insn (operands[0], operands[1]);
13151
13152 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13153
13154 emit_insn ((mode == DImode
13155 ? gen_x86_shrd_1
13156 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13157 emit_insn ((mode == DImode
13158 ? gen_ashrsi3
13159 : gen_ashrdi3) (high[0], high[0], operands[2]));
13160
13161 if (TARGET_CMOVE && scratch)
13162 {
13163 emit_move_insn (scratch, high[0]);
13164 emit_insn ((mode == DImode
13165 ? gen_ashrsi3
13166 : gen_ashrdi3) (scratch, scratch,
13167 GEN_INT (single_width - 1)));
13168 emit_insn ((mode == DImode
13169 ? gen_x86_shift_adj_1
13170 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13171 scratch));
13172 }
13173 else
13174 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13175 }
13176 }
13177
13178 void
13179 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13180 {
13181 rtx low[2], high[2];
13182 int count;
13183 const int single_width = mode == DImode ? 32 : 64;
13184
13185 if (CONST_INT_P (operands[2]))
13186 {
13187 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13188 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13189
13190 if (count >= single_width)
13191 {
13192 emit_move_insn (low[0], high[1]);
13193 ix86_expand_clear (high[0]);
13194
13195 if (count > single_width)
13196 emit_insn ((mode == DImode
13197 ? gen_lshrsi3
13198 : gen_lshrdi3) (low[0], low[0],
13199 GEN_INT (count - single_width)));
13200 }
13201 else
13202 {
13203 if (!rtx_equal_p (operands[0], operands[1]))
13204 emit_move_insn (operands[0], operands[1]);
13205 emit_insn ((mode == DImode
13206 ? gen_x86_shrd_1
13207 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13208 emit_insn ((mode == DImode
13209 ? gen_lshrsi3
13210 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13211 }
13212 }
13213 else
13214 {
13215 if (!rtx_equal_p (operands[0], operands[1]))
13216 emit_move_insn (operands[0], operands[1]);
13217
13218 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13219
13220 emit_insn ((mode == DImode
13221 ? gen_x86_shrd_1
13222 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13223 emit_insn ((mode == DImode
13224 ? gen_lshrsi3
13225 : gen_lshrdi3) (high[0], high[0], operands[2]));
13226
13227 /* Heh. By reversing the arguments, we can reuse this pattern. */
13228 if (TARGET_CMOVE && scratch)
13229 {
13230 ix86_expand_clear (scratch);
13231 emit_insn ((mode == DImode
13232 ? gen_x86_shift_adj_1
13233 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13234 scratch));
13235 }
13236 else
13237 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13238 }
13239 }
13240
13241 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13242 static void
13243 predict_jump (int prob)
13244 {
13245 rtx insn = get_last_insn ();
13246 gcc_assert (JUMP_P (insn));
13247 REG_NOTES (insn)
13248 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13249 GEN_INT (prob),
13250 REG_NOTES (insn));
13251 }
13252
13253 /* Helper function for the string operations below. Test VARIABLE whether
13254 it is aligned to VALUE bytes. If so, jump to the returned label. */
13255 static rtx
13256 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13257 {
13258 rtx label = gen_label_rtx ();
13259 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13260 if (GET_MODE (variable) == DImode)
13261 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13262 else
13263 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13264 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13265 1, label);
13266 if (epilogue)
13267 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13268 else
13269 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13270 return label;
13271 }
13272
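/* For example, ix86_expand_aligntest (destptr, 2, false) masks DESTPTR
   with 2 and emits a jump to a fresh label when that bit is clear; the
   jump is predicted taken with 90% probability for prologue tests and
   50% for epilogue tests (the EPILOGUE argument).  */
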
13273 /* Adjust COUNTREG by subtracting VALUE from it. */
13274 static void
13275 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13276 {
13277 if (GET_MODE (countreg) == DImode)
13278 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13279 else
13280 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13281 }
13282
13283 /* Zero extend possibly SImode EXP to Pmode register. */
13284 rtx
13285 ix86_zero_extend_to_Pmode (rtx exp)
13286 {
13287 rtx r;
13288 if (GET_MODE (exp) == VOIDmode)
13289 return force_reg (Pmode, exp);
13290 if (GET_MODE (exp) == Pmode)
13291 return copy_to_mode_reg (Pmode, exp);
13292 r = gen_reg_rtx (Pmode);
13293 emit_insn (gen_zero_extendsidi2 (r, exp));
13294 return r;
13295 }
13296
13297 /* Divide COUNTREG by SCALE. */
13298 static rtx
13299 scale_counter (rtx countreg, int scale)
13300 {
13301 rtx sc;
13302 rtx piece_size_mask;
13303
13304 if (scale == 1)
13305 return countreg;
13306 if (CONST_INT_P (countreg))
13307 return GEN_INT (INTVAL (countreg) / scale);
13308 gcc_assert (REG_P (countreg));
13309
13310 piece_size_mask = GEN_INT (scale - 1);
13311 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13312 GEN_INT (exact_log2 (scale)),
13313 NULL, 1, OPTAB_DIRECT);
13314 return sc;
13315 }
13316
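/* For example, scale_counter (countreg, 4) emits a logical shift right
   by 2 for a register count, and simply returns INTVAL (count) / 4 for
   a constant count.  */
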
13317 /* Return the mode for the memcpy/memset loop counter. Prefer SImode over DImode
13318 for constant loop counts. */
13319
13320 static enum machine_mode
13321 counter_mode (rtx count_exp)
13322 {
13323 if (GET_MODE (count_exp) != VOIDmode)
13324 return GET_MODE (count_exp);
13325 if (GET_CODE (count_exp) != CONST_INT)
13326 return Pmode;
13327 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13328 return DImode;
13329 return SImode;
13330 }
13331
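/* For example, a count already held in a register keeps its mode; a
   constant count fitting in 32 bits uses SImode even on a 64-bit
   target, and only constants with bits set above bit 31 use DImode.  */
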
13332 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed to
13333 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the overall
13334 size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13335 equivalent loop to set memory with VALUE (supposed to be in MODE).
13336
13337 The size is rounded down to a whole number of chunks moved at once.
13338 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
13339
13340
13341 static void
13342 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13343 rtx destptr, rtx srcptr, rtx value,
13344 rtx count, enum machine_mode mode, int unroll,
13345 int expected_size)
13346 {
13347 rtx out_label, top_label, iter, tmp;
13348 enum machine_mode iter_mode = counter_mode (count);
13349 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13350 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13351 rtx size;
13352 rtx x_addr;
13353 rtx y_addr;
13354 int i;
13355
13356 top_label = gen_label_rtx ();
13357 out_label = gen_label_rtx ();
13358 iter = gen_reg_rtx (iter_mode);
13359
13360 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13361 NULL, 1, OPTAB_DIRECT);
13362 /* Those two should combine. */
13363 if (piece_size == const1_rtx)
13364 {
13365 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13366 true, out_label);
13367 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13368 }
13369 emit_move_insn (iter, const0_rtx);
13370
13371 emit_label (top_label);
13372
13373 tmp = convert_modes (Pmode, iter_mode, iter, true);
13374 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13375 destmem = change_address (destmem, mode, x_addr);
13376
13377 if (srcmem)
13378 {
13379 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13380 srcmem = change_address (srcmem, mode, y_addr);
13381
13382 /* When unrolling for chips that reorder memory reads and writes,
13383 we can save registers by using a single temporary.
13384 Also, using 4 temporaries is overkill in 32-bit mode. */
13385 if (!TARGET_64BIT && 0)
13386 {
13387 for (i = 0; i < unroll; i++)
13388 {
13389 if (i)
13390 {
13391 destmem =
13392 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13393 srcmem =
13394 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13395 }
13396 emit_move_insn (destmem, srcmem);
13397 }
13398 }
13399 else
13400 {
13401 rtx tmpreg[4];
13402 gcc_assert (unroll <= 4);
13403 for (i = 0; i < unroll; i++)
13404 {
13405 tmpreg[i] = gen_reg_rtx (mode);
13406 if (i)
13407 {
13408 srcmem =
13409 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13410 }
13411 emit_move_insn (tmpreg[i], srcmem);
13412 }
13413 for (i = 0; i < unroll; i++)
13414 {
13415 if (i)
13416 {
13417 destmem =
13418 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13419 }
13420 emit_move_insn (destmem, tmpreg[i]);
13421 }
13422 }
13423 }
13424 else
13425 for (i = 0; i < unroll; i++)
13426 {
13427 if (i)
13428 destmem =
13429 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13430 emit_move_insn (destmem, value);
13431 }
13432
13433 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13434 true, OPTAB_LIB_WIDEN);
13435 if (tmp != iter)
13436 emit_move_insn (iter, tmp);
13437
13438 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13439 true, top_label);
13440 if (expected_size != -1)
13441 {
13442 expected_size /= GET_MODE_SIZE (mode) * unroll;
13443 if (expected_size == 0)
13444 predict_jump (0);
13445 else if (expected_size > REG_BR_PROB_BASE)
13446 predict_jump (REG_BR_PROB_BASE - 1);
13447 else
13448 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13449 }
13450 else
13451 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13452 iter = ix86_zero_extend_to_Pmode (iter);
13453 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13454 true, OPTAB_LIB_WIDEN);
13455 if (tmp != destptr)
13456 emit_move_insn (destptr, tmp);
13457 if (srcptr)
13458 {
13459 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13460 true, OPTAB_LIB_WIDEN);
13461 if (tmp != srcptr)
13462 emit_move_insn (srcptr, tmp);
13463 }
13464 emit_label (out_label);
13465 }
13466
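/* Minimal sketch of the loop emitted above for a copy with MODE == SImode
   and UNROLL == 1 (C-like pseudo code, not the actual RTL):

       size = count & ~3;
       iter = 0;
     top:
       *(int *) (dest + iter) = *(int *) (src + iter);
       iter += 4;
       if (iter < size) goto top;
       dest += iter; src += iter;
     out:

   For a memset the load/store pair becomes a store of VALUE, and for
   UNROLL > 1 the body is replicated with adjusted offsets.  */
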
13467 /* Output "rep; mov" instruction.
13468 Arguments have the same meaning as for the previous function. */
13469 static void
13470 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13471 rtx destptr, rtx srcptr,
13472 rtx count,
13473 enum machine_mode mode)
13474 {
13475 rtx destexp;
13476 rtx srcexp;
13477 rtx countreg;
13478
13479 /* If the size is known, it is shorter to use rep movs. */
13480 if (mode == QImode && CONST_INT_P (count)
13481 && !(INTVAL (count) & 3))
13482 mode = SImode;
13483
13484 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13485 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13486 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13487 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13488 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13489 if (mode != QImode)
13490 {
13491 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13492 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13493 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13494 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13495 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13496 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13497 }
13498 else
13499 {
13500 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13501 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13502 }
13503 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13504 destexp, srcexp));
13505 }
13506
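/* For example, a QImode request with a constant byte count that is a
   multiple of 4 is upgraded to SImode above, so a 32-bit rep mov is
   used; the count register fed to gen_rep_mov is then count / 4, zero
   extended to Pmode, and the address expressions describe the final
   pointer values.  */
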
13507 /* Output "rep; stos" instruction.
13508 Arguments have the same meaning as for the previous function. */
13509 static void
13510 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13511 rtx count,
13512 enum machine_mode mode)
13513 {
13514 rtx destexp;
13515 rtx countreg;
13516
13517 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13518 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13519 value = force_reg (mode, gen_lowpart (mode, value));
13520 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13521 if (mode != QImode)
13522 {
13523 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13524 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13525 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13526 }
13527 else
13528 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13529 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13530 }
13531
13532 static void
13533 emit_strmov (rtx destmem, rtx srcmem,
13534 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13535 {
13536 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13537 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13538 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13539 }
13540
13541 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
13542 static void
13543 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13544 rtx destptr, rtx srcptr, rtx count, int max_size)
13545 {
13546 rtx src, dest;
13547 if (CONST_INT_P (count))
13548 {
13549 HOST_WIDE_INT countval = INTVAL (count);
13550 int offset = 0;
13551
13552 if ((countval & 0x10) && max_size > 16)
13553 {
13554 if (TARGET_64BIT)
13555 {
13556 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13557 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13558 }
13559 else
13560 gcc_unreachable ();
13561 offset += 16;
13562 }
13563 if ((countval & 0x08) && max_size > 8)
13564 {
13565 if (TARGET_64BIT)
13566 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13567 else
13568 {
13569 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13570 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13571 }
13572 offset += 8;
13573 }
13574 if ((countval & 0x04) && max_size > 4)
13575 {
13576 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13577 offset += 4;
13578 }
13579 if ((countval & 0x02) && max_size > 2)
13580 {
13581 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13582 offset += 2;
13583 }
13584 if ((countval & 0x01) && max_size > 1)
13585 {
13586 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13587 offset += 1;
13588 }
13589 return;
13590 }
13591 if (max_size > 8)
13592 {
13593 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13594 count, 1, OPTAB_DIRECT);
13595 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13596 count, QImode, 1, 4);
13597 return;
13598 }
13599
13600 /* When single stringop instructions are available, we can cheaply increase
13601 dest and src pointers. Otherwise we save code size by maintaining an
13602 offset (zero is readily available from the preceding rep operation) and
13603 using x86 addressing modes. */
13604 if (TARGET_SINGLE_STRINGOP)
13605 {
13606 if (max_size > 4)
13607 {
13608 rtx label = ix86_expand_aligntest (count, 4, true);
13609 src = change_address (srcmem, SImode, srcptr);
13610 dest = change_address (destmem, SImode, destptr);
13611 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13612 emit_label (label);
13613 LABEL_NUSES (label) = 1;
13614 }
13615 if (max_size > 2)
13616 {
13617 rtx label = ix86_expand_aligntest (count, 2, true);
13618 src = change_address (srcmem, HImode, srcptr);
13619 dest = change_address (destmem, HImode, destptr);
13620 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13621 emit_label (label);
13622 LABEL_NUSES (label) = 1;
13623 }
13624 if (max_size > 1)
13625 {
13626 rtx label = ix86_expand_aligntest (count, 1, true);
13627 src = change_address (srcmem, QImode, srcptr);
13628 dest = change_address (destmem, QImode, destptr);
13629 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13630 emit_label (label);
13631 LABEL_NUSES (label) = 1;
13632 }
13633 }
13634 else
13635 {
13636 rtx offset = force_reg (Pmode, const0_rtx);
13637 rtx tmp;
13638
13639 if (max_size > 4)
13640 {
13641 rtx label = ix86_expand_aligntest (count, 4, true);
13642 src = change_address (srcmem, SImode, srcptr);
13643 dest = change_address (destmem, SImode, destptr);
13644 emit_move_insn (dest, src);
13645 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13646 true, OPTAB_LIB_WIDEN);
13647 if (tmp != offset)
13648 emit_move_insn (offset, tmp);
13649 emit_label (label);
13650 LABEL_NUSES (label) = 1;
13651 }
13652 if (max_size > 2)
13653 {
13654 rtx label = ix86_expand_aligntest (count, 2, true);
13655 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13656 src = change_address (srcmem, HImode, tmp);
13657 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13658 dest = change_address (destmem, HImode, tmp);
13659 emit_move_insn (dest, src);
13660 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13661 true, OPTAB_LIB_WIDEN);
13662 if (tmp != offset)
13663 emit_move_insn (offset, tmp);
13664 emit_label (label);
13665 LABEL_NUSES (label) = 1;
13666 }
13667 if (max_size > 1)
13668 {
13669 rtx label = ix86_expand_aligntest (count, 1, true);
13670 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13671 src = change_address (srcmem, QImode, tmp);
13672 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13673 dest = change_address (destmem, QImode, tmp);
13674 emit_move_insn (dest, src);
13675 emit_label (label);
13676 LABEL_NUSES (label) = 1;
13677 }
13678 }
13679 }
13680
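/* For example, for a constant remaining count of 7 the code above emits
   an SImode move at offset 0, an HImode move at offset 4 and a QImode
   move at offset 6; for a non-constant count it falls back to alignment
   tests (or a byte loop when more than 8 bytes may remain).  */
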
13681 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13682 static void
13683 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13684 rtx count, int max_size)
13685 {
13686 count =
13687 expand_simple_binop (counter_mode (count), AND, count,
13688 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13689 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13690 gen_lowpart (QImode, value), count, QImode,
13691 1, max_size / 2);
13692 }
13693
13694 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13695 static void
13696 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13697 {
13698 rtx dest;
13699
13700 if (CONST_INT_P (count))
13701 {
13702 HOST_WIDE_INT countval = INTVAL (count);
13703 int offset = 0;
13704
13705 if ((countval & 0x10) && max_size > 16)
13706 {
13707 if (TARGET_64BIT)
13708 {
13709 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13710 emit_insn (gen_strset (destptr, dest, value));
13711 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13712 emit_insn (gen_strset (destptr, dest, value));
13713 }
13714 else
13715 gcc_unreachable ();
13716 offset += 16;
13717 }
13718 if ((countval & 0x08) && max_size > 8)
13719 {
13720 if (TARGET_64BIT)
13721 {
13722 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13723 emit_insn (gen_strset (destptr, dest, value));
13724 }
13725 else
13726 {
13727 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13728 emit_insn (gen_strset (destptr, dest, value));
13729 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13730 emit_insn (gen_strset (destptr, dest, value));
13731 }
13732 offset += 8;
13733 }
13734 if ((countval & 0x04) && max_size > 4)
13735 {
13736 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13737 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13738 offset += 4;
13739 }
13740 if ((countval & 0x02) && max_size > 2)
13741 {
13742 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13743 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13744 offset += 2;
13745 }
13746 if ((countval & 0x01) && max_size > 1)
13747 {
13748 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13749 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13750 offset += 1;
13751 }
13752 return;
13753 }
13754 if (max_size > 32)
13755 {
13756 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13757 return;
13758 }
13759 if (max_size > 16)
13760 {
13761 rtx label = ix86_expand_aligntest (count, 16, true);
13762 if (TARGET_64BIT)
13763 {
13764 dest = change_address (destmem, DImode, destptr);
13765 emit_insn (gen_strset (destptr, dest, value));
13766 emit_insn (gen_strset (destptr, dest, value));
13767 }
13768 else
13769 {
13770 dest = change_address (destmem, SImode, destptr);
13771 emit_insn (gen_strset (destptr, dest, value));
13772 emit_insn (gen_strset (destptr, dest, value));
13773 emit_insn (gen_strset (destptr, dest, value));
13774 emit_insn (gen_strset (destptr, dest, value));
13775 }
13776 emit_label (label);
13777 LABEL_NUSES (label) = 1;
13778 }
13779 if (max_size > 8)
13780 {
13781 rtx label = ix86_expand_aligntest (count, 8, true);
13782 if (TARGET_64BIT)
13783 {
13784 dest = change_address (destmem, DImode, destptr);
13785 emit_insn (gen_strset (destptr, dest, value));
13786 }
13787 else
13788 {
13789 dest = change_address (destmem, SImode, destptr);
13790 emit_insn (gen_strset (destptr, dest, value));
13791 emit_insn (gen_strset (destptr, dest, value));
13792 }
13793 emit_label (label);
13794 LABEL_NUSES (label) = 1;
13795 }
13796 if (max_size > 4)
13797 {
13798 rtx label = ix86_expand_aligntest (count, 4, true);
13799 dest = change_address (destmem, SImode, destptr);
13800 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13801 emit_label (label);
13802 LABEL_NUSES (label) = 1;
13803 }
13804 if (max_size > 2)
13805 {
13806 rtx label = ix86_expand_aligntest (count, 2, true);
13807 dest = change_address (destmem, HImode, destptr);
13808 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13809 emit_label (label);
13810 LABEL_NUSES (label) = 1;
13811 }
13812 if (max_size > 1)
13813 {
13814 rtx label = ix86_expand_aligntest (count, 1, true);
13815 dest = change_address (destmem, QImode, destptr);
13816 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13817 emit_label (label);
13818 LABEL_NUSES (label) = 1;
13819 }
13820 }
13821
13822 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
13823 by ALIGN, to DESIRED_ALIGNMENT. */
13824 static void
13825 expand_movmem_prologue (rtx destmem, rtx srcmem,
13826 rtx destptr, rtx srcptr, rtx count,
13827 int align, int desired_alignment)
13828 {
13829 if (align <= 1 && desired_alignment > 1)
13830 {
13831 rtx label = ix86_expand_aligntest (destptr, 1, false);
13832 srcmem = change_address (srcmem, QImode, srcptr);
13833 destmem = change_address (destmem, QImode, destptr);
13834 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13835 ix86_adjust_counter (count, 1);
13836 emit_label (label);
13837 LABEL_NUSES (label) = 1;
13838 }
13839 if (align <= 2 && desired_alignment > 2)
13840 {
13841 rtx label = ix86_expand_aligntest (destptr, 2, false);
13842 srcmem = change_address (srcmem, HImode, srcptr);
13843 destmem = change_address (destmem, HImode, destptr);
13844 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13845 ix86_adjust_counter (count, 2);
13846 emit_label (label);
13847 LABEL_NUSES (label) = 1;
13848 }
13849 if (align <= 4 && desired_alignment > 4)
13850 {
13851 rtx label = ix86_expand_aligntest (destptr, 4, false);
13852 srcmem = change_address (srcmem, SImode, srcptr);
13853 destmem = change_address (destmem, SImode, destptr);
13854 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13855 ix86_adjust_counter (count, 4);
13856 emit_label (label);
13857 LABEL_NUSES (label) = 1;
13858 }
13859 gcc_assert (desired_alignment <= 8);
13860 }
13861
13862 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
13863 to DESIRED_ALIGNMENT. */
13864 static void
13865 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13866 int align, int desired_alignment)
13867 {
13868 if (align <= 1 && desired_alignment > 1)
13869 {
13870 rtx label = ix86_expand_aligntest (destptr, 1, false);
13871 destmem = change_address (destmem, QImode, destptr);
13872 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13873 ix86_adjust_counter (count, 1);
13874 emit_label (label);
13875 LABEL_NUSES (label) = 1;
13876 }
13877 if (align <= 2 && desired_alignment > 2)
13878 {
13879 rtx label = ix86_expand_aligntest (destptr, 2, false);
13880 destmem = change_address (destmem, HImode, destptr);
13881 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13882 ix86_adjust_counter (count, 2);
13883 emit_label (label);
13884 LABEL_NUSES (label) = 1;
13885 }
13886 if (align <= 4 && desired_alignment > 4)
13887 {
13888 rtx label = ix86_expand_aligntest (destptr, 4, false);
13889 destmem = change_address (destmem, SImode, destptr);
13890 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13891 ix86_adjust_counter (count, 4);
13892 emit_label (label);
13893 LABEL_NUSES (label) = 1;
13894 }
13895 gcc_assert (desired_alignment <= 8);
13896 }
13897
13898 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
13899 static enum stringop_alg
13900 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13901 int *dynamic_check)
13902 {
13903 const struct stringop_algs * algs;
13904
13905 *dynamic_check = -1;
13906 if (memset)
13907 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13908 else
13909 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13910 if (stringop_alg != no_stringop)
13911 return stringop_alg;
13912 /* rep; movq or rep; movl is the smallest variant. */
13913 else if (optimize_size)
13914 {
13915 if (!count || (count & 3))
13916 return rep_prefix_1_byte;
13917 else
13918 return rep_prefix_4_byte;
13919 }
13920 /* Very tiny blocks are best handled via the loop; REP is expensive to set up. */
13922 else if (expected_size != -1 && expected_size < 4)
13923 return loop_1_byte;
13924 else if (expected_size != -1)
13925 {
13926 unsigned int i;
13927 enum stringop_alg alg = libcall;
13928 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13929 {
13930 gcc_assert (algs->size[i].max);
13931 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13932 {
13933 if (algs->size[i].alg != libcall)
13934 alg = algs->size[i].alg;
13935 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13936 the last non-libcall inline algorithm. */
13937 if (TARGET_INLINE_ALL_STRINGOPS)
13938 {
13939 /* When the current size is best copied by a libcall, but we
13940 are still forced to inline, run the heuristic below
13941 that will pick code for medium-sized blocks. */
13942 if (alg != libcall)
13943 return alg;
13944 break;
13945 }
13946 else
13947 return algs->size[i].alg;
13948 }
13949 }
13950 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13951 }
13952 /* When asked to inline the call anyway, try to pick a meaningful choice.
13953 We look for the maximal size of a block that is faster to copy by hand
13954 and take blocks of at most that size, guessing that the average size
13955 will be roughly half of the block.
13956
13957 If this turns out to be bad, we might simply specify the preferred
13958 choice in ix86_costs. */
13959 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13960 && algs->unknown_size == libcall)
13961 {
13962 int max = -1;
13963 enum stringop_alg alg;
13964 int i;
13965
13966 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
13967 if (algs->size[i].alg != libcall && algs->size[i].alg)
13968 max = algs->size[i].max;
13969 if (max == -1)
13970 max = 4096;
13971 alg = decide_alg (count, max / 2, memset, dynamic_check);
13972 gcc_assert (*dynamic_check == -1);
13973 gcc_assert (alg != libcall);
13974 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13975 *dynamic_check = max;
13976 return alg;
13977 }
13978 return algs->unknown_size;
13979 }
13980
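/* For example, under -Os a known count that is a multiple of 4 selects
   rep_prefix_4_byte and anything else selects rep_prefix_1_byte; a very
   small expected size (below 4) selects loop_1_byte; otherwise the
   per-CPU cost tables in ix86_cost pick the algorithm for the expected
   size.  */
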
13981 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13982 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13983 static int
13984 decide_alignment (int align,
13985 enum stringop_alg alg,
13986 int expected_size)
13987 {
13988 int desired_align = 0;
13989 switch (alg)
13990 {
13991 case no_stringop:
13992 gcc_unreachable ();
13993 case loop:
13994 case unrolled_loop:
13995 desired_align = GET_MODE_SIZE (Pmode);
13996 break;
13997 case rep_prefix_8_byte:
13998 desired_align = 8;
13999 break;
14000 case rep_prefix_4_byte:
14001 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14002 copying a whole cache line at once. */
14003 if (TARGET_PENTIUMPRO)
14004 desired_align = 8;
14005 else
14006 desired_align = 4;
14007 break;
14008 case rep_prefix_1_byte:
14009 /* PentiumPro has special logic triggering for 8-byte aligned blocks,
14010 copying a whole cache line at once. */
14011 if (TARGET_PENTIUMPRO)
14012 desired_align = 8;
14013 else
14014 desired_align = 1;
14015 break;
14016 case loop_1_byte:
14017 desired_align = 1;
14018 break;
14019 case libcall:
14020 return 0;
14021 }
14022
14023 if (optimize_size)
14024 desired_align = 1;
14025 if (desired_align < align)
14026 desired_align = align;
14027 if (expected_size != -1 && expected_size < 4)
14028 desired_align = align;
14029 return desired_align;
14030 }
14031
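/* For example, the loop and unrolled_loop algorithms request word-size
   alignment (4 bytes on ia32, 8 on x86-64), rep_prefix_4_byte requests
   8 bytes when tuning for PentiumPro and 4 bytes otherwise, and when
   optimizing for size no alignment beyond the already-known ALIGN is
   requested.  */
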
14032 /* Return the smallest power of 2 greater than VAL. */
14033 static int
14034 smallest_pow2_greater_than (int val)
14035 {
14036 int ret = 1;
14037 while (ret <= val)
14038 ret <<= 1;
14039 return ret;
14040 }
14041
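/* Note that the result is strictly greater than VAL even when VAL is
   itself a power of two: for example, smallest_pow2_greater_than (4)
   returns 8, and smallest_pow2_greater_than (7) also returns 8.  */
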
14042 /* Expand string move (memcpy) operation. Use i386 string operations when
14043 profitable. ix86_expand_setmem contains similar code. The code depends upon
14044 architecture, block size and alignment, but always has the same
14045 overall structure:
14046
14047 1) Prologue guard: Conditional that jumps to the epilogue for small
14048 blocks that can be handled by the epilogue alone. This is faster, but
14049 also needed for correctness, since the prologue assumes the block is
14050 larger than the desired alignment.
14051
14052 Optional dynamic check for size and libcall for large
14053 blocks is emitted here too, with -minline-stringops-dynamically.
14054
14055 2) Prologue: copy the first few bytes in order to get the destination
14056 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14057 DESIRED_ALIGN; up to DESIRED_ALIGN - ALIGN bytes can be copied.
14058 We emit either a jump tree on power-of-two sized blocks, or a byte loop.
14059
14060 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14061 with the specified algorithm.
14062
14063 4) Epilogue: code copying the tail of the block that is too small to be
14064 handled by the main body (or up to the size guarded by the prologue guard). */
14065
14066 int
14067 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14068 rtx expected_align_exp, rtx expected_size_exp)
14069 {
14070 rtx destreg;
14071 rtx srcreg;
14072 rtx label = NULL;
14073 rtx tmp;
14074 rtx jump_around_label = NULL;
14075 HOST_WIDE_INT align = 1;
14076 unsigned HOST_WIDE_INT count = 0;
14077 HOST_WIDE_INT expected_size = -1;
14078 int size_needed = 0, epilogue_size_needed;
14079 int desired_align = 0;
14080 enum stringop_alg alg;
14081 int dynamic_check;
14082
14083 if (CONST_INT_P (align_exp))
14084 align = INTVAL (align_exp);
14085 /* i386 can do misaligned access at reasonably increased cost. */
14086 if (CONST_INT_P (expected_align_exp)
14087 && INTVAL (expected_align_exp) > align)
14088 align = INTVAL (expected_align_exp);
14089 if (CONST_INT_P (count_exp))
14090 count = expected_size = INTVAL (count_exp);
14091 if (CONST_INT_P (expected_size_exp) && count == 0)
14092 expected_size = INTVAL (expected_size_exp);
14093
14094 /* Step 0: Decide on preferred algorithm, desired alignment and
14095 size of chunks to be copied by main loop. */
14096
14097 alg = decide_alg (count, expected_size, false, &dynamic_check);
14098 desired_align = decide_alignment (align, alg, expected_size);
14099
14100 if (!TARGET_ALIGN_STRINGOPS)
14101 align = desired_align;
14102
14103 if (alg == libcall)
14104 return 0;
14105 gcc_assert (alg != no_stringop);
14106 if (!count)
14107 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14108 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14109 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14110 switch (alg)
14111 {
14112 case libcall:
14113 case no_stringop:
14114 gcc_unreachable ();
14115 case loop:
14116 size_needed = GET_MODE_SIZE (Pmode);
14117 break;
14118 case unrolled_loop:
14119 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14120 break;
14121 case rep_prefix_8_byte:
14122 size_needed = 8;
14123 break;
14124 case rep_prefix_4_byte:
14125 size_needed = 4;
14126 break;
14127 case rep_prefix_1_byte:
14128 case loop_1_byte:
14129 size_needed = 1;
14130 break;
14131 }
14132
14133 epilogue_size_needed = size_needed;
14134
14135 /* Step 1: Prologue guard. */
14136
14137 /* Alignment code needs count to be in a register. */
14138 if (CONST_INT_P (count_exp) && desired_align > align)
14139 {
14140 enum machine_mode mode = SImode;
14141 if (TARGET_64BIT && (count & ~0xffffffff))
14142 mode = DImode;
14143 count_exp = force_reg (mode, count_exp);
14144 }
14145 gcc_assert (desired_align >= 1 && align >= 1);
14146
14147 /* Ensure that alignment prologue won't copy past end of block. */
14148 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14149 {
14150 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14151 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14152 Make sure it is power of 2. */
14153 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14154
14155 label = gen_label_rtx ();
14156 emit_cmp_and_jump_insns (count_exp,
14157 GEN_INT (epilogue_size_needed),
14158 LTU, 0, counter_mode (count_exp), 1, label);
14159 if (GET_CODE (count_exp) == CONST_INT)
14160 ;
14161 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14162 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14163 else
14164 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14165 }
14166 /* Emit code to decide at runtime whether a library call or inline code
14167 should be used. */
14168 if (dynamic_check != -1)
14169 {
14170 rtx hot_label = gen_label_rtx ();
14171 jump_around_label = gen_label_rtx ();
14172 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14173 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14174 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14175 emit_block_move_via_libcall (dst, src, count_exp, false);
14176 emit_jump (jump_around_label);
14177 emit_label (hot_label);
14178 }
14179
14180 /* Step 2: Alignment prologue. */
14181
14182 if (desired_align > align)
14183 {
14184 /* Except for the first move in the epilogue, we no longer know the
14185 constant offset in the aliasing info. It doesn't seem worth the
14186 pain to maintain it for the first move, so throw away
14187 the info early. */
14188 src = change_address (src, BLKmode, srcreg);
14189 dst = change_address (dst, BLKmode, destreg);
14190 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14191 desired_align);
14192 }
14193 if (label && size_needed == 1)
14194 {
14195 emit_label (label);
14196 LABEL_NUSES (label) = 1;
14197 label = NULL;
14198 }
14199
14200 /* Step 3: Main loop. */
14201
14202 switch (alg)
14203 {
14204 case libcall:
14205 case no_stringop:
14206 gcc_unreachable ();
14207 case loop_1_byte:
14208 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14209 count_exp, QImode, 1, expected_size);
14210 break;
14211 case loop:
14212 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14213 count_exp, Pmode, 1, expected_size);
14214 break;
14215 case unrolled_loop:
14216 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14217 registers for 4 temporaries anyway. */
14218 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14219 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14220 expected_size);
14221 break;
14222 case rep_prefix_8_byte:
14223 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14224 DImode);
14225 break;
14226 case rep_prefix_4_byte:
14227 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14228 SImode);
14229 break;
14230 case rep_prefix_1_byte:
14231 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14232 QImode);
14233 break;
14234 }
14235 /* Properly adjust the offsets of the src and dest memory for aliasing. */
14236 if (CONST_INT_P (count_exp))
14237 {
14238 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14239 (count / size_needed) * size_needed);
14240 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14241 (count / size_needed) * size_needed);
14242 }
14243 else
14244 {
14245 src = change_address (src, BLKmode, srcreg);
14246 dst = change_address (dst, BLKmode, destreg);
14247 }
14248
14249 /* Step 4: Epilogue to copy the remaining bytes. */
14250
14251 if (label)
14252 {
14253 /* When the main loop is done, COUNT_EXP might hold original count,
14254 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14255 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14256 bytes. Compensate if needed. */
14257
14258 if (size_needed < epilogue_size_needed)
14259 {
14260 tmp =
14261 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14262 GEN_INT (size_needed - 1), count_exp, 1,
14263 OPTAB_DIRECT);
14264 if (tmp != count_exp)
14265 emit_move_insn (count_exp, tmp);
14266 }
14267 emit_label (label);
14268 LABEL_NUSES (label) = 1;
14269 }
14270
14271 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14272 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14273 epilogue_size_needed);
14274 if (jump_around_label)
14275 emit_label (jump_around_label);
14276 return 1;
14277 }
14278
14279 /* Helper function for memset. For a QImode value 0xXY produce
14280 0xXYXYXYXY of the width specified by MODE. This is essentially
14281 a * 0x01010101, but we can do slightly better than
14282 synth_mult by unwinding the sequence by hand on CPUs with
14283 slow multiply. */
14284 static rtx
14285 promote_duplicated_reg (enum machine_mode mode, rtx val)
14286 {
14287 enum machine_mode valmode = GET_MODE (val);
14288 rtx tmp;
14289 int nops = mode == DImode ? 3 : 2;
14290
14291 gcc_assert (mode == SImode || mode == DImode);
14292 if (val == const0_rtx)
14293 return copy_to_mode_reg (mode, const0_rtx);
14294 if (CONST_INT_P (val))
14295 {
14296 HOST_WIDE_INT v = INTVAL (val) & 255;
14297
14298 v |= v << 8;
14299 v |= v << 16;
14300 if (mode == DImode)
14301 v |= (v << 16) << 16;
14302 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14303 }
14304
14305 if (valmode == VOIDmode)
14306 valmode = QImode;
14307 if (valmode != QImode)
14308 val = gen_lowpart (QImode, val);
14309 if (mode == QImode)
14310 return val;
14311 if (!TARGET_PARTIAL_REG_STALL)
14312 nops--;
14313 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14314 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14315 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14316 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14317 {
14318 rtx reg = convert_modes (mode, QImode, val, true);
14319 tmp = promote_duplicated_reg (mode, const1_rtx);
14320 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14321 OPTAB_DIRECT);
14322 }
14323 else
14324 {
14325 rtx reg = convert_modes (mode, QImode, val, true);
14326
14327 if (!TARGET_PARTIAL_REG_STALL)
14328 if (mode == SImode)
14329 emit_insn (gen_movsi_insv_1 (reg, reg));
14330 else
14331 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14332 else
14333 {
14334 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14335 NULL, 1, OPTAB_DIRECT);
14336 reg =
14337 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14338 }
14339 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14340 NULL, 1, OPTAB_DIRECT);
14341 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14342 if (mode == SImode)
14343 return reg;
14344 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14345 NULL, 1, OPTAB_DIRECT);
14346 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14347 return reg;
14348 }
14349 }
14350
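/* For example, promoting the constant 0xab to SImode yields 0xabababab
   and to DImode yields 0xabababababababab.  For a non-constant value the
   code either multiplies by the similarly promoted constant 0x01010101
   (when the multiply sequence is cheaper) or builds the replication with
   an insv or a shift-and-or sequence.  */
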
14351 /* Duplicate value VAL using promote_duplicated_reg into the maximal size that
14352 will be needed by the main loop copying SIZE_NEEDED chunks and by the
14353 prologue getting alignment from ALIGN to DESIRED_ALIGN. */
14354 static rtx
14355 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14356 {
14357 rtx promoted_val;
14358
14359 if (TARGET_64BIT
14360 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14361 promoted_val = promote_duplicated_reg (DImode, val);
14362 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14363 promoted_val = promote_duplicated_reg (SImode, val);
14364 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14365 promoted_val = promote_duplicated_reg (HImode, val);
14366 else
14367 promoted_val = val;
14368
14369 return promoted_val;
14370 }
14371
14372 /* Expand string set operation (memset). Use i386 string operations when
14373 profitable. See the ix86_expand_movmem comment for an explanation of the
14374 individual steps performed. */
14375 int
14376 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14377 rtx expected_align_exp, rtx expected_size_exp)
14378 {
14379 rtx destreg;
14380 rtx label = NULL;
14381 rtx tmp;
14382 rtx jump_around_label = NULL;
14383 HOST_WIDE_INT align = 1;
14384 unsigned HOST_WIDE_INT count = 0;
14385 HOST_WIDE_INT expected_size = -1;
14386 int size_needed = 0, epilogue_size_needed;
14387 int desired_align = 0;
14388 enum stringop_alg alg;
14389 rtx promoted_val = NULL;
14390 bool force_loopy_epilogue = false;
14391 int dynamic_check;
14392
14393 if (CONST_INT_P (align_exp))
14394 align = INTVAL (align_exp);
14395 /* i386 can do misaligned accesses at a reasonable increase in cost. */
14396 if (CONST_INT_P (expected_align_exp)
14397 && INTVAL (expected_align_exp) > align)
14398 align = INTVAL (expected_align_exp);
14399 if (CONST_INT_P (count_exp))
14400 count = expected_size = INTVAL (count_exp);
14401 if (CONST_INT_P (expected_size_exp) && count == 0)
14402 expected_size = INTVAL (expected_size_exp);
14403
14404 /* Step 0: Decide on preferred algorithm, desired alignment and
14405 size of chunks to be copied by main loop. */
14406
14407 alg = decide_alg (count, expected_size, true, &dynamic_check);
14408 desired_align = decide_alignment (align, alg, expected_size);
14409
14410 if (!TARGET_ALIGN_STRINGOPS)
14411 align = desired_align;
14412
14413 if (alg == libcall)
14414 return 0;
14415 gcc_assert (alg != no_stringop);
14416 if (!count)
14417 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14418 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14419 switch (alg)
14420 {
14421 case libcall:
14422 case no_stringop:
14423 gcc_unreachable ();
14424 case loop:
14425 size_needed = GET_MODE_SIZE (Pmode);
14426 break;
14427 case unrolled_loop:
14428 size_needed = GET_MODE_SIZE (Pmode) * 4;
14429 break;
14430 case rep_prefix_8_byte:
14431 size_needed = 8;
14432 break;
14433 case rep_prefix_4_byte:
14434 size_needed = 4;
14435 break;
14436 case rep_prefix_1_byte:
14437 case loop_1_byte:
14438 size_needed = 1;
14439 break;
14440 }
14441 epilogue_size_needed = size_needed;
14442
14443 /* Step 1: Prologue guard. */
14444
14445 /* The alignment code needs the count to be in a register. */
14446 if (CONST_INT_P (count_exp) && desired_align > align)
14447 {
14448 enum machine_mode mode = SImode;
14449 if (TARGET_64BIT && (count & ~0xffffffff))
14450 mode = DImode;
14451 count_exp = force_reg (mode, count_exp);
14452 }
14453 /* Do the cheap promotion to allow better CSE across the
14454 main loop and epilogue (i.e. one load of the big constant ahead
14455 of all the code). */
14456 if (CONST_INT_P (val_exp))
14457 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14458 desired_align, align);
14459 /* Ensure that the alignment prologue won't copy past the end of the block. */
14460 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14461 {
14462 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14463 /* The epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14464 Make sure it is a power of 2. */
14465 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14466
14467 /* To improve performance of small blocks, we jump around the VAL
14468 promoting code. This means that if the promoted VAL is not a
14469 constant, we might not use it in the epilogue and have to fall back
14470 to the byte loop variant. */
14471 if (epilogue_size_needed > 2 && !promoted_val)
14472 force_loopy_epilogue = true;
14473 label = gen_label_rtx ();
14474 emit_cmp_and_jump_insns (count_exp,
14475 GEN_INT (epilogue_size_needed),
14476 LTU, 0, counter_mode (count_exp), 1, label);
14477 if (GET_CODE (count_exp) == CONST_INT)
14478 ;
14479 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14480 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14481 else
14482 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14483 }
14484 if (dynamic_check != -1)
14485 {
14486 rtx hot_label = gen_label_rtx ();
14487 jump_around_label = gen_label_rtx ();
14488 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14489 LEU, 0, counter_mode (count_exp), 1, hot_label);
14490 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14491 set_storage_via_libcall (dst, count_exp, val_exp, false);
14492 emit_jump (jump_around_label);
14493 emit_label (hot_label);
14494 }
14495
14496 /* Step 2: Alignment prologue. */
14497
14498 /* Do the expensive promotion once we have branched off the small blocks. */
14499 if (!promoted_val)
14500 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14501 desired_align, align);
14502 gcc_assert (desired_align >= 1 && align >= 1);
14503
14504 if (desired_align > align)
14505 {
14506 /* Except for the first move in the epilogue, we no longer know
14507 the constant offset in the aliasing info. It doesn't seem worth
14508 the pain to maintain it for the first move, so throw away
14509 the info early. */
14510 dst = change_address (dst, BLKmode, destreg);
14511 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14512 desired_align);
14513 }
14514 if (label && size_needed == 1)
14515 {
14516 emit_label (label);
14517 LABEL_NUSES (label) = 1;
14518 label = NULL;
14519 }
14520
14521 /* Step 3: Main loop. */
14522
14523 switch (alg)
14524 {
14525 case libcall:
14526 case no_stringop:
14527 gcc_unreachable ();
14528 case loop_1_byte:
14529 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14530 count_exp, QImode, 1, expected_size);
14531 break;
14532 case loop:
14533 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14534 count_exp, Pmode, 1, expected_size);
14535 break;
14536 case unrolled_loop:
14537 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14538 count_exp, Pmode, 4, expected_size);
14539 break;
14540 case rep_prefix_8_byte:
14541 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14542 DImode);
14543 break;
14544 case rep_prefix_4_byte:
14545 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14546 SImode);
14547 break;
14548 case rep_prefix_1_byte:
14549 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14550 QImode);
14551 break;
14552 }
14553 /* Properly adjust the offset of the destination memory for aliasing. */
14554 if (CONST_INT_P (count_exp))
14555 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14556 (count / size_needed) * size_needed);
14557 else
14558 dst = change_address (dst, BLKmode, destreg);
14559
14560 /* Step 4: Epilogue to copy the remaining bytes. */
14561
14562 if (label)
14563 {
14564 /* When the main loop is done, COUNT_EXP might hold the original count,
14565 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14566 The epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14567 bytes. Compensate if needed. */
14568
14569 if (size_needed < desired_align - align)
14570 {
14571 tmp =
14572 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14573 GEN_INT (size_needed - 1), count_exp, 1,
14574 OPTAB_DIRECT);
14575 size_needed = desired_align - align + 1;
14576 if (tmp != count_exp)
14577 emit_move_insn (count_exp, tmp);
14578 }
14579 emit_label (label);
14580 LABEL_NUSES (label) = 1;
14581 }
14582 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14583 {
14584 if (force_loopy_epilogue)
14585 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14586 size_needed);
14587 else
14588 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14589 size_needed);
14590 }
14591 if (jump_around_label)
14592 emit_label (jump_around_label);
14593 return 1;
14594 }
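/* Illustrative sketch (not from the original source) of the shape of the
   code emitted by ix86_expand_setmem for an unknown COUNT when the chosen
   algorithm is rep_prefix_4_byte and DESIRED_ALIGN is 4:

       cmp  $epilogue_size_needed, count   ; Step 1: prologue guard
       jb   small
       <store bytes until dst is 4-byte aligned>   ; Step 2: alignment prologue
       rep stosl                                   ; Step 3: main loop
     small:
       <store the remaining tail bytes>            ; Step 4: epilogue

   The exact sequence depends on the selected algorithm, the known alignment
   and whether the promoted value is a compile-time constant.  */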
14595
14596 /* Expand strlen. */
14597 int
14598 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14599 {
14600 rtx addr, scratch1, scratch2, scratch3, scratch4;
14601
14602 /* The generic case of the strlen expander is long. Avoid expanding
14603 it unless TARGET_INLINE_ALL_STRINGOPS. */
14604
14605 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14606 && !TARGET_INLINE_ALL_STRINGOPS
14607 && !optimize_size
14608 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14609 return 0;
14610
14611 addr = force_reg (Pmode, XEXP (src, 0));
14612 scratch1 = gen_reg_rtx (Pmode);
14613
14614 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14615 && !optimize_size)
14616 {
14617 /* Well, it seems that some optimizer does not combine a call like
14618 foo(strlen(bar), strlen(bar));
14619 when the move and the subtraction are done here. It does calculate
14620 the length just once when these instructions are done inside of
14621 output_strlen_unroll(). But since &bar[strlen(bar)] is often used
14622 and this uses one fewer register for the lifetime of
14623 output_strlen_unroll(), this is better. */
14624
14625 emit_move_insn (out, addr);
14626
14627 ix86_expand_strlensi_unroll_1 (out, src, align);
14628
14629 /* strlensi_unroll_1 returns the address of the zero at the end of
14630 the string, like memchr(), so compute the length by subtracting
14631 the start address. */
14632 if (TARGET_64BIT)
14633 emit_insn (gen_subdi3 (out, out, addr));
14634 else
14635 emit_insn (gen_subsi3 (out, out, addr));
14636 }
14637 else
14638 {
14639 rtx unspec;
14640 scratch2 = gen_reg_rtx (Pmode);
14641 scratch3 = gen_reg_rtx (Pmode);
14642 scratch4 = force_reg (Pmode, constm1_rtx);
14643
14644 emit_move_insn (scratch3, addr);
14645 eoschar = force_reg (QImode, eoschar);
14646
14647 src = replace_equiv_address_nv (src, scratch3);
14648
14649 /* If .md starts supporting :P, this can be done in .md. */
14650 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14651 scratch4), UNSPEC_SCAS);
14652 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14653 if (TARGET_64BIT)
14654 {
14655 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14656 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14657 }
14658 else
14659 {
14660 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14661 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14662 }
14663 }
14664 return 1;
14665 }
14666
14667 /* Expand the appropriate insns for doing strlen if not just doing
14668 repnz; scasb
14669
14670 out = result, initialized with the start address
14671 align_rtx = alignment of the address.
14672 scratch = scratch register, initialized with the start address when
14673 not aligned, otherwise undefined
14674
14675 This is just the body. It needs the initializations mentioned above and
14676 some address computing at the end. These things are done in i386.md. */
14677
14678 static void
14679 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14680 {
14681 int align;
14682 rtx tmp;
14683 rtx align_2_label = NULL_RTX;
14684 rtx align_3_label = NULL_RTX;
14685 rtx align_4_label = gen_label_rtx ();
14686 rtx end_0_label = gen_label_rtx ();
14687 rtx mem;
14688 rtx tmpreg = gen_reg_rtx (SImode);
14689 rtx scratch = gen_reg_rtx (SImode);
14690 rtx cmp;
14691
14692 align = 0;
14693 if (CONST_INT_P (align_rtx))
14694 align = INTVAL (align_rtx);
14695
14696 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14697
14698 /* Is there a known alignment and is it less than 4? */
14699 if (align < 4)
14700 {
14701 rtx scratch1 = gen_reg_rtx (Pmode);
14702 emit_move_insn (scratch1, out);
14703 /* Is there a known alignment and is it not 2? */
14704 if (align != 2)
14705 {
14706 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14707 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14708
14709 /* Leave just the 3 lower bits. */
14710 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14711 NULL_RTX, 0, OPTAB_WIDEN);
14712
14713 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14714 Pmode, 1, align_4_label);
14715 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14716 Pmode, 1, align_2_label);
14717 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14718 Pmode, 1, align_3_label);
14719 }
14720 else
14721 {
14722 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14723 check whether it is aligned to a 4-byte boundary. */
14724
14725 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14726 NULL_RTX, 0, OPTAB_WIDEN);
14727
14728 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14729 Pmode, 1, align_4_label);
14730 }
14731
14732 mem = change_address (src, QImode, out);
14733
14734 /* Now compare the bytes. */
14735
14736 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14737 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14738 QImode, 1, end_0_label);
14739
14740 /* Increment the address. */
14741 if (TARGET_64BIT)
14742 emit_insn (gen_adddi3 (out, out, const1_rtx));
14743 else
14744 emit_insn (gen_addsi3 (out, out, const1_rtx));
14745
14746 /* Not needed with an alignment of 2. */
14747 if (align != 2)
14748 {
14749 emit_label (align_2_label);
14750
14751 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14752 end_0_label);
14753
14754 if (TARGET_64BIT)
14755 emit_insn (gen_adddi3 (out, out, const1_rtx));
14756 else
14757 emit_insn (gen_addsi3 (out, out, const1_rtx));
14758
14759 emit_label (align_3_label);
14760 }
14761
14762 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14763 end_0_label);
14764
14765 if (TARGET_64BIT)
14766 emit_insn (gen_adddi3 (out, out, const1_rtx));
14767 else
14768 emit_insn (gen_addsi3 (out, out, const1_rtx));
14769 }
14770
14771 /* Generate a loop to check 4 bytes at a time. It is not a good idea
14772 to align this loop; doing so only bloats the code and does not
14773 help to speed it up. */
14774 emit_label (align_4_label);
14775
14776 mem = change_address (src, SImode, out);
14777 emit_move_insn (scratch, mem);
14778 if (TARGET_64BIT)
14779 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14780 else
14781 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14782
14783 /* This formula yields a nonzero result iff one of the bytes is zero.
14784 This saves three branches inside the loop and many cycles. */
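/* Worked example (for illustration, not from the original source): the
   insns below compute ((x - 0x01010101) & ~x) & 0x80808080. For
   x = 0x12340078, which has a zero in byte 1:
       x - 0x01010101 = 0x1132FF77
       ~x             = 0xEDCBFF87
       AND of the two = 0x0102FF07
       & 0x80808080   = 0x00008000   (nonzero, so a zero byte was found)
   while a word containing no zero byte always yields 0.  */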
14785
14786 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14787 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14788 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14789 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14790 gen_int_mode (0x80808080, SImode)));
14791 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14792 align_4_label);
14793
14794 if (TARGET_CMOVE)
14795 {
14796 rtx reg = gen_reg_rtx (SImode);
14797 rtx reg2 = gen_reg_rtx (Pmode);
14798 emit_move_insn (reg, tmpreg);
14799 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14800
14801 /* If zero is not in the first two bytes, move two bytes forward. */
14802 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14803 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14804 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14805 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14806 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14807 reg,
14808 tmpreg)));
14809 /* Emit lea manually to avoid clobbering of flags. */
14810 emit_insn (gen_rtx_SET (SImode, reg2,
14811 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14812
14813 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14814 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14815 emit_insn (gen_rtx_SET (VOIDmode, out,
14816 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14817 reg2,
14818 out)));
14819
14820 }
14821 else
14822 {
14823 rtx end_2_label = gen_label_rtx ();
14824 /* Is zero in the first two bytes? */
14825
14826 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14827 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14828 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14829 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14830 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14831 pc_rtx);
14832 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14833 JUMP_LABEL (tmp) = end_2_label;
14834
14835 /* Not in the first two. Move two bytes forward. */
14836 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14837 if (TARGET_64BIT)
14838 emit_insn (gen_adddi3 (out, out, const2_rtx));
14839 else
14840 emit_insn (gen_addsi3 (out, out, const2_rtx));
14841
14842 emit_label (end_2_label);
14843
14844 }
14845
14846 /* Avoid branch in fixing the byte. */
14847 tmpreg = gen_lowpart (QImode, tmpreg);
14848 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14849 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
14850 if (TARGET_64BIT)
14851 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14852 else
14853 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14854
14855 emit_label (end_0_label);
14856 }
14857
14858 void
14859 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14860 rtx callarg2 ATTRIBUTE_UNUSED,
14861 rtx pop, int sibcall)
14862 {
14863 rtx use = NULL, call;
14864
14865 if (pop == const0_rtx)
14866 pop = NULL;
14867 gcc_assert (!TARGET_64BIT || !pop);
14868
14869 if (TARGET_MACHO && !TARGET_64BIT)
14870 {
14871 #if TARGET_MACHO
14872 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14873 fnaddr = machopic_indirect_call_target (fnaddr);
14874 #endif
14875 }
14876 else
14877 {
14878 /* Static functions and indirect calls don't need the pic register. */
14879 if (! TARGET_64BIT && flag_pic
14880 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14881 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14882 use_reg (&use, pic_offset_table_rtx);
14883 }
14884
14885 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14886 {
14887 rtx al = gen_rtx_REG (QImode, 0);
14888 emit_move_insn (al, callarg2);
14889 use_reg (&use, al);
14890 }
14891
14892 if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14893 {
14894 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14895 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14896 }
14897 if (sibcall && TARGET_64BIT
14898 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14899 {
14900 rtx addr;
14901 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14902 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14903 emit_move_insn (fnaddr, addr);
14904 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14905 }
14906
14907 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14908 if (retval)
14909 call = gen_rtx_SET (VOIDmode, retval, call);
14910 if (pop)
14911 {
14912 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14913 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14914 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14915 }
14916
14917 call = emit_call_insn (call);
14918 if (use)
14919 CALL_INSN_FUNCTION_USAGE (call) = use;
14920 }
14921
14922 \f
14923 /* Clear stack slot assignments remembered from previous functions.
14924 This is called from INIT_EXPANDERS once before RTL is emitted for each
14925 function. */
14926
14927 static struct machine_function *
14928 ix86_init_machine_status (void)
14929 {
14930 struct machine_function *f;
14931
14932 f = ggc_alloc_cleared (sizeof (struct machine_function));
14933 f->use_fast_prologue_epilogue_nregs = -1;
14934 f->tls_descriptor_call_expanded_p = 0;
14935
14936 return f;
14937 }
14938
14939 /* Return a MEM corresponding to a stack slot with mode MODE.
14940 Allocate a new slot if necessary.
14941
14942 The RTL for a function can have several slots available: N is
14943 which slot to use. */
14944
14945 rtx
14946 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14947 {
14948 struct stack_local_entry *s;
14949
14950 gcc_assert (n < MAX_386_STACK_LOCALS);
14951
14952 for (s = ix86_stack_locals; s; s = s->next)
14953 if (s->mode == mode && s->n == n)
14954 return copy_rtx (s->rtl);
14955
14956 s = (struct stack_local_entry *)
14957 ggc_alloc (sizeof (struct stack_local_entry));
14958 s->n = n;
14959 s->mode = mode;
14960 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14961
14962 s->next = ix86_stack_locals;
14963 ix86_stack_locals = s;
14964 return s->rtl;
14965 }
14966
14967 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14968
14969 static GTY(()) rtx ix86_tls_symbol;
14970 rtx
14971 ix86_tls_get_addr (void)
14972 {
14973
14974 if (!ix86_tls_symbol)
14975 {
14976 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14977 (TARGET_ANY_GNU_TLS
14978 && !TARGET_64BIT)
14979 ? "___tls_get_addr"
14980 : "__tls_get_addr");
14981 }
14982
14983 return ix86_tls_symbol;
14984 }
14985
14986 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14987
14988 static GTY(()) rtx ix86_tls_module_base_symbol;
14989 rtx
14990 ix86_tls_module_base (void)
14991 {
14992
14993 if (!ix86_tls_module_base_symbol)
14994 {
14995 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14996 "_TLS_MODULE_BASE_");
14997 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14998 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14999 }
15000
15001 return ix86_tls_module_base_symbol;
15002 }
15003 \f
15004 /* Calculate the length of the memory address in the instruction
15005 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15006
15007 int
15008 memory_address_length (rtx addr)
15009 {
15010 struct ix86_address parts;
15011 rtx base, index, disp;
15012 int len;
15013 int ok;
15014
15015 if (GET_CODE (addr) == PRE_DEC
15016 || GET_CODE (addr) == POST_INC
15017 || GET_CODE (addr) == PRE_MODIFY
15018 || GET_CODE (addr) == POST_MODIFY)
15019 return 0;
15020
15021 ok = ix86_decompose_address (addr, &parts);
15022 gcc_assert (ok);
15023
15024 if (parts.base && GET_CODE (parts.base) == SUBREG)
15025 parts.base = SUBREG_REG (parts.base);
15026 if (parts.index && GET_CODE (parts.index) == SUBREG)
15027 parts.index = SUBREG_REG (parts.index);
15028
15029 base = parts.base;
15030 index = parts.index;
15031 disp = parts.disp;
15032 len = 0;
15033
15034 /* Rule of thumb:
15035 - esp as the base always wants an index,
15036 - ebp as the base always wants a displacement. */
15037
15038 /* Register Indirect. */
15039 if (base && !index && !disp)
15040 {
15041 /* esp (for its index) and ebp (for its displacement) need
15042 the two-byte modrm form. */
15043 if (addr == stack_pointer_rtx
15044 || addr == arg_pointer_rtx
15045 || addr == frame_pointer_rtx
15046 || addr == hard_frame_pointer_rtx)
15047 len = 1;
15048 }
15049
15050 /* Direct Addressing. */
15051 else if (disp && !base && !index)
15052 len = 4;
15053
15054 else
15055 {
15056 /* Find the length of the displacement constant. */
15057 if (disp)
15058 {
15059 if (base && satisfies_constraint_K (disp))
15060 len = 1;
15061 else
15062 len = 4;
15063 }
15064 /* ebp always wants a displacement. */
15065 else if (base == hard_frame_pointer_rtx)
15066 len = 1;
15067
15068 /* An index requires the two-byte modrm form.... */
15069 if (index
15070 /* ...like esp, which always wants an index. */
15071 || base == stack_pointer_rtx
15072 || base == arg_pointer_rtx
15073 || base == frame_pointer_rtx)
15074 len += 1;
15075 }
15076
15077 return len;
15078 }
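/* Some illustrative values (not from the original source); remember that
   the result excludes the modrm byte, opcode and prefixes:
       (reg %eax)                          -> 0   plain register indirect
       (reg %esp)                          -> 1   needs a SIB byte
       (symbol_ref foo)                    -> 4   32-bit displacement only
       (plus (reg %ebx) (const_int 8))     -> 1   8-bit displacement
       (plus (reg %ebx) (const_int 4096))  -> 4   32-bit displacement  */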
15079
15080 /* Compute the default value for the "length_immediate" attribute. When
15081 SHORTFORM is set, expect that the insn has an 8-bit immediate alternative. */
15082 int
15083 ix86_attr_length_immediate_default (rtx insn, int shortform)
15084 {
15085 int len = 0;
15086 int i;
15087 extract_insn_cached (insn);
15088 for (i = recog_data.n_operands - 1; i >= 0; --i)
15089 if (CONSTANT_P (recog_data.operand[i]))
15090 {
15091 gcc_assert (!len);
15092 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15093 len = 1;
15094 else
15095 {
15096 switch (get_attr_mode (insn))
15097 {
15098 case MODE_QI:
15099 len += 1;
15100 break;
15101 case MODE_HI:
15102 len += 2;
15103 break;
15104 case MODE_SI:
15105 len += 4;
15106 break;
15107 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15108 case MODE_DI:
15109 len += 4;
15110 break;
15111 default:
15112 fatal_insn ("unknown insn mode", insn);
15113 }
15114 }
15115 }
15116 return len;
15117 }
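/* For example (not from the original source): "addl $123456, %eax" uses a
   4-byte immediate, while for an insn pattern that has an 8-bit immediate
   alternative (SHORTFORM set) "addl $3, %eax" contributes only 1 byte,
   because $3 satisfies the K constraint (a signed 8-bit constant).  */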
15118 /* Compute default value for "length_address" attribute. */
15119 int
15120 ix86_attr_length_address_default (rtx insn)
15121 {
15122 int i;
15123
15124 if (get_attr_type (insn) == TYPE_LEA)
15125 {
15126 rtx set = PATTERN (insn);
15127
15128 if (GET_CODE (set) == PARALLEL)
15129 set = XVECEXP (set, 0, 0);
15130
15131 gcc_assert (GET_CODE (set) == SET);
15132
15133 return memory_address_length (SET_SRC (set));
15134 }
15135
15136 extract_insn_cached (insn);
15137 for (i = recog_data.n_operands - 1; i >= 0; --i)
15138 if (MEM_P (recog_data.operand[i]))
15139 {
15140 return memory_address_length (XEXP (recog_data.operand[i], 0));
15142 }
15143 return 0;
15144 }
15145 \f
15146 /* Return the maximum number of instructions a cpu can issue. */
15147
15148 static int
15149 ix86_issue_rate (void)
15150 {
15151 switch (ix86_tune)
15152 {
15153 case PROCESSOR_PENTIUM:
15154 case PROCESSOR_K6:
15155 return 2;
15156
15157 case PROCESSOR_PENTIUMPRO:
15158 case PROCESSOR_PENTIUM4:
15159 case PROCESSOR_ATHLON:
15160 case PROCESSOR_K8:
15161 case PROCESSOR_AMDFAM10:
15162 case PROCESSOR_NOCONA:
15163 case PROCESSOR_GENERIC32:
15164 case PROCESSOR_GENERIC64:
15165 return 3;
15166
15167 case PROCESSOR_CORE2:
15168 return 4;
15169
15170 default:
15171 return 1;
15172 }
15173 }
15174
15175 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags
15176 set by DEP_INSN and nothing else set by DEP_INSN. */
15177
15178 static int
15179 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15180 {
15181 rtx set, set2;
15182
15183 /* Simplify the test for uninteresting insns. */
15184 if (insn_type != TYPE_SETCC
15185 && insn_type != TYPE_ICMOV
15186 && insn_type != TYPE_FCMOV
15187 && insn_type != TYPE_IBR)
15188 return 0;
15189
15190 if ((set = single_set (dep_insn)) != 0)
15191 {
15192 set = SET_DEST (set);
15193 set2 = NULL_RTX;
15194 }
15195 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15196 && XVECLEN (PATTERN (dep_insn), 0) == 2
15197 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15198 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15199 {
15200 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15201 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15202 }
15203 else
15204 return 0;
15205
15206 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15207 return 0;
15208
15209 /* This test is true if the dependent insn reads the flags but
15210 not any other potentially set register. */
15211 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15212 return 0;
15213
15214 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15215 return 0;
15216
15217 return 1;
15218 }
15219
15220 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15221 address with operands set by DEP_INSN. */
15222
15223 static int
15224 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15225 {
15226 rtx addr;
15227
15228 if (insn_type == TYPE_LEA
15229 && TARGET_PENTIUM)
15230 {
15231 addr = PATTERN (insn);
15232
15233 if (GET_CODE (addr) == PARALLEL)
15234 addr = XVECEXP (addr, 0, 0);
15235
15236 gcc_assert (GET_CODE (addr) == SET);
15237
15238 addr = SET_SRC (addr);
15239 }
15240 else
15241 {
15242 int i;
15243 extract_insn_cached (insn);
15244 for (i = recog_data.n_operands - 1; i >= 0; --i)
15245 if (MEM_P (recog_data.operand[i]))
15246 {
15247 addr = XEXP (recog_data.operand[i], 0);
15248 goto found;
15249 }
15250 return 0;
15251 found:;
15252 }
15253
15254 return modified_in_p (addr, dep_insn);
15255 }
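/* For illustration (not from the original source): on Pentium the sequence
       addl $4, %ebx
       movl (%ebx), %eax
   hits an address-generation interlock, since the load's address register
   is written by the immediately preceding insn; ix86_adjust_cost below
   charges one extra cycle of latency for that case.  */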
15256
15257 static int
15258 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15259 {
15260 enum attr_type insn_type, dep_insn_type;
15261 enum attr_memory memory;
15262 rtx set, set2;
15263 int dep_insn_code_number;
15264
15265 /* Anti and output dependencies have zero cost on all CPUs. */
15266 if (REG_NOTE_KIND (link) != 0)
15267 return 0;
15268
15269 dep_insn_code_number = recog_memoized (dep_insn);
15270
15271 /* If we can't recognize the insns, we can't really do anything. */
15272 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15273 return cost;
15274
15275 insn_type = get_attr_type (insn);
15276 dep_insn_type = get_attr_type (dep_insn);
15277
15278 switch (ix86_tune)
15279 {
15280 case PROCESSOR_PENTIUM:
15281 /* Address Generation Interlock adds a cycle of latency. */
15282 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15283 cost += 1;
15284
15285 /* ??? Compares pair with jump/setcc. */
15286 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15287 cost = 0;
15288
15289 /* Floating point stores require the value to be ready one cycle earlier. */
15290 if (insn_type == TYPE_FMOV
15291 && get_attr_memory (insn) == MEMORY_STORE
15292 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15293 cost += 1;
15294 break;
15295
15296 case PROCESSOR_PENTIUMPRO:
15297 memory = get_attr_memory (insn);
15298
15299 /* INT->FP conversion is expensive. */
15300 if (get_attr_fp_int_src (dep_insn))
15301 cost += 5;
15302
15303 /* There is one cycle extra latency between an FP op and a store. */
15304 if (insn_type == TYPE_FMOV
15305 && (set = single_set (dep_insn)) != NULL_RTX
15306 && (set2 = single_set (insn)) != NULL_RTX
15307 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15308 && MEM_P (SET_DEST (set2)))
15309 cost += 1;
15310
15311 /* Show the ability of the reorder buffer to hide the latency of a load
15312 by executing it in parallel with the previous instruction when the
15313 previous instruction is not needed to compute the address. */
15314 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15315 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15316 {
15317 /* Claim moves to take one cycle, as the core can issue one load
15318 at a time and the next load can start a cycle later. */
15319 if (dep_insn_type == TYPE_IMOV
15320 || dep_insn_type == TYPE_FMOV)
15321 cost = 1;
15322 else if (cost > 1)
15323 cost--;
15324 }
15325 break;
15326
15327 case PROCESSOR_K6:
15328 memory = get_attr_memory (insn);
15329
15330 /* The esp dependency is resolved before the instruction is really
15331 finished. */
15332 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15333 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15334 return 1;
15335
15336 /* INT->FP conversion is expensive. */
15337 if (get_attr_fp_int_src (dep_insn))
15338 cost += 5;
15339
15340 /* Show the ability of the reorder buffer to hide the latency of a load
15341 by executing it in parallel with the previous instruction when the
15342 previous instruction is not needed to compute the address. */
15343 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15344 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15345 {
15346 /* Claim moves to take one cycle, as the core can issue one load
15347 at a time and the next load can start a cycle later. */
15348 if (dep_insn_type == TYPE_IMOV
15349 || dep_insn_type == TYPE_FMOV)
15350 cost = 1;
15351 else if (cost > 2)
15352 cost -= 2;
15353 else
15354 cost = 1;
15355 }
15356 break;
15357
15358 case PROCESSOR_ATHLON:
15359 case PROCESSOR_K8:
15360 case PROCESSOR_AMDFAM10:
15361 case PROCESSOR_GENERIC32:
15362 case PROCESSOR_GENERIC64:
15363 memory = get_attr_memory (insn);
15364
15365 /* Show the ability of the reorder buffer to hide the latency of a load
15366 by executing it in parallel with the previous instruction when the
15367 previous instruction is not needed to compute the address. */
15368 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15369 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15370 {
15371 enum attr_unit unit = get_attr_unit (insn);
15372 int loadcost = 3;
15373
15374 /* Because of the difference between the length of integer and
15375 floating unit pipeline preparation stages, the memory operands
15376 for floating point are cheaper.
15377
15378 ??? For Athlon the difference is most probably 2. */
15379 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15380 loadcost = 3;
15381 else
15382 loadcost = TARGET_ATHLON ? 2 : 0;
15383
15384 if (cost >= loadcost)
15385 cost -= loadcost;
15386 else
15387 cost = 0;
15388 }
15389
15390 default:
15391 break;
15392 }
15393
15394 return cost;
15395 }
15396
15397 /* How many alternative schedules to try. This should be as wide as the
15398 scheduling freedom in the DFA, but no wider. Making this value too
15399 large results in extra work for the scheduler. */
15400
15401 static int
15402 ia32_multipass_dfa_lookahead (void)
15403 {
15404 if (ix86_tune == PROCESSOR_PENTIUM)
15405 return 2;
15406
15407 if (ix86_tune == PROCESSOR_PENTIUMPRO
15408 || ix86_tune == PROCESSOR_K6)
15409 return 1;
15410
15411 else
15412 return 0;
15413 }
15414
15415 \f
15416 /* Compute the alignment given to a constant that is being placed in memory.
15417 EXP is the constant and ALIGN is the alignment that the object would
15418 ordinarily have.
15419 The value of this function is used instead of that alignment to align
15420 the object. */
15421
15422 int
15423 ix86_constant_alignment (tree exp, int align)
15424 {
15425 if (TREE_CODE (exp) == REAL_CST)
15426 {
15427 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15428 return 64;
15429 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15430 return 128;
15431 }
15432 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15433 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15434 return BITS_PER_WORD;
15435
15436 return align;
15437 }
15438
15439 /* Compute the alignment for a static variable.
15440 TYPE is the data type, and ALIGN is the alignment that
15441 the object would ordinarily have. The value of this function is used
15442 instead of that alignment to align the object. */
15443
15444 int
15445 ix86_data_alignment (tree type, int align)
15446 {
15447 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15448
15449 if (AGGREGATE_TYPE_P (type)
15450 && TYPE_SIZE (type)
15451 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15452 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15453 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15454 && align < max_align)
15455 align = max_align;
15456
15457 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15458 to a 16-byte boundary. */
15459 if (TARGET_64BIT)
15460 {
15461 if (AGGREGATE_TYPE_P (type)
15462 && TYPE_SIZE (type)
15463 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15464 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15465 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15466 return 128;
15467 }
15468
15469 if (TREE_CODE (type) == ARRAY_TYPE)
15470 {
15471 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15472 return 64;
15473 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15474 return 128;
15475 }
15476 else if (TREE_CODE (type) == COMPLEX_TYPE)
15477 {
15478
15479 if (TYPE_MODE (type) == DCmode && align < 64)
15480 return 64;
15481 if (TYPE_MODE (type) == XCmode && align < 128)
15482 return 128;
15483 }
15484 else if ((TREE_CODE (type) == RECORD_TYPE
15485 || TREE_CODE (type) == UNION_TYPE
15486 || TREE_CODE (type) == QUAL_UNION_TYPE)
15487 && TYPE_FIELDS (type))
15488 {
15489 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15490 return 64;
15491 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15492 return 128;
15493 }
15494 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15495 || TREE_CODE (type) == INTEGER_TYPE)
15496 {
15497 if (TYPE_MODE (type) == DFmode && align < 64)
15498 return 64;
15499 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15500 return 128;
15501 }
15502
15503 return align;
15504 }
15505
15506 /* Compute the alignment for a local variable.
15507 TYPE is the data type, and ALIGN is the alignment that
15508 the object would ordinarily have. The value of this macro is used
15509 instead of that alignment to align the object. */
15510
15511 int
15512 ix86_local_alignment (tree type, int align)
15513 {
15514 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15515 to a 16-byte boundary. */
15516 if (TARGET_64BIT)
15517 {
15518 if (AGGREGATE_TYPE_P (type)
15519 && TYPE_SIZE (type)
15520 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15521 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15522 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15523 return 128;
15524 }
15525 if (TREE_CODE (type) == ARRAY_TYPE)
15526 {
15527 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15528 return 64;
15529 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15530 return 128;
15531 }
15532 else if (TREE_CODE (type) == COMPLEX_TYPE)
15533 {
15534 if (TYPE_MODE (type) == DCmode && align < 64)
15535 return 64;
15536 if (TYPE_MODE (type) == XCmode && align < 128)
15537 return 128;
15538 }
15539 else if ((TREE_CODE (type) == RECORD_TYPE
15540 || TREE_CODE (type) == UNION_TYPE
15541 || TREE_CODE (type) == QUAL_UNION_TYPE)
15542 && TYPE_FIELDS (type))
15543 {
15544 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15545 return 64;
15546 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15547 return 128;
15548 }
15549 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15550 || TREE_CODE (type) == INTEGER_TYPE)
15551 {
15552
15553 if (TYPE_MODE (type) == DFmode && align < 64)
15554 return 64;
15555 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15556 return 128;
15557 }
15558 return align;
15559 }
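/* For example (not from the original source): on x86-64 a local aggregate
   such as "double buf[8]" is given 128-bit alignment by ix86_local_alignment
   above, while a scalar local "double" is raised to 64-bit alignment when
   it would otherwise be aligned less strictly.  */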
15560 \f
15561 /* Emit RTL insns to initialize the variable parts of a trampoline.
15562 FNADDR is an RTX for the address of the function's pure code.
15563 CXT is an RTX for the static chain value for the function. */
15564 void
15565 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15566 {
15567 if (!TARGET_64BIT)
15568 {
15569 /* Compute offset from the end of the jmp to the target function. */
15570 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15571 plus_constant (tramp, 10),
15572 NULL_RTX, 1, OPTAB_DIRECT);
15573 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15574 gen_int_mode (0xb9, QImode));
15575 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15576 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15577 gen_int_mode (0xe9, QImode));
15578 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15579 }
15580 else
15581 {
15582 int offset = 0;
15583 /* Try to load the address using the shorter movl instead of movabs.
15584 We may want to support movq for kernel mode, but the kernel does not
15585 use trampolines at the moment. */
15586 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15587 {
15588 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15589 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15590 gen_int_mode (0xbb41, HImode));
15591 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15592 gen_lowpart (SImode, fnaddr));
15593 offset += 6;
15594 }
15595 else
15596 {
15597 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15598 gen_int_mode (0xbb49, HImode));
15599 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15600 fnaddr);
15601 offset += 10;
15602 }
15603 /* Load static chain using movabs to r10. */
15604 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15605 gen_int_mode (0xba49, HImode));
15606 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15607 cxt);
15608 offset += 10;
15609 /* Jump to r11. */
15610 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15611 gen_int_mode (0xff49, HImode));
15612 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15613 gen_int_mode (0xe3, QImode));
15614 offset += 3;
15615 gcc_assert (offset <= TRAMPOLINE_SIZE);
15616 }
15617
15618 #ifdef ENABLE_EXECUTE_STACK
15619 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15620 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15621 #endif
15622 }
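/* Rough byte layout written above (for illustration, not from the original
   source). 32-bit trampoline:
       offset 0:  b9 <cxt32>      movl  $CXT, %ecx
       offset 5:  e9 <disp32>     jmp   FNADDR (pc-relative)
   64-bit trampoline when FNADDR is a zero-extendable immediate:
       41 bb <imm32>   movl   $FNADDR, %r11d
       49 ba <imm64>   movabs $CXT, %r10
       49 ff e3        jmp    *%r11  */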
15623 \f
15624 /* Codes for all the SSE/MMX builtins. */
15625 enum ix86_builtins
15626 {
15627 IX86_BUILTIN_ADDPS,
15628 IX86_BUILTIN_ADDSS,
15629 IX86_BUILTIN_DIVPS,
15630 IX86_BUILTIN_DIVSS,
15631 IX86_BUILTIN_MULPS,
15632 IX86_BUILTIN_MULSS,
15633 IX86_BUILTIN_SUBPS,
15634 IX86_BUILTIN_SUBSS,
15635
15636 IX86_BUILTIN_CMPEQPS,
15637 IX86_BUILTIN_CMPLTPS,
15638 IX86_BUILTIN_CMPLEPS,
15639 IX86_BUILTIN_CMPGTPS,
15640 IX86_BUILTIN_CMPGEPS,
15641 IX86_BUILTIN_CMPNEQPS,
15642 IX86_BUILTIN_CMPNLTPS,
15643 IX86_BUILTIN_CMPNLEPS,
15644 IX86_BUILTIN_CMPNGTPS,
15645 IX86_BUILTIN_CMPNGEPS,
15646 IX86_BUILTIN_CMPORDPS,
15647 IX86_BUILTIN_CMPUNORDPS,
15648 IX86_BUILTIN_CMPEQSS,
15649 IX86_BUILTIN_CMPLTSS,
15650 IX86_BUILTIN_CMPLESS,
15651 IX86_BUILTIN_CMPNEQSS,
15652 IX86_BUILTIN_CMPNLTSS,
15653 IX86_BUILTIN_CMPNLESS,
15654 IX86_BUILTIN_CMPNGTSS,
15655 IX86_BUILTIN_CMPNGESS,
15656 IX86_BUILTIN_CMPORDSS,
15657 IX86_BUILTIN_CMPUNORDSS,
15658
15659 IX86_BUILTIN_COMIEQSS,
15660 IX86_BUILTIN_COMILTSS,
15661 IX86_BUILTIN_COMILESS,
15662 IX86_BUILTIN_COMIGTSS,
15663 IX86_BUILTIN_COMIGESS,
15664 IX86_BUILTIN_COMINEQSS,
15665 IX86_BUILTIN_UCOMIEQSS,
15666 IX86_BUILTIN_UCOMILTSS,
15667 IX86_BUILTIN_UCOMILESS,
15668 IX86_BUILTIN_UCOMIGTSS,
15669 IX86_BUILTIN_UCOMIGESS,
15670 IX86_BUILTIN_UCOMINEQSS,
15671
15672 IX86_BUILTIN_CVTPI2PS,
15673 IX86_BUILTIN_CVTPS2PI,
15674 IX86_BUILTIN_CVTSI2SS,
15675 IX86_BUILTIN_CVTSI642SS,
15676 IX86_BUILTIN_CVTSS2SI,
15677 IX86_BUILTIN_CVTSS2SI64,
15678 IX86_BUILTIN_CVTTPS2PI,
15679 IX86_BUILTIN_CVTTSS2SI,
15680 IX86_BUILTIN_CVTTSS2SI64,
15681
15682 IX86_BUILTIN_MAXPS,
15683 IX86_BUILTIN_MAXSS,
15684 IX86_BUILTIN_MINPS,
15685 IX86_BUILTIN_MINSS,
15686
15687 IX86_BUILTIN_LOADUPS,
15688 IX86_BUILTIN_STOREUPS,
15689 IX86_BUILTIN_MOVSS,
15690
15691 IX86_BUILTIN_MOVHLPS,
15692 IX86_BUILTIN_MOVLHPS,
15693 IX86_BUILTIN_LOADHPS,
15694 IX86_BUILTIN_LOADLPS,
15695 IX86_BUILTIN_STOREHPS,
15696 IX86_BUILTIN_STORELPS,
15697
15698 IX86_BUILTIN_MASKMOVQ,
15699 IX86_BUILTIN_MOVMSKPS,
15700 IX86_BUILTIN_PMOVMSKB,
15701
15702 IX86_BUILTIN_MOVNTPS,
15703 IX86_BUILTIN_MOVNTQ,
15704
15705 IX86_BUILTIN_LOADDQU,
15706 IX86_BUILTIN_STOREDQU,
15707
15708 IX86_BUILTIN_PACKSSWB,
15709 IX86_BUILTIN_PACKSSDW,
15710 IX86_BUILTIN_PACKUSWB,
15711
15712 IX86_BUILTIN_PADDB,
15713 IX86_BUILTIN_PADDW,
15714 IX86_BUILTIN_PADDD,
15715 IX86_BUILTIN_PADDQ,
15716 IX86_BUILTIN_PADDSB,
15717 IX86_BUILTIN_PADDSW,
15718 IX86_BUILTIN_PADDUSB,
15719 IX86_BUILTIN_PADDUSW,
15720 IX86_BUILTIN_PSUBB,
15721 IX86_BUILTIN_PSUBW,
15722 IX86_BUILTIN_PSUBD,
15723 IX86_BUILTIN_PSUBQ,
15724 IX86_BUILTIN_PSUBSB,
15725 IX86_BUILTIN_PSUBSW,
15726 IX86_BUILTIN_PSUBUSB,
15727 IX86_BUILTIN_PSUBUSW,
15728
15729 IX86_BUILTIN_PAND,
15730 IX86_BUILTIN_PANDN,
15731 IX86_BUILTIN_POR,
15732 IX86_BUILTIN_PXOR,
15733
15734 IX86_BUILTIN_PAVGB,
15735 IX86_BUILTIN_PAVGW,
15736
15737 IX86_BUILTIN_PCMPEQB,
15738 IX86_BUILTIN_PCMPEQW,
15739 IX86_BUILTIN_PCMPEQD,
15740 IX86_BUILTIN_PCMPGTB,
15741 IX86_BUILTIN_PCMPGTW,
15742 IX86_BUILTIN_PCMPGTD,
15743
15744 IX86_BUILTIN_PMADDWD,
15745
15746 IX86_BUILTIN_PMAXSW,
15747 IX86_BUILTIN_PMAXUB,
15748 IX86_BUILTIN_PMINSW,
15749 IX86_BUILTIN_PMINUB,
15750
15751 IX86_BUILTIN_PMULHUW,
15752 IX86_BUILTIN_PMULHW,
15753 IX86_BUILTIN_PMULLW,
15754
15755 IX86_BUILTIN_PSADBW,
15756 IX86_BUILTIN_PSHUFW,
15757
15758 IX86_BUILTIN_PSLLW,
15759 IX86_BUILTIN_PSLLD,
15760 IX86_BUILTIN_PSLLQ,
15761 IX86_BUILTIN_PSRAW,
15762 IX86_BUILTIN_PSRAD,
15763 IX86_BUILTIN_PSRLW,
15764 IX86_BUILTIN_PSRLD,
15765 IX86_BUILTIN_PSRLQ,
15766 IX86_BUILTIN_PSLLWI,
15767 IX86_BUILTIN_PSLLDI,
15768 IX86_BUILTIN_PSLLQI,
15769 IX86_BUILTIN_PSRAWI,
15770 IX86_BUILTIN_PSRADI,
15771 IX86_BUILTIN_PSRLWI,
15772 IX86_BUILTIN_PSRLDI,
15773 IX86_BUILTIN_PSRLQI,
15774
15775 IX86_BUILTIN_PUNPCKHBW,
15776 IX86_BUILTIN_PUNPCKHWD,
15777 IX86_BUILTIN_PUNPCKHDQ,
15778 IX86_BUILTIN_PUNPCKLBW,
15779 IX86_BUILTIN_PUNPCKLWD,
15780 IX86_BUILTIN_PUNPCKLDQ,
15781
15782 IX86_BUILTIN_SHUFPS,
15783
15784 IX86_BUILTIN_RCPPS,
15785 IX86_BUILTIN_RCPSS,
15786 IX86_BUILTIN_RSQRTPS,
15787 IX86_BUILTIN_RSQRTSS,
15788 IX86_BUILTIN_SQRTPS,
15789 IX86_BUILTIN_SQRTSS,
15790
15791 IX86_BUILTIN_UNPCKHPS,
15792 IX86_BUILTIN_UNPCKLPS,
15793
15794 IX86_BUILTIN_ANDPS,
15795 IX86_BUILTIN_ANDNPS,
15796 IX86_BUILTIN_ORPS,
15797 IX86_BUILTIN_XORPS,
15798
15799 IX86_BUILTIN_EMMS,
15800 IX86_BUILTIN_LDMXCSR,
15801 IX86_BUILTIN_STMXCSR,
15802 IX86_BUILTIN_SFENCE,
15803
15804 /* 3DNow! Original */
15805 IX86_BUILTIN_FEMMS,
15806 IX86_BUILTIN_PAVGUSB,
15807 IX86_BUILTIN_PF2ID,
15808 IX86_BUILTIN_PFACC,
15809 IX86_BUILTIN_PFADD,
15810 IX86_BUILTIN_PFCMPEQ,
15811 IX86_BUILTIN_PFCMPGE,
15812 IX86_BUILTIN_PFCMPGT,
15813 IX86_BUILTIN_PFMAX,
15814 IX86_BUILTIN_PFMIN,
15815 IX86_BUILTIN_PFMUL,
15816 IX86_BUILTIN_PFRCP,
15817 IX86_BUILTIN_PFRCPIT1,
15818 IX86_BUILTIN_PFRCPIT2,
15819 IX86_BUILTIN_PFRSQIT1,
15820 IX86_BUILTIN_PFRSQRT,
15821 IX86_BUILTIN_PFSUB,
15822 IX86_BUILTIN_PFSUBR,
15823 IX86_BUILTIN_PI2FD,
15824 IX86_BUILTIN_PMULHRW,
15825
15826 /* 3DNow! Athlon Extensions */
15827 IX86_BUILTIN_PF2IW,
15828 IX86_BUILTIN_PFNACC,
15829 IX86_BUILTIN_PFPNACC,
15830 IX86_BUILTIN_PI2FW,
15831 IX86_BUILTIN_PSWAPDSI,
15832 IX86_BUILTIN_PSWAPDSF,
15833
15834 /* SSE2 */
15835 IX86_BUILTIN_ADDPD,
15836 IX86_BUILTIN_ADDSD,
15837 IX86_BUILTIN_DIVPD,
15838 IX86_BUILTIN_DIVSD,
15839 IX86_BUILTIN_MULPD,
15840 IX86_BUILTIN_MULSD,
15841 IX86_BUILTIN_SUBPD,
15842 IX86_BUILTIN_SUBSD,
15843
15844 IX86_BUILTIN_CMPEQPD,
15845 IX86_BUILTIN_CMPLTPD,
15846 IX86_BUILTIN_CMPLEPD,
15847 IX86_BUILTIN_CMPGTPD,
15848 IX86_BUILTIN_CMPGEPD,
15849 IX86_BUILTIN_CMPNEQPD,
15850 IX86_BUILTIN_CMPNLTPD,
15851 IX86_BUILTIN_CMPNLEPD,
15852 IX86_BUILTIN_CMPNGTPD,
15853 IX86_BUILTIN_CMPNGEPD,
15854 IX86_BUILTIN_CMPORDPD,
15855 IX86_BUILTIN_CMPUNORDPD,
15856 IX86_BUILTIN_CMPNEPD,
15857 IX86_BUILTIN_CMPEQSD,
15858 IX86_BUILTIN_CMPLTSD,
15859 IX86_BUILTIN_CMPLESD,
15860 IX86_BUILTIN_CMPNEQSD,
15861 IX86_BUILTIN_CMPNLTSD,
15862 IX86_BUILTIN_CMPNLESD,
15863 IX86_BUILTIN_CMPORDSD,
15864 IX86_BUILTIN_CMPUNORDSD,
15865 IX86_BUILTIN_CMPNESD,
15866
15867 IX86_BUILTIN_COMIEQSD,
15868 IX86_BUILTIN_COMILTSD,
15869 IX86_BUILTIN_COMILESD,
15870 IX86_BUILTIN_COMIGTSD,
15871 IX86_BUILTIN_COMIGESD,
15872 IX86_BUILTIN_COMINEQSD,
15873 IX86_BUILTIN_UCOMIEQSD,
15874 IX86_BUILTIN_UCOMILTSD,
15875 IX86_BUILTIN_UCOMILESD,
15876 IX86_BUILTIN_UCOMIGTSD,
15877 IX86_BUILTIN_UCOMIGESD,
15878 IX86_BUILTIN_UCOMINEQSD,
15879
15880 IX86_BUILTIN_MAXPD,
15881 IX86_BUILTIN_MAXSD,
15882 IX86_BUILTIN_MINPD,
15883 IX86_BUILTIN_MINSD,
15884
15885 IX86_BUILTIN_ANDPD,
15886 IX86_BUILTIN_ANDNPD,
15887 IX86_BUILTIN_ORPD,
15888 IX86_BUILTIN_XORPD,
15889
15890 IX86_BUILTIN_SQRTPD,
15891 IX86_BUILTIN_SQRTSD,
15892
15893 IX86_BUILTIN_UNPCKHPD,
15894 IX86_BUILTIN_UNPCKLPD,
15895
15896 IX86_BUILTIN_SHUFPD,
15897
15898 IX86_BUILTIN_LOADUPD,
15899 IX86_BUILTIN_STOREUPD,
15900 IX86_BUILTIN_MOVSD,
15901
15902 IX86_BUILTIN_LOADHPD,
15903 IX86_BUILTIN_LOADLPD,
15904
15905 IX86_BUILTIN_CVTDQ2PD,
15906 IX86_BUILTIN_CVTDQ2PS,
15907
15908 IX86_BUILTIN_CVTPD2DQ,
15909 IX86_BUILTIN_CVTPD2PI,
15910 IX86_BUILTIN_CVTPD2PS,
15911 IX86_BUILTIN_CVTTPD2DQ,
15912 IX86_BUILTIN_CVTTPD2PI,
15913
15914 IX86_BUILTIN_CVTPI2PD,
15915 IX86_BUILTIN_CVTSI2SD,
15916 IX86_BUILTIN_CVTSI642SD,
15917
15918 IX86_BUILTIN_CVTSD2SI,
15919 IX86_BUILTIN_CVTSD2SI64,
15920 IX86_BUILTIN_CVTSD2SS,
15921 IX86_BUILTIN_CVTSS2SD,
15922 IX86_BUILTIN_CVTTSD2SI,
15923 IX86_BUILTIN_CVTTSD2SI64,
15924
15925 IX86_BUILTIN_CVTPS2DQ,
15926 IX86_BUILTIN_CVTPS2PD,
15927 IX86_BUILTIN_CVTTPS2DQ,
15928
15929 IX86_BUILTIN_MOVNTI,
15930 IX86_BUILTIN_MOVNTPD,
15931 IX86_BUILTIN_MOVNTDQ,
15932
15933 /* SSE2 MMX */
15934 IX86_BUILTIN_MASKMOVDQU,
15935 IX86_BUILTIN_MOVMSKPD,
15936 IX86_BUILTIN_PMOVMSKB128,
15937
15938 IX86_BUILTIN_PACKSSWB128,
15939 IX86_BUILTIN_PACKSSDW128,
15940 IX86_BUILTIN_PACKUSWB128,
15941
15942 IX86_BUILTIN_PADDB128,
15943 IX86_BUILTIN_PADDW128,
15944 IX86_BUILTIN_PADDD128,
15945 IX86_BUILTIN_PADDQ128,
15946 IX86_BUILTIN_PADDSB128,
15947 IX86_BUILTIN_PADDSW128,
15948 IX86_BUILTIN_PADDUSB128,
15949 IX86_BUILTIN_PADDUSW128,
15950 IX86_BUILTIN_PSUBB128,
15951 IX86_BUILTIN_PSUBW128,
15952 IX86_BUILTIN_PSUBD128,
15953 IX86_BUILTIN_PSUBQ128,
15954 IX86_BUILTIN_PSUBSB128,
15955 IX86_BUILTIN_PSUBSW128,
15956 IX86_BUILTIN_PSUBUSB128,
15957 IX86_BUILTIN_PSUBUSW128,
15958
15959 IX86_BUILTIN_PAND128,
15960 IX86_BUILTIN_PANDN128,
15961 IX86_BUILTIN_POR128,
15962 IX86_BUILTIN_PXOR128,
15963
15964 IX86_BUILTIN_PAVGB128,
15965 IX86_BUILTIN_PAVGW128,
15966
15967 IX86_BUILTIN_PCMPEQB128,
15968 IX86_BUILTIN_PCMPEQW128,
15969 IX86_BUILTIN_PCMPEQD128,
15970 IX86_BUILTIN_PCMPGTB128,
15971 IX86_BUILTIN_PCMPGTW128,
15972 IX86_BUILTIN_PCMPGTD128,
15973
15974 IX86_BUILTIN_PMADDWD128,
15975
15976 IX86_BUILTIN_PMAXSW128,
15977 IX86_BUILTIN_PMAXUB128,
15978 IX86_BUILTIN_PMINSW128,
15979 IX86_BUILTIN_PMINUB128,
15980
15981 IX86_BUILTIN_PMULUDQ,
15982 IX86_BUILTIN_PMULUDQ128,
15983 IX86_BUILTIN_PMULHUW128,
15984 IX86_BUILTIN_PMULHW128,
15985 IX86_BUILTIN_PMULLW128,
15986
15987 IX86_BUILTIN_PSADBW128,
15988 IX86_BUILTIN_PSHUFHW,
15989 IX86_BUILTIN_PSHUFLW,
15990 IX86_BUILTIN_PSHUFD,
15991
15992 IX86_BUILTIN_PSLLW128,
15993 IX86_BUILTIN_PSLLD128,
15994 IX86_BUILTIN_PSLLQ128,
15995 IX86_BUILTIN_PSRAW128,
15996 IX86_BUILTIN_PSRAD128,
15997 IX86_BUILTIN_PSRLW128,
15998 IX86_BUILTIN_PSRLD128,
15999 IX86_BUILTIN_PSRLQ128,
16000 IX86_BUILTIN_PSLLDQI128,
16001 IX86_BUILTIN_PSLLWI128,
16002 IX86_BUILTIN_PSLLDI128,
16003 IX86_BUILTIN_PSLLQI128,
16004 IX86_BUILTIN_PSRAWI128,
16005 IX86_BUILTIN_PSRADI128,
16006 IX86_BUILTIN_PSRLDQI128,
16007 IX86_BUILTIN_PSRLWI128,
16008 IX86_BUILTIN_PSRLDI128,
16009 IX86_BUILTIN_PSRLQI128,
16010
16011 IX86_BUILTIN_PUNPCKHBW128,
16012 IX86_BUILTIN_PUNPCKHWD128,
16013 IX86_BUILTIN_PUNPCKHDQ128,
16014 IX86_BUILTIN_PUNPCKHQDQ128,
16015 IX86_BUILTIN_PUNPCKLBW128,
16016 IX86_BUILTIN_PUNPCKLWD128,
16017 IX86_BUILTIN_PUNPCKLDQ128,
16018 IX86_BUILTIN_PUNPCKLQDQ128,
16019
16020 IX86_BUILTIN_CLFLUSH,
16021 IX86_BUILTIN_MFENCE,
16022 IX86_BUILTIN_LFENCE,
16023
16024 /* Prescott New Instructions. */
16025 IX86_BUILTIN_ADDSUBPS,
16026 IX86_BUILTIN_HADDPS,
16027 IX86_BUILTIN_HSUBPS,
16028 IX86_BUILTIN_MOVSHDUP,
16029 IX86_BUILTIN_MOVSLDUP,
16030 IX86_BUILTIN_ADDSUBPD,
16031 IX86_BUILTIN_HADDPD,
16032 IX86_BUILTIN_HSUBPD,
16033 IX86_BUILTIN_LDDQU,
16034
16035 IX86_BUILTIN_MONITOR,
16036 IX86_BUILTIN_MWAIT,
16037
16038 /* SSSE3. */
16039 IX86_BUILTIN_PHADDW,
16040 IX86_BUILTIN_PHADDD,
16041 IX86_BUILTIN_PHADDSW,
16042 IX86_BUILTIN_PHSUBW,
16043 IX86_BUILTIN_PHSUBD,
16044 IX86_BUILTIN_PHSUBSW,
16045 IX86_BUILTIN_PMADDUBSW,
16046 IX86_BUILTIN_PMULHRSW,
16047 IX86_BUILTIN_PSHUFB,
16048 IX86_BUILTIN_PSIGNB,
16049 IX86_BUILTIN_PSIGNW,
16050 IX86_BUILTIN_PSIGND,
16051 IX86_BUILTIN_PALIGNR,
16052 IX86_BUILTIN_PABSB,
16053 IX86_BUILTIN_PABSW,
16054 IX86_BUILTIN_PABSD,
16055
16056 IX86_BUILTIN_PHADDW128,
16057 IX86_BUILTIN_PHADDD128,
16058 IX86_BUILTIN_PHADDSW128,
16059 IX86_BUILTIN_PHSUBW128,
16060 IX86_BUILTIN_PHSUBD128,
16061 IX86_BUILTIN_PHSUBSW128,
16062 IX86_BUILTIN_PMADDUBSW128,
16063 IX86_BUILTIN_PMULHRSW128,
16064 IX86_BUILTIN_PSHUFB128,
16065 IX86_BUILTIN_PSIGNB128,
16066 IX86_BUILTIN_PSIGNW128,
16067 IX86_BUILTIN_PSIGND128,
16068 IX86_BUILTIN_PALIGNR128,
16069 IX86_BUILTIN_PABSB128,
16070 IX86_BUILTIN_PABSW128,
16071 IX86_BUILTIN_PABSD128,
16072
16073 /* AMDFAM10 - SSE4A New Instructions. */
16074 IX86_BUILTIN_MOVNTSD,
16075 IX86_BUILTIN_MOVNTSS,
16076 IX86_BUILTIN_EXTRQI,
16077 IX86_BUILTIN_EXTRQ,
16078 IX86_BUILTIN_INSERTQI,
16079 IX86_BUILTIN_INSERTQ,
16080
16081 IX86_BUILTIN_VEC_INIT_V2SI,
16082 IX86_BUILTIN_VEC_INIT_V4HI,
16083 IX86_BUILTIN_VEC_INIT_V8QI,
16084 IX86_BUILTIN_VEC_EXT_V2DF,
16085 IX86_BUILTIN_VEC_EXT_V2DI,
16086 IX86_BUILTIN_VEC_EXT_V4SF,
16087 IX86_BUILTIN_VEC_EXT_V4SI,
16088 IX86_BUILTIN_VEC_EXT_V8HI,
16089 IX86_BUILTIN_VEC_EXT_V2SI,
16090 IX86_BUILTIN_VEC_EXT_V4HI,
16091 IX86_BUILTIN_VEC_SET_V8HI,
16092 IX86_BUILTIN_VEC_SET_V4HI,
16093
16094 IX86_BUILTIN_MAX
16095 };
16096
16097 /* Table for the ix86 builtin decls. */
16098 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16099
16100 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16101 * only if the target_flags include one of MASK. Stores the function decl
16102 * in the ix86_builtins array.
16103 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16104
16105 static inline tree
16106 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16107 {
16108 tree decl = NULL_TREE;
16109
16110 if (mask & target_flags
16111 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16112 {
16113 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16114 NULL, NULL_TREE);
16115 ix86_builtins[(int) code] = decl;
16116 }
16117
16118 return decl;
16119 }
16120
16121 /* Like def_builtin, but also marks the function decl "const". */
16122
16123 static inline tree
16124 def_builtin_const (int mask, const char *name, tree type,
16125 enum ix86_builtins code)
16126 {
16127 tree decl = def_builtin (mask, name, type, code);
16128 if (decl)
16129 TREE_READONLY (decl) = 1;
16130 return decl;
16131 }
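/* A typical use later in this file looks roughly like (illustrative only;
   the exact type node names vary):
       def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr",
                    void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
   which registers the builtin only when the SSE target flag is enabled.  */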
16132
16133 /* Bits for builtin_description.flag. */
16134
16135 /* Set when we don't support the comparison natively, and should
16136 swap_comparison in order to support it. */
16137 #define BUILTIN_DESC_SWAP_OPERANDS 1
16138
16139 struct builtin_description
16140 {
16141 const unsigned int mask;
16142 const enum insn_code icode;
16143 const char *const name;
16144 const enum ix86_builtins code;
16145 const enum rtx_code comparison;
16146 const unsigned int flag;
16147 };
16148
16149 static const struct builtin_description bdesc_comi[] =
16150 {
16151 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16152 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16153 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16154 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16155 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16156 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16157 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16158 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16159 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16160 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16161 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16162 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16163 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16164 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16165 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16166 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16167 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16168 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16169 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16170 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16171 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16172 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16173 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16174 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16175 };
16176
16177 static const struct builtin_description bdesc_2arg[] =
16178 {
16179 /* SSE */
16180 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16181 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16182 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16183 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16184 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16185 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16186 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16187 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16188
16189 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16190 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16191 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16192 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16193 BUILTIN_DESC_SWAP_OPERANDS },
16194 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16195 BUILTIN_DESC_SWAP_OPERANDS },
16196 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16197 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16198 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16199 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16200 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16201 BUILTIN_DESC_SWAP_OPERANDS },
16202 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16203 BUILTIN_DESC_SWAP_OPERANDS },
16204 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16205 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16206 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16207 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16208 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16209 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16210 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16211 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16212 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16213 BUILTIN_DESC_SWAP_OPERANDS },
16214 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16215 BUILTIN_DESC_SWAP_OPERANDS },
16216 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16217
16218 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16219 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16220 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16221 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16222
16223 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16224 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16225 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16226 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16227
16228 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16229 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16230 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16231 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16232 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16233
16234 /* MMX */
16235 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16236 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16237 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16238 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16239 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16240 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16241 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16242 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16243
16244 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16245 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16246 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16247 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16248 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16249 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16250 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16251 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16252
16253 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16254 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16255 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16256
16257 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16258 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16259 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16260 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16261
16262 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16263 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16264
16265 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16266 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16267 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16268 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16269 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16270 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16271
16272 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16273 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16274 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16275 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16276
16277 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16278 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16279 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16280 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16281 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16282 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16283
16284 /* Special. */
16285 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16286 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16287 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16288
16289 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16290 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16291 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16292
16293 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16294 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16295 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16296 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16297 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16298 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16299
16300 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16301 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16302 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16303 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16304 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16305 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16306
16307 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16308 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16309 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16310 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16311
16312 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16313 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16314
16315 /* SSE2 */
16316 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16317 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16318 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16319 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16320 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16321 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16322 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16323 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16324
16325 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16326 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16327 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16328 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16329 BUILTIN_DESC_SWAP_OPERANDS },
16330 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16331 BUILTIN_DESC_SWAP_OPERANDS },
16332 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16333 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16334 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16335 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16336 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16337 BUILTIN_DESC_SWAP_OPERANDS },
16338 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16339 BUILTIN_DESC_SWAP_OPERANDS },
16340 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16341 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16342 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16343 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16344 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16345 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16346 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16347 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16348 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16349
16350 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16351 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16352 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16353 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16354
16355 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16356 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16357 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16358 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16359
16360 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16361 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16362 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16363
16364 /* SSE2 MMX */
16365 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16366 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16367 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16368 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16371 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16372 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16373
16374 { MASK_MMX, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16375 { MASK_MMX, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16376 { MASK_MMX, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16377 { MASK_MMX, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16378 { MASK_MMX, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16379 { MASK_MMX, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16380 { MASK_MMX, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16381 { MASK_MMX, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16382
16383 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16384 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16385
16386 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16387 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16388 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16389 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16390
16391 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16392 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16393
16394 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16395 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16396 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16397 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16398 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16399 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16400
16401 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16402 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16403 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16404 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16405
16406 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16407 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16408 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16409 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16410 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16411 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16412 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16413 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16414
16415 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16416 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16417 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16418
16419 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16420 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16421
16422 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16423 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16424
16425 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16426 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16427 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16428
16429 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16430 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16431 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16432
16433 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16434 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16435
16436 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16437
16438 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16439 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16440 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16441 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16442
16443 /* SSE3 */
16444 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16445 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16446 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16447 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16448 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16449 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16450
16451 /* SSSE3 */
16452 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16453 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16454 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16455 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16456 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16457 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16458 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16459 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16460 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16461 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16462 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16463 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16464 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16465 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16466 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16467 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16468 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16469 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16470 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16471 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16472 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16473 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16474 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16475 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16476 };
16477
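/* A bdesc_2arg row reads: ISA mask, insn code, user-visible name (0 for
   entries that are registered by hand further down), builtin enumerator,
   comparison code used by the compare patterns, and a flag field in which
   BUILTIN_DESC_SWAP_OPERANDS marks rows whose operands are exchanged before
   the insn is emitted (cmpgtps, for instance, is a swapped cmpltps).
   A hypothetical user-level sketch, using the intrinsic headers' typedefs:

       __v4sf a, b;
       __v4sf sum  = __builtin_ia32_addps (a, b);
       __v4sf mask = __builtin_ia32_cmpgtps (a, b);    all-ones where a > b
*/
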
16478 static const struct builtin_description bdesc_1arg[] =
16479 {
16480 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16481 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16482
16483 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16484 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16485 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16486
16487 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16488 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16489 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16490 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16491 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16492 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16493
16494 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16495 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16496
16497 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16498
16499 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16500 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16501
16502 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16503 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16504 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16505 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16506 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16507
16508 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16509
16510 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16511 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16512 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16513 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16514
16515 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16516 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16517 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16518
16519 /* SSE3 */
16520 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16521 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16522
16523 /* SSSE3 */
16524 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16525 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16526 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16527 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16528 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16529 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16530 };
16531
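/* The one-operand table uses the same layout without the second source.
   Rows with a null name (the sqrt/rcp/cvt entries, for example) get their
   user-visible names from explicit def_builtin calls below.  Hypothetical
   usage of a named row:

       __v16qi x;
       __v16qi ax = __builtin_ia32_pabsb128 (x);    per-byte absolute value
*/
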
16532 static void
16533 ix86_init_builtins (void)
16534 {
16535 if (TARGET_MMX)
16536 ix86_init_mmx_sse_builtins ();
16537 }
16538
16539 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16540 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16541 builtins. */
16542 static void
16543 ix86_init_mmx_sse_builtins (void)
16544 {
16545 const struct builtin_description * d;
16546 size_t i;
16547
16548 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16549 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16550 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16551 tree V2DI_type_node
16552 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16553 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16554 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16555 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16556 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16557 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16558 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16559
16560 tree pchar_type_node = build_pointer_type (char_type_node);
16561 tree pcchar_type_node = build_pointer_type (
16562 build_type_variant (char_type_node, 1, 0));
16563 tree pfloat_type_node = build_pointer_type (float_type_node);
16564 tree pcfloat_type_node = build_pointer_type (
16565 build_type_variant (float_type_node, 1, 0));
16566 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16567 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16568 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16569
16570 /* Comparisons. */
16571 tree int_ftype_v4sf_v4sf
16572 = build_function_type_list (integer_type_node,
16573 V4SF_type_node, V4SF_type_node, NULL_TREE);
16574 tree v4si_ftype_v4sf_v4sf
16575 = build_function_type_list (V4SI_type_node,
16576 V4SF_type_node, V4SF_type_node, NULL_TREE);
16577 /* MMX/SSE/integer conversions. */
16578 tree int_ftype_v4sf
16579 = build_function_type_list (integer_type_node,
16580 V4SF_type_node, NULL_TREE);
16581 tree int64_ftype_v4sf
16582 = build_function_type_list (long_long_integer_type_node,
16583 V4SF_type_node, NULL_TREE);
16584 tree int_ftype_v8qi
16585 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16586 tree v4sf_ftype_v4sf_int
16587 = build_function_type_list (V4SF_type_node,
16588 V4SF_type_node, integer_type_node, NULL_TREE);
16589 tree v4sf_ftype_v4sf_int64
16590 = build_function_type_list (V4SF_type_node,
16591 V4SF_type_node, long_long_integer_type_node,
16592 NULL_TREE);
16593 tree v4sf_ftype_v4sf_v2si
16594 = build_function_type_list (V4SF_type_node,
16595 V4SF_type_node, V2SI_type_node, NULL_TREE);
16596
16597 /* Miscellaneous. */
16598 tree v8qi_ftype_v4hi_v4hi
16599 = build_function_type_list (V8QI_type_node,
16600 V4HI_type_node, V4HI_type_node, NULL_TREE);
16601 tree v4hi_ftype_v2si_v2si
16602 = build_function_type_list (V4HI_type_node,
16603 V2SI_type_node, V2SI_type_node, NULL_TREE);
16604 tree v4sf_ftype_v4sf_v4sf_int
16605 = build_function_type_list (V4SF_type_node,
16606 V4SF_type_node, V4SF_type_node,
16607 integer_type_node, NULL_TREE);
16608 tree v2si_ftype_v4hi_v4hi
16609 = build_function_type_list (V2SI_type_node,
16610 V4HI_type_node, V4HI_type_node, NULL_TREE);
16611 tree v4hi_ftype_v4hi_int
16612 = build_function_type_list (V4HI_type_node,
16613 V4HI_type_node, integer_type_node, NULL_TREE);
16614 tree v4hi_ftype_v4hi_di
16615 = build_function_type_list (V4HI_type_node,
16616 V4HI_type_node, long_long_unsigned_type_node,
16617 NULL_TREE);
16618 tree v2si_ftype_v2si_di
16619 = build_function_type_list (V2SI_type_node,
16620 V2SI_type_node, long_long_unsigned_type_node,
16621 NULL_TREE);
16622 tree void_ftype_void
16623 = build_function_type (void_type_node, void_list_node);
16624 tree void_ftype_unsigned
16625 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16626 tree void_ftype_unsigned_unsigned
16627 = build_function_type_list (void_type_node, unsigned_type_node,
16628 unsigned_type_node, NULL_TREE);
16629 tree void_ftype_pcvoid_unsigned_unsigned
16630 = build_function_type_list (void_type_node, const_ptr_type_node,
16631 unsigned_type_node, unsigned_type_node,
16632 NULL_TREE);
16633 tree unsigned_ftype_void
16634 = build_function_type (unsigned_type_node, void_list_node);
16635 tree v2si_ftype_v4sf
16636 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16637 /* Loads/stores. */
16638 tree void_ftype_v8qi_v8qi_pchar
16639 = build_function_type_list (void_type_node,
16640 V8QI_type_node, V8QI_type_node,
16641 pchar_type_node, NULL_TREE);
16642 tree v4sf_ftype_pcfloat
16643 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16644 /* @@@ the type is bogus */
16645 tree v4sf_ftype_v4sf_pv2si
16646 = build_function_type_list (V4SF_type_node,
16647 V4SF_type_node, pv2si_type_node, NULL_TREE);
16648 tree void_ftype_pv2si_v4sf
16649 = build_function_type_list (void_type_node,
16650 pv2si_type_node, V4SF_type_node, NULL_TREE);
16651 tree void_ftype_pfloat_v4sf
16652 = build_function_type_list (void_type_node,
16653 pfloat_type_node, V4SF_type_node, NULL_TREE);
16654 tree void_ftype_pdi_di
16655 = build_function_type_list (void_type_node,
16656 pdi_type_node, long_long_unsigned_type_node,
16657 NULL_TREE);
16658 tree void_ftype_pv2di_v2di
16659 = build_function_type_list (void_type_node,
16660 pv2di_type_node, V2DI_type_node, NULL_TREE);
16661 /* Normal vector unops. */
16662 tree v4sf_ftype_v4sf
16663 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16664 tree v16qi_ftype_v16qi
16665 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16666 tree v8hi_ftype_v8hi
16667 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16668 tree v4si_ftype_v4si
16669 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16670 tree v8qi_ftype_v8qi
16671 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16672 tree v4hi_ftype_v4hi
16673 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16674
16675 /* Normal vector binops. */
16676 tree v4sf_ftype_v4sf_v4sf
16677 = build_function_type_list (V4SF_type_node,
16678 V4SF_type_node, V4SF_type_node, NULL_TREE);
16679 tree v8qi_ftype_v8qi_v8qi
16680 = build_function_type_list (V8QI_type_node,
16681 V8QI_type_node, V8QI_type_node, NULL_TREE);
16682 tree v4hi_ftype_v4hi_v4hi
16683 = build_function_type_list (V4HI_type_node,
16684 V4HI_type_node, V4HI_type_node, NULL_TREE);
16685 tree v2si_ftype_v2si_v2si
16686 = build_function_type_list (V2SI_type_node,
16687 V2SI_type_node, V2SI_type_node, NULL_TREE);
16688 tree di_ftype_di_di
16689 = build_function_type_list (long_long_unsigned_type_node,
16690 long_long_unsigned_type_node,
16691 long_long_unsigned_type_node, NULL_TREE);
16692
16693 tree di_ftype_di_di_int
16694 = build_function_type_list (long_long_unsigned_type_node,
16695 long_long_unsigned_type_node,
16696 long_long_unsigned_type_node,
16697 integer_type_node, NULL_TREE);
16698
16699 tree v2si_ftype_v2sf
16700 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16701 tree v2sf_ftype_v2si
16702 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16703 tree v2si_ftype_v2si
16704 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16705 tree v2sf_ftype_v2sf
16706 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16707 tree v2sf_ftype_v2sf_v2sf
16708 = build_function_type_list (V2SF_type_node,
16709 V2SF_type_node, V2SF_type_node, NULL_TREE);
16710 tree v2si_ftype_v2sf_v2sf
16711 = build_function_type_list (V2SI_type_node,
16712 V2SF_type_node, V2SF_type_node, NULL_TREE);
16713 tree pint_type_node = build_pointer_type (integer_type_node);
16714 tree pdouble_type_node = build_pointer_type (double_type_node);
16715 tree pcdouble_type_node = build_pointer_type (
16716 build_type_variant (double_type_node, 1, 0));
16717 tree int_ftype_v2df_v2df
16718 = build_function_type_list (integer_type_node,
16719 V2DF_type_node, V2DF_type_node, NULL_TREE);
16720
16721 tree void_ftype_pcvoid
16722 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16723 tree v4sf_ftype_v4si
16724 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16725 tree v4si_ftype_v4sf
16726 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16727 tree v2df_ftype_v4si
16728 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16729 tree v4si_ftype_v2df
16730 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16731 tree v2si_ftype_v2df
16732 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16733 tree v4sf_ftype_v2df
16734 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16735 tree v2df_ftype_v2si
16736 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16737 tree v2df_ftype_v4sf
16738 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16739 tree int_ftype_v2df
16740 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16741 tree int64_ftype_v2df
16742 = build_function_type_list (long_long_integer_type_node,
16743 V2DF_type_node, NULL_TREE);
16744 tree v2df_ftype_v2df_int
16745 = build_function_type_list (V2DF_type_node,
16746 V2DF_type_node, integer_type_node, NULL_TREE);
16747 tree v2df_ftype_v2df_int64
16748 = build_function_type_list (V2DF_type_node,
16749 V2DF_type_node, long_long_integer_type_node,
16750 NULL_TREE);
16751 tree v4sf_ftype_v4sf_v2df
16752 = build_function_type_list (V4SF_type_node,
16753 V4SF_type_node, V2DF_type_node, NULL_TREE);
16754 tree v2df_ftype_v2df_v4sf
16755 = build_function_type_list (V2DF_type_node,
16756 V2DF_type_node, V4SF_type_node, NULL_TREE);
16757 tree v2df_ftype_v2df_v2df_int
16758 = build_function_type_list (V2DF_type_node,
16759 V2DF_type_node, V2DF_type_node,
16760 integer_type_node,
16761 NULL_TREE);
16762 tree v2df_ftype_v2df_pcdouble
16763 = build_function_type_list (V2DF_type_node,
16764 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16765 tree void_ftype_pdouble_v2df
16766 = build_function_type_list (void_type_node,
16767 pdouble_type_node, V2DF_type_node, NULL_TREE);
16768 tree void_ftype_pint_int
16769 = build_function_type_list (void_type_node,
16770 pint_type_node, integer_type_node, NULL_TREE);
16771 tree void_ftype_v16qi_v16qi_pchar
16772 = build_function_type_list (void_type_node,
16773 V16QI_type_node, V16QI_type_node,
16774 pchar_type_node, NULL_TREE);
16775 tree v2df_ftype_pcdouble
16776 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16777 tree v2df_ftype_v2df_v2df
16778 = build_function_type_list (V2DF_type_node,
16779 V2DF_type_node, V2DF_type_node, NULL_TREE);
16780 tree v16qi_ftype_v16qi_v16qi
16781 = build_function_type_list (V16QI_type_node,
16782 V16QI_type_node, V16QI_type_node, NULL_TREE);
16783 tree v8hi_ftype_v8hi_v8hi
16784 = build_function_type_list (V8HI_type_node,
16785 V8HI_type_node, V8HI_type_node, NULL_TREE);
16786 tree v4si_ftype_v4si_v4si
16787 = build_function_type_list (V4SI_type_node,
16788 V4SI_type_node, V4SI_type_node, NULL_TREE);
16789 tree v2di_ftype_v2di_v2di
16790 = build_function_type_list (V2DI_type_node,
16791 V2DI_type_node, V2DI_type_node, NULL_TREE);
16792 tree v2di_ftype_v2df_v2df
16793 = build_function_type_list (V2DI_type_node,
16794 V2DF_type_node, V2DF_type_node, NULL_TREE);
16795 tree v2df_ftype_v2df
16796 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16797 tree v2di_ftype_v2di_int
16798 = build_function_type_list (V2DI_type_node,
16799 V2DI_type_node, integer_type_node, NULL_TREE);
16800 tree v2di_ftype_v2di_v2di_int
16801 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16802 V2DI_type_node, integer_type_node, NULL_TREE);
16803 tree v4si_ftype_v4si_int
16804 = build_function_type_list (V4SI_type_node,
16805 V4SI_type_node, integer_type_node, NULL_TREE);
16806 tree v8hi_ftype_v8hi_int
16807 = build_function_type_list (V8HI_type_node,
16808 V8HI_type_node, integer_type_node, NULL_TREE);
16809 tree v8hi_ftype_v8hi_v2di
16810 = build_function_type_list (V8HI_type_node,
16811 V8HI_type_node, V2DI_type_node, NULL_TREE);
16812 tree v4si_ftype_v4si_v2di
16813 = build_function_type_list (V4SI_type_node,
16814 V4SI_type_node, V2DI_type_node, NULL_TREE);
16815 tree v4si_ftype_v8hi_v8hi
16816 = build_function_type_list (V4SI_type_node,
16817 V8HI_type_node, V8HI_type_node, NULL_TREE);
16818 tree di_ftype_v8qi_v8qi
16819 = build_function_type_list (long_long_unsigned_type_node,
16820 V8QI_type_node, V8QI_type_node, NULL_TREE);
16821 tree di_ftype_v2si_v2si
16822 = build_function_type_list (long_long_unsigned_type_node,
16823 V2SI_type_node, V2SI_type_node, NULL_TREE);
16824 tree v2di_ftype_v16qi_v16qi
16825 = build_function_type_list (V2DI_type_node,
16826 V16QI_type_node, V16QI_type_node, NULL_TREE);
16827 tree v2di_ftype_v4si_v4si
16828 = build_function_type_list (V2DI_type_node,
16829 V4SI_type_node, V4SI_type_node, NULL_TREE);
16830 tree int_ftype_v16qi
16831 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16832 tree v16qi_ftype_pcchar
16833 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16834 tree void_ftype_pchar_v16qi
16835 = build_function_type_list (void_type_node,
16836 pchar_type_node, V16QI_type_node, NULL_TREE);
16837
16838 tree v2di_ftype_v2di_unsigned_unsigned
16839 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16840 unsigned_type_node, unsigned_type_node,
16841 NULL_TREE);
16842 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16843 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16844 unsigned_type_node, unsigned_type_node,
16845 NULL_TREE);
16846 tree v2di_ftype_v2di_v16qi
16847 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16848 NULL_TREE);
16849
16850 tree float80_type;
16851 tree float128_type;
16852 tree ftype;
16853
16854 /* The __float80 type. */
16855 if (TYPE_MODE (long_double_type_node) == XFmode)
16856 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16857 "__float80");
16858 else
16859 {
16860 /* The __float80 type. */
16861 float80_type = make_node (REAL_TYPE);
16862 TYPE_PRECISION (float80_type) = 80;
16863 layout_type (float80_type);
16864 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16865 }
16866
16867 if (TARGET_64BIT)
16868 {
16869 float128_type = make_node (REAL_TYPE);
16870 TYPE_PRECISION (float128_type) = 128;
16871 layout_type (float128_type);
16872 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16873 }
16874
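/* Once registered, these names are usable directly as types in user code;
   a minimal sketch (assuming a 64-bit compilation for __float128):

       __float80  x = 1.0;
       __float128 y = 2.0;
*/
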
16875 /* Add all builtins that are more or less simple operations on two
16876 operands. */
16877 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16878 {
16879 /* Take the mode from a source operand; the destination operand can
16880 have a different mode for mask-generating compares.  */
16881 enum machine_mode mode;
16882 tree type;
16883
16884 if (d->name == 0)
16885 continue;
16886 mode = insn_data[d->icode].operand[1].mode;
16887
16888 switch (mode)
16889 {
16890 case V16QImode:
16891 type = v16qi_ftype_v16qi_v16qi;
16892 break;
16893 case V8HImode:
16894 type = v8hi_ftype_v8hi_v8hi;
16895 break;
16896 case V4SImode:
16897 type = v4si_ftype_v4si_v4si;
16898 break;
16899 case V2DImode:
16900 type = v2di_ftype_v2di_v2di;
16901 break;
16902 case V2DFmode:
16903 type = v2df_ftype_v2df_v2df;
16904 break;
16905 case V4SFmode:
16906 type = v4sf_ftype_v4sf_v4sf;
16907 break;
16908 case V8QImode:
16909 type = v8qi_ftype_v8qi_v8qi;
16910 break;
16911 case V4HImode:
16912 type = v4hi_ftype_v4hi_v4hi;
16913 break;
16914 case V2SImode:
16915 type = v2si_ftype_v2si_v2si;
16916 break;
16917 case DImode:
16918 type = di_ftype_di_di;
16919 break;
16920
16921 default:
16922 gcc_unreachable ();
16923 }
16924
16925 /* Override for comparisons. */
16926 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16927 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16928 type = v4si_ftype_v4sf_v4sf;
16929
16930 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16931 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16932 type = v2di_ftype_v2df_v2df;
16933
16934 def_builtin (d->mask, d->name, type, d->code);
16935 }
16936
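/* As a worked example of the loop above: CODE_FOR_addv4sf3 has V4SFmode
   source operands, so "__builtin_ia32_addps" is registered with the
   v4sf (v4sf, v4sf) signature, while the mask-generating compare patterns
   are overridden to return an integer vector of matching width (v4si for
   the single-precision compares, v2di for the double-precision ones).  */
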
16937 /* Add all builtins that are more or less simple operations on 1 operand. */
16938 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16939 {
16940 enum machine_mode mode;
16941 tree type;
16942
16943 if (d->name == 0)
16944 continue;
16945 mode = insn_data[d->icode].operand[1].mode;
16946
16947 switch (mode)
16948 {
16949 case V16QImode:
16950 type = v16qi_ftype_v16qi;
16951 break;
16952 case V8HImode:
16953 type = v8hi_ftype_v8hi;
16954 break;
16955 case V4SImode:
16956 type = v4si_ftype_v4si;
16957 break;
16958 case V2DFmode:
16959 type = v2df_ftype_v2df;
16960 break;
16961 case V4SFmode:
16962 type = v4sf_ftype_v4sf;
16963 break;
16964 case V8QImode:
16965 type = v8qi_ftype_v8qi;
16966 break;
16967 case V4HImode:
16968 type = v4hi_ftype_v4hi;
16969 break;
16970 case V2SImode:
16971 type = v2si_ftype_v2si;
16972 break;
16973
16974 default:
16975 gcc_unreachable ();
16976 }
16977
16978 def_builtin (d->mask, d->name, type, d->code);
16979 }
16980
16981 /* Add the remaining MMX insns with somewhat more complicated types. */
16982 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16983 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16984 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16985 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16986
16987 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16988 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16989 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16990
16991 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16992 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16993
16994 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16995 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16996
16997 /* comi/ucomi insns. */
16998 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16999 if (d->mask == MASK_SSE2)
17000 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17001 else
17002 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17003
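/* Both comi flavours return a plain int; only the vector element type of
   the arguments differs.  Hypothetical usage of an SSE2 row:

       __v2df a, b;
       int below = __builtin_ia32_comisdlt (a, b);
*/
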
17004 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17005 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17006 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17007
17008 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17009 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17010 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17011 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17012 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17013 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17014 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17015 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17016 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17017 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17018 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17019
17020 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17021
17022 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17023 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17024
17025 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17026 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17027 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17028 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17029
17030 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17031 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17032 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17033 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17034
17035 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17036
17037 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17038
17039 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17040 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17041 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17042 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17043 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17044 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17045
17046 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17047
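/* The trailing integer operand of __builtin_ia32_shufps is the 8-bit
   immediate selector of the SHUFPS instruction.  Hypothetical usage:

       __v4sf a, b;
       __v4sf r = __builtin_ia32_shufps (a, b, 0x44);
*/
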
17048 /* Original 3DNow! */
17049 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17050 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17051 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17052 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17053 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17054 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17055 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17056 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17057 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17058 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17059 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17060 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17061 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17062 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17063 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17064 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17065 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17066 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17067 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17068 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17069
17070 /* 3DNow! extension as used in the Athlon CPU. */
17071 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17072 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17073 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17074 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17075 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17076 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17077
17078 /* SSE2 */
17079 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17080
17081 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17082 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17083
17084 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17085 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17086
17087 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17088 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17089 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17090 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17091 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17092
17093 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17094 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17095 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17096 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17097
17098 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17099 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17100
17101 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17102
17103 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17104 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17105
17106 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17107 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17108 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17109 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17110 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17111
17112 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17113
17114 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17115 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17116 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17117 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17118
17119 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17120 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17121 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17122
17123 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17124 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17125 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17126 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17127
17128 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17129 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17130 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17131
17132 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17133 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17134
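/* These two take plain char pointers and therefore form the unaligned
   load/store pair.  A hypothetical round trip:

       char buf[16];
       __v16qi v = __builtin_ia32_loaddqu (buf);
       __builtin_ia32_storedqu (buf, v);
*/
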
17135 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17136 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17137
17138 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17139 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17140 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17141
17142 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17143 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17144 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17145
17146 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17147 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17148
17149 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17150 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17151 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17152 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17153
17154 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17155 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17156 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17157 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17158
17159 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17160 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17161
17162 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17163
17164 /* Prescott New Instructions. */
17165 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17166 void_ftype_pcvoid_unsigned_unsigned,
17167 IX86_BUILTIN_MONITOR);
17168 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17169 void_ftype_unsigned_unsigned,
17170 IX86_BUILTIN_MWAIT);
17171 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17172 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17173
17174 /* SSSE3. */
17175 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17176 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17177 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17178 IX86_BUILTIN_PALIGNR);
17179
17180 /* AMDFAM10 SSE4A New built-ins */
17181 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17182 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17183 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17184 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17185 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17186 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17187 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17188 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17189 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17190 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17191 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17192 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17193
17194 /* Access to the vec_init patterns. */
17195 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17196 integer_type_node, NULL_TREE);
17197 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17198 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17199
17200 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17201 short_integer_type_node,
17202 short_integer_type_node,
17203 short_integer_type_node, NULL_TREE);
17204 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17205 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17206
17207 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17208 char_type_node, char_type_node,
17209 char_type_node, char_type_node,
17210 char_type_node, char_type_node,
17211 char_type_node, NULL_TREE);
17212 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17213 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17214
17215 /* Access to the vec_extract patterns. */
17216 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17217 integer_type_node, NULL_TREE);
17218 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17219 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17220
17221 ftype = build_function_type_list (long_long_integer_type_node,
17222 V2DI_type_node, integer_type_node,
17223 NULL_TREE);
17224 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17225 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17226
17227 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17228 integer_type_node, NULL_TREE);
17229 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17230 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17231
17232 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17233 integer_type_node, NULL_TREE);
17234 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17235 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17236
17237 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17238 integer_type_node, NULL_TREE);
17239 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17240 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17241
17242 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17243 integer_type_node, NULL_TREE);
17244 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17245 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17246
17247 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17248 integer_type_node, NULL_TREE);
17249 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17250 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17251
17252 /* Access to the vec_set patterns. */
17253 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17254 intHI_type_node,
17255 integer_type_node, NULL_TREE);
17256 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17257 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17258
17259 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17260 intHI_type_node,
17261 integer_type_node, NULL_TREE);
17262 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17263 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17264 }
17265
17266 /* Errors in the source file can cause expand_expr to return const0_rtx
17267 where we expect a vector. To avoid crashing, use one of the vector
17268 clear instructions. */
17269 static rtx
17270 safe_vector_operand (rtx x, enum machine_mode mode)
17271 {
17272 if (x == const0_rtx)
17273 x = CONST0_RTX (mode);
17274 return x;
17275 }
17276
17277 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17278
17279 static rtx
17280 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17281 {
17282 rtx pat, xops[3];
17283 tree arg0 = CALL_EXPR_ARG (exp, 0);
17284 tree arg1 = CALL_EXPR_ARG (exp, 1);
17285 rtx op0 = expand_normal (arg0);
17286 rtx op1 = expand_normal (arg1);
17287 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17288 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17289 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17290
17291 if (VECTOR_MODE_P (mode0))
17292 op0 = safe_vector_operand (op0, mode0);
17293 if (VECTOR_MODE_P (mode1))
17294 op1 = safe_vector_operand (op1, mode1);
17295
17296 if (optimize || !target
17297 || GET_MODE (target) != tmode
17298 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17299 target = gen_reg_rtx (tmode);
17300
17301 if (GET_MODE (op1) == SImode && mode1 == TImode)
17302 {
17303 rtx x = gen_reg_rtx (V4SImode);
17304 emit_insn (gen_sse2_loadd (x, op1));
17305 op1 = gen_lowpart (TImode, x);
17306 }
17307
17308 /* The insn must want input operands in the same modes as the
17309 result. */
17310 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17311 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17312
17313 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17314 op0 = copy_to_mode_reg (mode0, op0);
17315 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17316 op1 = copy_to_mode_reg (mode1, op1);
17317
17318 /* ??? Using ix86_fixup_binary_operands is problematic when
17319 we've got mismatched modes. Fake it. */
17320
17321 xops[0] = target;
17322 xops[1] = op0;
17323 xops[2] = op1;
17324
17325 if (tmode == mode0 && tmode == mode1)
17326 {
17327 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17328 op0 = xops[1];
17329 op1 = xops[2];
17330 }
17331 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17332 {
17333 op0 = force_reg (mode0, op0);
17334 op1 = force_reg (mode1, op1);
17335 target = gen_reg_rtx (tmode);
17336 }
17337
17338 pat = GEN_FCN (icode) (target, op0, op1);
17339 if (! pat)
17340 return 0;
17341 emit_insn (pat);
17342 return target;
17343 }
17344
17345 /* Subroutine of ix86_expand_builtin to take care of stores. */
17346
17347 static rtx
17348 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17349 {
17350 rtx pat;
17351 tree arg0 = CALL_EXPR_ARG (exp, 0);
17352 tree arg1 = CALL_EXPR_ARG (exp, 1);
17353 rtx op0 = expand_normal (arg0);
17354 rtx op1 = expand_normal (arg1);
17355 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17356 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17357
17358 if (VECTOR_MODE_P (mode1))
17359 op1 = safe_vector_operand (op1, mode1);
17360
17361 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17362 op1 = copy_to_mode_reg (mode1, op1);
17363
17364 pat = GEN_FCN (icode) (op0, op1);
17365 if (pat)
17366 emit_insn (pat);
17367 return 0;
17368 }
17369
17370 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17371
17372 static rtx
17373 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17374 rtx target, int do_load)
17375 {
17376 rtx pat;
17377 tree arg0 = CALL_EXPR_ARG (exp, 0);
17378 rtx op0 = expand_normal (arg0);
17379 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17380 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17381
17382 if (optimize || !target
17383 || GET_MODE (target) != tmode
17384 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17385 target = gen_reg_rtx (tmode);
17386 if (do_load)
17387 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17388 else
17389 {
17390 if (VECTOR_MODE_P (mode0))
17391 op0 = safe_vector_operand (op0, mode0);
17392
17393 if ((optimize && !register_operand (op0, mode0))
17394 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17395 op0 = copy_to_mode_reg (mode0, op0);
17396 }
17397
17398 pat = GEN_FCN (icode) (target, op0);
17399 if (! pat)
17400 return 0;
17401 emit_insn (pat);
17402 return target;
17403 }
17404
17405 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17406 sqrtss, rsqrtss, rcpss. */
17407
17408 static rtx
17409 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17410 {
17411 rtx pat;
17412 tree arg0 = CALL_EXPR_ARG (exp, 0);
17413 rtx op1, op0 = expand_normal (arg0);
17414 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17415 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17416
17417 if (optimize || !target
17418 || GET_MODE (target) != tmode
17419 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17420 target = gen_reg_rtx (tmode);
17421
17422 if (VECTOR_MODE_P (mode0))
17423 op0 = safe_vector_operand (op0, mode0);
17424
17425 if ((optimize && !register_operand (op0, mode0))
17426 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17427 op0 = copy_to_mode_reg (mode0, op0);
17428
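/* These vm* patterns take a second vector operand that supplies the
   elements untouched by the scalar operation; reusing OP0 for it lets
   the upper elements pass through unchanged.  */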
17429 op1 = op0;
17430 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17431 op1 = copy_to_mode_reg (mode0, op1);
17432
17433 pat = GEN_FCN (icode) (target, op0, op1);
17434 if (! pat)
17435 return 0;
17436 emit_insn (pat);
17437 return target;
17438 }
17439
17440 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17441
17442 static rtx
17443 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17444 rtx target)
17445 {
17446 rtx pat;
17447 tree arg0 = CALL_EXPR_ARG (exp, 0);
17448 tree arg1 = CALL_EXPR_ARG (exp, 1);
17449 rtx op0 = expand_normal (arg0);
17450 rtx op1 = expand_normal (arg1);
17451 rtx op2;
17452 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17453 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17454 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17455 enum rtx_code comparison = d->comparison;
17456
17457 if (VECTOR_MODE_P (mode0))
17458 op0 = safe_vector_operand (op0, mode0);
17459 if (VECTOR_MODE_P (mode1))
17460 op1 = safe_vector_operand (op1, mode1);
17461
17462 /* Swap operands if we have a comparison that isn't available in
17463 hardware. */
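/* E.g. a greater-than compare can be carried out as a less-than compare
   with the operands exchanged; descriptions tagged with
   BUILTIN_DESC_SWAP_OPERANDS rely on exactly that.  */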
17464 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17465 {
17466 rtx tmp = gen_reg_rtx (mode1);
17467 emit_move_insn (tmp, op1);
17468 op1 = op0;
17469 op0 = tmp;
17470 }
17471
17472 if (optimize || !target
17473 || GET_MODE (target) != tmode
17474 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17475 target = gen_reg_rtx (tmode);
17476
17477 if ((optimize && !register_operand (op0, mode0))
17478 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17479 op0 = copy_to_mode_reg (mode0, op0);
17480 if ((optimize && !register_operand (op1, mode1))
17481 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17482 op1 = copy_to_mode_reg (mode1, op1);
17483
17484 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17485 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17486 if (! pat)
17487 return 0;
17488 emit_insn (pat);
17489 return target;
17490 }
17491
17492 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17493
17494 static rtx
17495 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17496 rtx target)
17497 {
17498 rtx pat;
17499 tree arg0 = CALL_EXPR_ARG (exp, 0);
17500 tree arg1 = CALL_EXPR_ARG (exp, 1);
17501 rtx op0 = expand_normal (arg0);
17502 rtx op1 = expand_normal (arg1);
17503 rtx op2;
17504 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17505 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17506 enum rtx_code comparison = d->comparison;
17507
17508 if (VECTOR_MODE_P (mode0))
17509 op0 = safe_vector_operand (op0, mode0);
17510 if (VECTOR_MODE_P (mode1))
17511 op1 = safe_vector_operand (op1, mode1);
17512
17513 /* Swap operands if we have a comparison that isn't available in
17514 hardware. */
17515 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17516 {
17517 rtx tmp = op1;
17518 op1 = op0;
17519 op0 = tmp;
17520 }
17521
17522 target = gen_reg_rtx (SImode);
17523 emit_move_insn (target, const0_rtx);
17524 target = gen_rtx_SUBREG (QImode, target, 0);
17525
17526 if ((optimize && !register_operand (op0, mode0))
17527 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17528 op0 = copy_to_mode_reg (mode0, op0);
17529 if ((optimize && !register_operand (op1, mode1))
17530 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17531 op1 = copy_to_mode_reg (mode1, op1);
17532
17533 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17534 pat = GEN_FCN (d->icode) (op0, op1);
17535 if (! pat)
17536 return 0;
17537 emit_insn (pat);
17538 emit_insn (gen_rtx_SET (VOIDmode,
17539 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17540 gen_rtx_fmt_ee (comparison, QImode,
17541 SET_DEST (pat),
17542 const0_rtx)));
17543
17544 return SUBREG_REG (target);
17545 }
17546
17547 /* Return the integer constant in ARG. Constrain it to be in the range
17548 of the subparts of VEC_TYPE; issue an error if not. */
17549
17550 static int
17551 get_element_number (tree vec_type, tree arg)
17552 {
17553 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17554
17555 if (!host_integerp (arg, 1)
17556 || (elt = tree_low_cst (arg, 1), elt > max))
17557 {
17558 error ("selector must be an integer constant in the range 0..%wi", max);
17559 return 0;
17560 }
17561
17562 return elt;
17563 }
17564
17565 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17566 ix86_expand_vector_init. We DO have language-level syntax for this, in
17567 the form of (type){ init-list }. Except that since we can't place emms
17568 instructions from inside the compiler, we can't allow the use of MMX
17569 registers unless the user explicitly asks for it. So we do *not* define
17570 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17571 we have builtins invoked by mmintrin.h that give us license to emit
17572 these sorts of instructions. */
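/* Illustrative sketch only (the wrapper and type names below are
   assumptions, not code quoted from mmintrin.h): a header-level helper
   can reach the builtin defined above directly.  Argument i of the
   builtin supplies element i of the result, as expanded by
   ix86_expand_vec_init_builtin below; compiling requires -mmmx.

     typedef int my_v2si __attribute__ ((vector_size (8)));

     static __inline my_v2si
     my_set_v2si (int e0, int e1)
     {
       return __builtin_ia32_vec_init_v2si (e0, e1);
     }
*/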
17573
17574 static rtx
17575 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17576 {
17577 enum machine_mode tmode = TYPE_MODE (type);
17578 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17579 int i, n_elt = GET_MODE_NUNITS (tmode);
17580 rtvec v = rtvec_alloc (n_elt);
17581
17582 gcc_assert (VECTOR_MODE_P (tmode));
17583 gcc_assert (call_expr_nargs (exp) == n_elt);
17584
17585 for (i = 0; i < n_elt; ++i)
17586 {
17587 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17588 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17589 }
17590
17591 if (!target || !register_operand (target, tmode))
17592 target = gen_reg_rtx (tmode);
17593
17594 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17595 return target;
17596 }
17597
17598 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17599 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17600 had a language-level syntax for referencing vector elements. */
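/* Illustrative sketch only (the wrapper and type names are assumptions):
   the element selector must be an integer constant, as enforced by
   get_element_number above, so a direct use of the builtin looks like:

     typedef float my_v4sf __attribute__ ((vector_size (16)));

     static __inline float
     my_first_elt (my_v4sf x)
     {
       return __builtin_ia32_vec_ext_v4sf (x, 0);
     }
*/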
17601
17602 static rtx
17603 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17604 {
17605 enum machine_mode tmode, mode0;
17606 tree arg0, arg1;
17607 int elt;
17608 rtx op0;
17609
17610 arg0 = CALL_EXPR_ARG (exp, 0);
17611 arg1 = CALL_EXPR_ARG (exp, 1);
17612
17613 op0 = expand_normal (arg0);
17614 elt = get_element_number (TREE_TYPE (arg0), arg1);
17615
17616 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17617 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17618 gcc_assert (VECTOR_MODE_P (mode0));
17619
17620 op0 = force_reg (mode0, op0);
17621
17622 if (optimize || !target || !register_operand (target, tmode))
17623 target = gen_reg_rtx (tmode);
17624
17625 ix86_expand_vector_extract (true, target, op0, elt);
17626
17627 return target;
17628 }
17629
17630 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17631 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17632 a language-level syntax for referencing vector elements. */
17633
17634 static rtx
17635 ix86_expand_vec_set_builtin (tree exp)
17636 {
17637 enum machine_mode tmode, mode1;
17638 tree arg0, arg1, arg2;
17639 int elt;
17640 rtx op0, op1;
17641
17642 arg0 = CALL_EXPR_ARG (exp, 0);
17643 arg1 = CALL_EXPR_ARG (exp, 1);
17644 arg2 = CALL_EXPR_ARG (exp, 2);
17645
17646 tmode = TYPE_MODE (TREE_TYPE (arg0));
17647 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17648 gcc_assert (VECTOR_MODE_P (tmode));
17649
17650 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17651 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17652 elt = get_element_number (TREE_TYPE (arg0), arg2);
17653
17654 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17655 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17656
17657 op0 = force_reg (tmode, op0);
17658 op1 = force_reg (mode1, op1);
17659
17660 ix86_expand_vector_set (true, op0, op1, elt);
17661
17662 return op0;
17663 }
17664
17665 /* Expand an expression EXP that calls a built-in function,
17666 with result going to TARGET if that's convenient
17667 (and in mode MODE if that's convenient).
17668 SUBTARGET may be used as the target for computing one of EXP's operands.
17669 IGNORE is nonzero if the value is to be ignored. */
17670
17671 static rtx
17672 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17673 enum machine_mode mode ATTRIBUTE_UNUSED,
17674 int ignore ATTRIBUTE_UNUSED)
17675 {
17676 const struct builtin_description *d;
17677 size_t i;
17678 enum insn_code icode;
17679 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17680 tree arg0, arg1, arg2, arg3;
17681 rtx op0, op1, op2, op3, pat;
17682 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17683 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17684
17685 switch (fcode)
17686 {
17687 case IX86_BUILTIN_EMMS:
17688 emit_insn (gen_mmx_emms ());
17689 return 0;
17690
17691 case IX86_BUILTIN_SFENCE:
17692 emit_insn (gen_sse_sfence ());
17693 return 0;
17694
17695 case IX86_BUILTIN_MASKMOVQ:
17696 case IX86_BUILTIN_MASKMOVDQU:
17697 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17698 ? CODE_FOR_mmx_maskmovq
17699 : CODE_FOR_sse2_maskmovdqu);
17700 /* Note the arg order is different from the operand order. */
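/* The last builtin argument is the destination pointer, which becomes
   the MEM in insn operand 0; the two vector arguments map to insn
   operands 1 and 2.  */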
17701 arg1 = CALL_EXPR_ARG (exp, 0);
17702 arg2 = CALL_EXPR_ARG (exp, 1);
17703 arg0 = CALL_EXPR_ARG (exp, 2);
17704 op0 = expand_normal (arg0);
17705 op1 = expand_normal (arg1);
17706 op2 = expand_normal (arg2);
17707 mode0 = insn_data[icode].operand[0].mode;
17708 mode1 = insn_data[icode].operand[1].mode;
17709 mode2 = insn_data[icode].operand[2].mode;
17710
17711 op0 = force_reg (Pmode, op0);
17712 op0 = gen_rtx_MEM (mode1, op0);
17713
17714 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17715 op0 = copy_to_mode_reg (mode0, op0);
17716 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17717 op1 = copy_to_mode_reg (mode1, op1);
17718 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17719 op2 = copy_to_mode_reg (mode2, op2);
17720 pat = GEN_FCN (icode) (op0, op1, op2);
17721 if (! pat)
17722 return 0;
17723 emit_insn (pat);
17724 return 0;
17725
17726 case IX86_BUILTIN_SQRTSS:
17727 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17728 case IX86_BUILTIN_RSQRTSS:
17729 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17730 case IX86_BUILTIN_RCPSS:
17731 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17732
17733 case IX86_BUILTIN_LOADUPS:
17734 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17735
17736 case IX86_BUILTIN_STOREUPS:
17737 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17738
17739 case IX86_BUILTIN_LOADHPS:
17740 case IX86_BUILTIN_LOADLPS:
17741 case IX86_BUILTIN_LOADHPD:
17742 case IX86_BUILTIN_LOADLPD:
17743 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17744 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17745 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17746 : CODE_FOR_sse2_loadlpd);
17747 arg0 = CALL_EXPR_ARG (exp, 0);
17748 arg1 = CALL_EXPR_ARG (exp, 1);
17749 op0 = expand_normal (arg0);
17750 op1 = expand_normal (arg1);
17751 tmode = insn_data[icode].operand[0].mode;
17752 mode0 = insn_data[icode].operand[1].mode;
17753 mode1 = insn_data[icode].operand[2].mode;
17754
17755 op0 = force_reg (mode0, op0);
17756 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17757 if (optimize || target == 0
17758 || GET_MODE (target) != tmode
17759 || !register_operand (target, tmode))
17760 target = gen_reg_rtx (tmode);
17761 pat = GEN_FCN (icode) (target, op0, op1);
17762 if (! pat)
17763 return 0;
17764 emit_insn (pat);
17765 return target;
17766
17767 case IX86_BUILTIN_STOREHPS:
17768 case IX86_BUILTIN_STORELPS:
17769 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17770 : CODE_FOR_sse_storelps);
17771 arg0 = CALL_EXPR_ARG (exp, 0);
17772 arg1 = CALL_EXPR_ARG (exp, 1);
17773 op0 = expand_normal (arg0);
17774 op1 = expand_normal (arg1);
17775 mode0 = insn_data[icode].operand[0].mode;
17776 mode1 = insn_data[icode].operand[1].mode;
17777
17778 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17779 op1 = force_reg (mode1, op1);
17780
17781 pat = GEN_FCN (icode) (op0, op1);
17782 if (! pat)
17783 return 0;
17784 emit_insn (pat);
17785 return const0_rtx;
17786
17787 case IX86_BUILTIN_MOVNTPS:
17788 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17789 case IX86_BUILTIN_MOVNTQ:
17790 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17791
17792 case IX86_BUILTIN_LDMXCSR:
17793 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17794 target = assign_386_stack_local (SImode, SLOT_TEMP);
17795 emit_move_insn (target, op0);
17796 emit_insn (gen_sse_ldmxcsr (target));
17797 return 0;
17798
17799 case IX86_BUILTIN_STMXCSR:
17800 target = assign_386_stack_local (SImode, SLOT_TEMP);
17801 emit_insn (gen_sse_stmxcsr (target));
17802 return copy_to_mode_reg (SImode, target);
17803
17804 case IX86_BUILTIN_SHUFPS:
17805 case IX86_BUILTIN_SHUFPD:
17806 icode = (fcode == IX86_BUILTIN_SHUFPS
17807 ? CODE_FOR_sse_shufps
17808 : CODE_FOR_sse2_shufpd);
17809 arg0 = CALL_EXPR_ARG (exp, 0);
17810 arg1 = CALL_EXPR_ARG (exp, 1);
17811 arg2 = CALL_EXPR_ARG (exp, 2);
17812 op0 = expand_normal (arg0);
17813 op1 = expand_normal (arg1);
17814 op2 = expand_normal (arg2);
17815 tmode = insn_data[icode].operand[0].mode;
17816 mode0 = insn_data[icode].operand[1].mode;
17817 mode1 = insn_data[icode].operand[2].mode;
17818 mode2 = insn_data[icode].operand[3].mode;
17819
17820 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17821 op0 = copy_to_mode_reg (mode0, op0);
17822 if ((optimize && !register_operand (op1, mode1))
17823 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17824 op1 = copy_to_mode_reg (mode1, op1);
17825 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17826 {
17827 /* @@@ better error message */
17828 error ("mask must be an immediate");
17829 return gen_reg_rtx (tmode);
17830 }
17831 if (optimize || target == 0
17832 || GET_MODE (target) != tmode
17833 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17834 target = gen_reg_rtx (tmode);
17835 pat = GEN_FCN (icode) (target, op0, op1, op2);
17836 if (! pat)
17837 return 0;
17838 emit_insn (pat);
17839 return target;
17840
17841 case IX86_BUILTIN_PSHUFW:
17842 case IX86_BUILTIN_PSHUFD:
17843 case IX86_BUILTIN_PSHUFHW:
17844 case IX86_BUILTIN_PSHUFLW:
17845 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17846 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17847 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17848 : CODE_FOR_mmx_pshufw);
17849 arg0 = CALL_EXPR_ARG (exp, 0);
17850 arg1 = CALL_EXPR_ARG (exp, 1);
17851 op0 = expand_normal (arg0);
17852 op1 = expand_normal (arg1);
17853 tmode = insn_data[icode].operand[0].mode;
17854 mode1 = insn_data[icode].operand[1].mode;
17855 mode2 = insn_data[icode].operand[2].mode;
17856
17857 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17858 op0 = copy_to_mode_reg (mode1, op0);
17859 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17860 {
17861 /* @@@ better error message */
17862 error ("mask must be an immediate");
17863 return const0_rtx;
17864 }
17865 if (target == 0
17866 || GET_MODE (target) != tmode
17867 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17868 target = gen_reg_rtx (tmode);
17869 pat = GEN_FCN (icode) (target, op0, op1);
17870 if (! pat)
17871 return 0;
17872 emit_insn (pat);
17873 return target;
17874
17875 case IX86_BUILTIN_PSLLDQI128:
17876 case IX86_BUILTIN_PSRLDQI128:
17877 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17878 : CODE_FOR_sse2_lshrti3);
17879 arg0 = CALL_EXPR_ARG (exp, 0);
17880 arg1 = CALL_EXPR_ARG (exp, 1);
17881 op0 = expand_normal (arg0);
17882 op1 = expand_normal (arg1);
17883 tmode = insn_data[icode].operand[0].mode;
17884 mode1 = insn_data[icode].operand[1].mode;
17885 mode2 = insn_data[icode].operand[2].mode;
17886
17887 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17888 {
17889 op0 = copy_to_reg (op0);
17890 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17891 }
17892 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17893 {
17894 error ("shift must be an immediate");
17895 return const0_rtx;
17896 }
17897 target = gen_reg_rtx (V2DImode);
17898 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
17899 if (! pat)
17900 return 0;
17901 emit_insn (pat);
17902 return target;
17903
17904 case IX86_BUILTIN_FEMMS:
17905 emit_insn (gen_mmx_femms ());
17906 return NULL_RTX;
17907
17908 case IX86_BUILTIN_PAVGUSB:
17909 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
17910
17911 case IX86_BUILTIN_PF2ID:
17912 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
17913
17914 case IX86_BUILTIN_PFACC:
17915 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
17916
17917 case IX86_BUILTIN_PFADD:
17918 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
17919
17920 case IX86_BUILTIN_PFCMPEQ:
17921 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
17922
17923 case IX86_BUILTIN_PFCMPGE:
17924 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
17925
17926 case IX86_BUILTIN_PFCMPGT:
17927 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
17928
17929 case IX86_BUILTIN_PFMAX:
17930 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
17931
17932 case IX86_BUILTIN_PFMIN:
17933 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
17934
17935 case IX86_BUILTIN_PFMUL:
17936 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
17937
17938 case IX86_BUILTIN_PFRCP:
17939 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
17940
17941 case IX86_BUILTIN_PFRCPIT1:
17942 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
17943
17944 case IX86_BUILTIN_PFRCPIT2:
17945 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
17946
17947 case IX86_BUILTIN_PFRSQIT1:
17948 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
17949
17950 case IX86_BUILTIN_PFRSQRT:
17951 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
17952
17953 case IX86_BUILTIN_PFSUB:
17954 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
17955
17956 case IX86_BUILTIN_PFSUBR:
17957 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
17958
17959 case IX86_BUILTIN_PI2FD:
17960 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
17961
17962 case IX86_BUILTIN_PMULHRW:
17963 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
17964
17965 case IX86_BUILTIN_PF2IW:
17966 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
17967
17968 case IX86_BUILTIN_PFNACC:
17969 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
17970
17971 case IX86_BUILTIN_PFPNACC:
17972 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
17973
17974 case IX86_BUILTIN_PI2FW:
17975 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
17976
17977 case IX86_BUILTIN_PSWAPDSI:
17978 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
17979
17980 case IX86_BUILTIN_PSWAPDSF:
17981 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
17982
17983 case IX86_BUILTIN_SQRTSD:
17984 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
17985 case IX86_BUILTIN_LOADUPD:
17986 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
17987 case IX86_BUILTIN_STOREUPD:
17988 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
17989
17990 case IX86_BUILTIN_MFENCE:
17991 emit_insn (gen_sse2_mfence ());
17992 return 0;
17993 case IX86_BUILTIN_LFENCE:
17994 emit_insn (gen_sse2_lfence ());
17995 return 0;
17996
17997 case IX86_BUILTIN_CLFLUSH:
17998 arg0 = CALL_EXPR_ARG (exp, 0);
17999 op0 = expand_normal (arg0);
18000 icode = CODE_FOR_sse2_clflush;
18001 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18002 op0 = copy_to_mode_reg (Pmode, op0);
18003
18004 emit_insn (gen_sse2_clflush (op0));
18005 return 0;
18006
18007 case IX86_BUILTIN_MOVNTPD:
18008 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18009 case IX86_BUILTIN_MOVNTDQ:
18010 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18011 case IX86_BUILTIN_MOVNTI:
18012 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18013
18014 case IX86_BUILTIN_LOADDQU:
18015 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18016 case IX86_BUILTIN_STOREDQU:
18017 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18018
18019 case IX86_BUILTIN_MONITOR:
18020 arg0 = CALL_EXPR_ARG (exp, 0);
18021 arg1 = CALL_EXPR_ARG (exp, 1);
18022 arg2 = CALL_EXPR_ARG (exp, 2);
18023 op0 = expand_normal (arg0);
18024 op1 = expand_normal (arg1);
18025 op2 = expand_normal (arg2);
18026 if (!REG_P (op0))
18027 op0 = copy_to_mode_reg (Pmode, op0);
18028 if (!REG_P (op1))
18029 op1 = copy_to_mode_reg (SImode, op1);
18030 if (!REG_P (op2))
18031 op2 = copy_to_mode_reg (SImode, op2);
18032 if (!TARGET_64BIT)
18033 emit_insn (gen_sse3_monitor (op0, op1, op2));
18034 else
18035 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18036 return 0;
18037
18038 case IX86_BUILTIN_MWAIT:
18039 arg0 = CALL_EXPR_ARG (exp, 0);
18040 arg1 = CALL_EXPR_ARG (exp, 1);
18041 op0 = expand_normal (arg0);
18042 op1 = expand_normal (arg1);
18043 if (!REG_P (op0))
18044 op0 = copy_to_mode_reg (SImode, op0);
18045 if (!REG_P (op1))
18046 op1 = copy_to_mode_reg (SImode, op1);
18047 emit_insn (gen_sse3_mwait (op0, op1));
18048 return 0;
18049
18050 case IX86_BUILTIN_LDDQU:
18051 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18052 target, 1);
18053
18054 case IX86_BUILTIN_PALIGNR:
18055 case IX86_BUILTIN_PALIGNR128:
18056 if (fcode == IX86_BUILTIN_PALIGNR)
18057 {
18058 icode = CODE_FOR_ssse3_palignrdi;
18059 mode = DImode;
18060 }
18061 else
18062 {
18063 icode = CODE_FOR_ssse3_palignrti;
18064 mode = V2DImode;
18065 }
18066 arg0 = CALL_EXPR_ARG (exp, 0);
18067 arg1 = CALL_EXPR_ARG (exp, 1);
18068 arg2 = CALL_EXPR_ARG (exp, 2);
18069 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18070 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18071 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18072 tmode = insn_data[icode].operand[0].mode;
18073 mode1 = insn_data[icode].operand[1].mode;
18074 mode2 = insn_data[icode].operand[2].mode;
18075 mode3 = insn_data[icode].operand[3].mode;
18076
18077 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18078 {
18079 op0 = copy_to_reg (op0);
18080 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18081 }
18082 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18083 {
18084 op1 = copy_to_reg (op1);
18085 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18086 }
18087 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18088 {
18089 error ("shift must be an immediate");
18090 return const0_rtx;
18091 }
18092 target = gen_reg_rtx (mode);
18093 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18094 op0, op1, op2);
18095 if (! pat)
18096 return 0;
18097 emit_insn (pat);
18098 return target;
18099
18100 case IX86_BUILTIN_MOVNTSD:
18101 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18102
18103 case IX86_BUILTIN_MOVNTSS:
18104 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18105
18106 case IX86_BUILTIN_INSERTQ:
18107 case IX86_BUILTIN_EXTRQ:
18108 icode = (fcode == IX86_BUILTIN_EXTRQ
18109 ? CODE_FOR_sse4a_extrq
18110 : CODE_FOR_sse4a_insertq);
18111 arg0 = CALL_EXPR_ARG (exp, 0);
18112 arg1 = CALL_EXPR_ARG (exp, 1);
18113 op0 = expand_normal (arg0);
18114 op1 = expand_normal (arg1);
18115 tmode = insn_data[icode].operand[0].mode;
18116 mode1 = insn_data[icode].operand[1].mode;
18117 mode2 = insn_data[icode].operand[2].mode;
18118 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18119 op0 = copy_to_mode_reg (mode1, op0);
18120 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18121 op1 = copy_to_mode_reg (mode2, op1);
18122 if (optimize || target == 0
18123 || GET_MODE (target) != tmode
18124 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18125 target = gen_reg_rtx (tmode);
18126 pat = GEN_FCN (icode) (target, op0, op1);
18127 if (! pat)
18128 return NULL_RTX;
18129 emit_insn (pat);
18130 return target;
18131
18132 case IX86_BUILTIN_EXTRQI:
18133 icode = CODE_FOR_sse4a_extrqi;
18134 arg0 = CALL_EXPR_ARG (exp, 0);
18135 arg1 = CALL_EXPR_ARG (exp, 1);
18136 arg2 = CALL_EXPR_ARG (exp, 2);
18137 op0 = expand_normal (arg0);
18138 op1 = expand_normal (arg1);
18139 op2 = expand_normal (arg2);
18140 tmode = insn_data[icode].operand[0].mode;
18141 mode1 = insn_data[icode].operand[1].mode;
18142 mode2 = insn_data[icode].operand[2].mode;
18143 mode3 = insn_data[icode].operand[3].mode;
18144 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18145 op0 = copy_to_mode_reg (mode1, op0);
18146 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18147 {
18148 error ("index mask must be an immediate");
18149 return gen_reg_rtx (tmode);
18150 }
18151 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18152 {
18153 error ("length mask must be an immediate");
18154 return gen_reg_rtx (tmode);
18155 }
18156 if (optimize || target == 0
18157 || GET_MODE (target) != tmode
18158 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18159 target = gen_reg_rtx (tmode);
18160 pat = GEN_FCN (icode) (target, op0, op1, op2);
18161 if (! pat)
18162 return NULL_RTX;
18163 emit_insn (pat);
18164 return target;
18165
18166 case IX86_BUILTIN_INSERTQI:
18167 icode = CODE_FOR_sse4a_insertqi;
18168 arg0 = CALL_EXPR_ARG (exp, 0);
18169 arg1 = CALL_EXPR_ARG (exp, 1);
18170 arg2 = CALL_EXPR_ARG (exp, 2);
18171 arg3 = CALL_EXPR_ARG (exp, 3);
18172 op0 = expand_normal (arg0);
18173 op1 = expand_normal (arg1);
18174 op2 = expand_normal (arg2);
18175 op3 = expand_normal (arg3);
18176 tmode = insn_data[icode].operand[0].mode;
18177 mode1 = insn_data[icode].operand[1].mode;
18178 mode2 = insn_data[icode].operand[2].mode;
18179 mode3 = insn_data[icode].operand[3].mode;
18180 mode4 = insn_data[icode].operand[4].mode;
18181
18182 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18183 op0 = copy_to_mode_reg (mode1, op0);
18184
18185 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18186 op1 = copy_to_mode_reg (mode2, op1);
18187
18188 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18189 {
18190 error ("index mask must be an immediate");
18191 return gen_reg_rtx (tmode);
18192 }
18193 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18194 {
18195 error ("length mask must be an immediate");
18196 return gen_reg_rtx (tmode);
18197 }
18198 if (optimize || target == 0
18199 || GET_MODE (target) != tmode
18200 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18201 target = gen_reg_rtx (tmode);
18202 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18203 if (! pat)
18204 return NULL_RTX;
18205 emit_insn (pat);
18206 return target;
18207
18208 case IX86_BUILTIN_VEC_INIT_V2SI:
18209 case IX86_BUILTIN_VEC_INIT_V4HI:
18210 case IX86_BUILTIN_VEC_INIT_V8QI:
18211 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18212
18213 case IX86_BUILTIN_VEC_EXT_V2DF:
18214 case IX86_BUILTIN_VEC_EXT_V2DI:
18215 case IX86_BUILTIN_VEC_EXT_V4SF:
18216 case IX86_BUILTIN_VEC_EXT_V4SI:
18217 case IX86_BUILTIN_VEC_EXT_V8HI:
18218 case IX86_BUILTIN_VEC_EXT_V2SI:
18219 case IX86_BUILTIN_VEC_EXT_V4HI:
18220 return ix86_expand_vec_ext_builtin (exp, target);
18221
18222 case IX86_BUILTIN_VEC_SET_V8HI:
18223 case IX86_BUILTIN_VEC_SET_V4HI:
18224 return ix86_expand_vec_set_builtin (exp);
18225
18226 default:
18227 break;
18228 }
18229
18230 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18231 if (d->code == fcode)
18232 {
18233 /* Compares are treated specially. */
18234 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18235 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18236 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18237 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18238 return ix86_expand_sse_compare (d, exp, target);
18239
18240 return ix86_expand_binop_builtin (d->icode, exp, target);
18241 }
18242
18243 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18244 if (d->code == fcode)
18245 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18246
18247 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18248 if (d->code == fcode)
18249 return ix86_expand_sse_comi (d, exp, target);
18250
18251 gcc_unreachable ();
18252 }
18253
18254 /* Returns a function decl for a vectorized version of the builtin function
18255 with builtin function code FN, producing the vector type TYPE_OUT from
18256 inputs of vector type TYPE_IN, or NULL_TREE if it is not available. */
18257
18258 static tree
18259 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18260 tree type_in)
18261 {
18262 enum machine_mode in_mode, out_mode;
18263 int in_n, out_n;
18264
18265 if (TREE_CODE (type_out) != VECTOR_TYPE
18266 || TREE_CODE (type_in) != VECTOR_TYPE)
18267 return NULL_TREE;
18268
18269 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18270 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18271 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18272 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18273
18274 switch (fn)
18275 {
18276 case BUILT_IN_SQRT:
18277 if (out_mode == DFmode && out_n == 2
18278 && in_mode == DFmode && in_n == 2)
18279 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18280 return NULL_TREE;
18281
18282 case BUILT_IN_SQRTF:
18283 if (out_mode == SFmode && out_n == 4
18284 && in_mode == SFmode && in_n == 4)
18285 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18286 return NULL_TREE;
18287
18288 case BUILT_IN_LRINTF:
18289 if (out_mode == SImode && out_n == 4
18290 && in_mode == SFmode && in_n == 4)
18291 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18292 return NULL_TREE;
18293
18294 default:
18295 ;
18296 }
18297
18298 return NULL_TREE;
18299 }
18300
18301 /* Returns a decl of a function that implements conversion of the
18302 input vector of type TYPE, or NULL_TREE if it is not available. */
18303
18304 static tree
18305 ix86_builtin_conversion (enum tree_code code, tree type)
18306 {
18307 if (TREE_CODE (type) != VECTOR_TYPE)
18308 return NULL_TREE;
18309
18310 switch (code)
18311 {
18312 case FLOAT_EXPR:
18313 switch (TYPE_MODE (type))
18314 {
18315 case V4SImode:
18316 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18317 default:
18318 return NULL_TREE;
18319 }
18320
18321 case FIX_TRUNC_EXPR:
18322 switch (TYPE_MODE (type))
18323 {
18324 case V4SFmode:
18325 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18326 default:
18327 return NULL_TREE;
18328 }
18329 default:
18330 return NULL_TREE;
18331
18332 }
18333 }
18334
18335 /* Store OPERAND to memory after reload is completed. This means
18336 that we can't easily use assign_stack_local. */
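/* The strategies used below, in order: with a red zone, store into the
   area just below the stack pointer; on 64-bit targets without a red
   zone, push the value via a PRE_DEC of the stack pointer; on 32-bit
   targets, push DImode as two SImode words and widen HImode to SImode
   before pushing.  */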
18337 rtx
18338 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18339 {
18340 rtx result;
18341
18342 gcc_assert (reload_completed);
18343 if (TARGET_RED_ZONE)
18344 {
18345 result = gen_rtx_MEM (mode,
18346 gen_rtx_PLUS (Pmode,
18347 stack_pointer_rtx,
18348 GEN_INT (-RED_ZONE_SIZE)));
18349 emit_move_insn (result, operand);
18350 }
18351 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18352 {
18353 switch (mode)
18354 {
18355 case HImode:
18356 case SImode:
18357 operand = gen_lowpart (DImode, operand);
18358 /* FALLTHRU */
18359 case DImode:
18360 emit_insn (
18361 gen_rtx_SET (VOIDmode,
18362 gen_rtx_MEM (DImode,
18363 gen_rtx_PRE_DEC (DImode,
18364 stack_pointer_rtx)),
18365 operand));
18366 break;
18367 default:
18368 gcc_unreachable ();
18369 }
18370 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18371 }
18372 else
18373 {
18374 switch (mode)
18375 {
18376 case DImode:
18377 {
18378 rtx operands[2];
18379 split_di (&operand, 1, operands, operands + 1);
18380 emit_insn (
18381 gen_rtx_SET (VOIDmode,
18382 gen_rtx_MEM (SImode,
18383 gen_rtx_PRE_DEC (Pmode,
18384 stack_pointer_rtx)),
18385 operands[1]));
18386 emit_insn (
18387 gen_rtx_SET (VOIDmode,
18388 gen_rtx_MEM (SImode,
18389 gen_rtx_PRE_DEC (Pmode,
18390 stack_pointer_rtx)),
18391 operands[0]));
18392 }
18393 break;
18394 case HImode:
18395 /* Store HImodes as SImodes. */
18396 operand = gen_lowpart (SImode, operand);
18397 /* FALLTHRU */
18398 case SImode:
18399 emit_insn (
18400 gen_rtx_SET (VOIDmode,
18401 gen_rtx_MEM (GET_MODE (operand),
18402 gen_rtx_PRE_DEC (SImode,
18403 stack_pointer_rtx)),
18404 operand));
18405 break;
18406 default:
18407 gcc_unreachable ();
18408 }
18409 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18410 }
18411 return result;
18412 }
18413
18414 /* Free operand from memory. */
18415 void
18416 ix86_free_from_memory (enum machine_mode mode)
18417 {
18418 if (!TARGET_RED_ZONE)
18419 {
18420 int size;
18421
18422 if (mode == DImode || TARGET_64BIT)
18423 size = 8;
18424 else
18425 size = 4;
18426 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18427 to a pop or add instruction if registers are available. */
18428 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18429 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18430 GEN_INT (size))));
18431 }
18432 }
18433
18434 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18435 QImode must go into class Q_REGS.
18436 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
18437 movdf to do mem-to-mem moves through integer regs. */
18438 enum reg_class
18439 ix86_preferred_reload_class (rtx x, enum reg_class class)
18440 {
18441 enum machine_mode mode = GET_MODE (x);
18442
18443 /* We're only allowed to return a subclass of CLASS. Many of the
18444 following checks fail for NO_REGS, so eliminate that early. */
18445 if (class == NO_REGS)
18446 return NO_REGS;
18447
18448 /* All classes can load zeros. */
18449 if (x == CONST0_RTX (mode))
18450 return class;
18451
18452 /* Force constants into memory if we are loading a (nonzero) constant into
18453 an MMX or SSE register. This is because there are no MMX/SSE instructions
18454 to load from a constant. */
18455 if (CONSTANT_P (x)
18456 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18457 return NO_REGS;
18458
18459 /* Prefer SSE regs only, if we can use them for math. */
18460 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18461 return SSE_CLASS_P (class) ? class : NO_REGS;
18462
18463 /* Floating-point constants need more complex checks. */
18464 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18465 {
18466 /* General regs can load everything. */
18467 if (reg_class_subset_p (class, GENERAL_REGS))
18468 return class;
18469
18470 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18471 zero above. We only want to wind up preferring 80387 registers if
18472 we plan on doing computation with them. */
18473 if (TARGET_80387
18474 && standard_80387_constant_p (x))
18475 {
18476 /* Limit class to non-sse. */
18477 if (class == FLOAT_SSE_REGS)
18478 return FLOAT_REGS;
18479 if (class == FP_TOP_SSE_REGS)
18480 return FP_TOP_REG;
18481 if (class == FP_SECOND_SSE_REGS)
18482 return FP_SECOND_REG;
18483 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18484 return class;
18485 }
18486
18487 return NO_REGS;
18488 }
18489
18490 /* Generally when we see PLUS here, it's the function invariant
18491 (plus soft-fp const_int), which can only be computed into general
18492 regs. */
18493 if (GET_CODE (x) == PLUS)
18494 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18495
18496 /* QImode constants are easy to load, but non-constant QImode data
18497 must go into Q_REGS. */
18498 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18499 {
18500 if (reg_class_subset_p (class, Q_REGS))
18501 return class;
18502 if (reg_class_subset_p (Q_REGS, class))
18503 return Q_REGS;
18504 return NO_REGS;
18505 }
18506
18507 return class;
18508 }
18509
18510 /* Discourage putting floating-point values in SSE registers unless
18511 SSE math is being used, and likewise for the 387 registers. */
18512 enum reg_class
18513 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18514 {
18515 enum machine_mode mode = GET_MODE (x);
18516
18517 /* Restrict the output reload class to the register bank that we are doing
18518 math on. If we would rather not return a subset of CLASS, reject this
18519 alternative: if reload cannot do this, it will still use its choice. */
18520 mode = GET_MODE (x);
18521 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18522 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18523
18524 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18525 {
18526 if (class == FP_TOP_SSE_REGS)
18527 return FP_TOP_REG;
18528 else if (class == FP_SECOND_SSE_REGS)
18529 return FP_SECOND_REG;
18530 else
18531 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18532 }
18533
18534 return class;
18535 }
18536
18537 /* If we are copying between general and FP registers, we need a memory
18538 location. The same is true for SSE and MMX registers.
18539
18540 The macro can't work reliably when one of the CLASSES is a class containing
18541 registers from multiple units (SSE, MMX, integer). We avoid this by never
18542 combining those units in a single alternative in the machine description.
18543 Ensure that this constraint holds to avoid unexpected surprises.
18544
18545 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18546 enforce these sanity checks. */
18547
18548 int
18549 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18550 enum machine_mode mode, int strict)
18551 {
18552 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18553 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18554 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18555 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18556 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18557 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18558 {
18559 gcc_assert (!strict);
18560 return true;
18561 }
18562
18563 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18564 return true;
18565
18566 /* ??? This is a lie. We do have moves between mmx/general, and between
18567 mmx/sse2. But by saying we need secondary memory we discourage the
18568 register allocator from using the mmx registers unless needed. */
18569 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18570 return true;
18571
18572 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18573 {
18574 /* SSE1 doesn't have any direct moves from other classes. */
18575 if (!TARGET_SSE2)
18576 return true;
18577
18578 /* If the target says that inter-unit moves are more expensive
18579 than moving through memory, then don't generate them. */
18580 if (!TARGET_INTER_UNIT_MOVES)
18581 return true;
18582
18583 /* Between SSE and general, we have moves no larger than word size. */
18584 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18585 return true;
18586 }
18587
18588 return false;
18589 }
18590
18591 /* Return true if the registers in CLASS cannot represent the change from
18592 modes FROM to TO. */
18593
18594 bool
18595 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18596 enum reg_class class)
18597 {
18598 if (from == to)
18599 return false;
18600
18601 /* x87 registers can't do subreg at all, as all values are reformatted
18602 to extended precision. */
18603 if (MAYBE_FLOAT_CLASS_P (class))
18604 return true;
18605
18606 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18607 {
18608 /* Vector registers do not support QI or HImode loads. If we don't
18609 disallow a change to these modes, reload will assume it's ok to
18610 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18611 the vec_dupv4hi pattern. */
18612 if (GET_MODE_SIZE (from) < 4)
18613 return true;
18614
18615 /* Vector registers do not support subreg with nonzero offsets, which
18616 are otherwise valid for integer registers. Since we can't see
18617 whether we have a nonzero offset from here, prohibit all
18618 nonparadoxical subregs changing size. */
18619 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18620 return true;
18621 }
18622
18623 return false;
18624 }
18625
18626 /* Return the cost of moving data from a register in class CLASS1 to
18627 one in class CLASS2.
18628
18629 It is not required that the cost always equal 2 when FROM is the same as TO;
18630 on some machines it is expensive to move between registers if they are not
18631 general registers. */
18632
18633 int
18634 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18635 enum reg_class class2)
18636 {
18637 /* In case we require secondary memory, compute the cost of the store followed
18638 by a load. In order to avoid bad register allocation choices, we need
18639 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
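/* In rough numbers (the figures are illustrative; real values come from
   the active cost table): with a memory move cost of 4 in each direction
   for both classes, the base cost computed below is 1 + 4 + 4 = 9 before
   either of the +20 penalties can apply.  */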
18640
18641 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18642 {
18643 int cost = 1;
18644
18645 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18646 MEMORY_MOVE_COST (mode, class1, 1));
18647 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18648 MEMORY_MOVE_COST (mode, class2, 1));
18649
18650 /* When copying from a general purpose register we may emit multiple
18651 stores followed by a single load, causing a memory size mismatch stall.
18652 Count this as an arbitrarily high cost of 20. */
18653 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18654 cost += 20;
18655
18656 /* In the case of FP/MMX moves, the registers actually overlap, and we
18657 have to switch modes in order to treat them differently. */
18658 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18659 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18660 cost += 20;
18661
18662 return cost;
18663 }
18664
18665 /* Moves between SSE/MMX and integer unit are expensive. */
18666 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18667 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18668 return ix86_cost->mmxsse_to_integer;
18669 if (MAYBE_FLOAT_CLASS_P (class1))
18670 return ix86_cost->fp_move;
18671 if (MAYBE_SSE_CLASS_P (class1))
18672 return ix86_cost->sse_move;
18673 if (MAYBE_MMX_CLASS_P (class1))
18674 return ix86_cost->mmx_move;
18675 return 2;
18676 }
18677
18678 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18679
18680 bool
18681 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18682 {
18683 /* Flags and only flags can only hold CCmode values. */
18684 if (CC_REGNO_P (regno))
18685 return GET_MODE_CLASS (mode) == MODE_CC;
18686 if (GET_MODE_CLASS (mode) == MODE_CC
18687 || GET_MODE_CLASS (mode) == MODE_RANDOM
18688 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18689 return 0;
18690 if (FP_REGNO_P (regno))
18691 return VALID_FP_MODE_P (mode);
18692 if (SSE_REGNO_P (regno))
18693 {
18694 /* We implement the move patterns for all vector modes into and
18695 out of SSE registers, even when no operation instructions
18696 are available. */
18697 return (VALID_SSE_REG_MODE (mode)
18698 || VALID_SSE2_REG_MODE (mode)
18699 || VALID_MMX_REG_MODE (mode)
18700 || VALID_MMX_REG_MODE_3DNOW (mode));
18701 }
18702 if (MMX_REGNO_P (regno))
18703 {
18704 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18705 so if the register is available at all, then we can move data of
18706 the given mode into or out of it. */
18707 return (VALID_MMX_REG_MODE (mode)
18708 || VALID_MMX_REG_MODE_3DNOW (mode));
18709 }
18710
18711 if (mode == QImode)
18712 {
18713 /* Take care with QImode values - they can be in non-QI regs,
18714 but then they do cause partial register stalls. */
18715 if (regno < 4 || TARGET_64BIT)
18716 return 1;
18717 if (!TARGET_PARTIAL_REG_STALL)
18718 return 1;
18719 return reload_in_progress || reload_completed;
18720 }
18721 /* We handle both integer and floats in the general purpose registers. */
18722 else if (VALID_INT_MODE_P (mode))
18723 return 1;
18724 else if (VALID_FP_MODE_P (mode))
18725 return 1;
18726 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18727 on to use that value in smaller contexts, this can easily force a
18728 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18729 supporting DImode, allow it. */
18730 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18731 return 1;
18732
18733 return 0;
18734 }
18735
18736 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18737 tieable integer mode. */
18738
18739 static bool
18740 ix86_tieable_integer_mode_p (enum machine_mode mode)
18741 {
18742 switch (mode)
18743 {
18744 case HImode:
18745 case SImode:
18746 return true;
18747
18748 case QImode:
18749 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18750
18751 case DImode:
18752 return TARGET_64BIT;
18753
18754 default:
18755 return false;
18756 }
18757 }
18758
18759 /* Return true if MODE1 is accessible in a register that can hold MODE2
18760 without copying. That is, all register classes that can hold MODE2
18761 can also hold MODE1. */
18762
18763 bool
18764 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18765 {
18766 if (mode1 == mode2)
18767 return true;
18768
18769 if (ix86_tieable_integer_mode_p (mode1)
18770 && ix86_tieable_integer_mode_p (mode2))
18771 return true;
18772
18773 /* MODE2 being XFmode implies fp stack or general regs, which means we
18774 can tie any smaller floating point modes to it. Note that we do not
18775 tie this with TFmode. */
18776 if (mode2 == XFmode)
18777 return mode1 == SFmode || mode1 == DFmode;
18778
18779 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18780 that we can tie it with SFmode. */
18781 if (mode2 == DFmode)
18782 return mode1 == SFmode;
18783
18784 /* If MODE2 is only appropriate for an SSE register, then tie with
18785 any other mode acceptable to SSE registers. */
18786 if (GET_MODE_SIZE (mode2) >= 8
18787 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18788 return ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1);
18789
18790 /* If MODE2 is appropriate for an MMX (or SSE) register, then tie
18791 with any other mode acceptable to MMX registers. */
18792 if (GET_MODE_SIZE (mode2) == 8
18793 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18794 return ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1);
18795
18796 return false;
18797 }
18798
18799 /* Return the cost of moving data of mode M between a
18800 register and memory. A value of 2 is the default; this cost is
18801 relative to those in `REGISTER_MOVE_COST'.
18802
18803 If moving between registers and memory is more expensive than
18804 between two registers, you should define this macro to express the
18805 relative cost.
18806
18807 Also model the increased cost of moving QImode registers in
18808 non-Q_REGS classes.
18809 */
18810 int
18811 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18812 {
18813 if (FLOAT_CLASS_P (class))
18814 {
18815 int index;
18816 switch (mode)
18817 {
18818 case SFmode:
18819 index = 0;
18820 break;
18821 case DFmode:
18822 index = 1;
18823 break;
18824 case XFmode:
18825 index = 2;
18826 break;
18827 default:
18828 return 100;
18829 }
18830 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18831 }
18832 if (SSE_CLASS_P (class))
18833 {
18834 int index;
18835 switch (GET_MODE_SIZE (mode))
18836 {
18837 case 4:
18838 index = 0;
18839 break;
18840 case 8:
18841 index = 1;
18842 break;
18843 case 16:
18844 index = 2;
18845 break;
18846 default:
18847 return 100;
18848 }
18849 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18850 }
18851 if (MMX_CLASS_P (class))
18852 {
18853 int index;
18854 switch (GET_MODE_SIZE (mode))
18855 {
18856 case 4:
18857 index = 0;
18858 break;
18859 case 8:
18860 index = 1;
18861 break;
18862 default:
18863 return 100;
18864 }
18865 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18866 }
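/* Moves involving the general (integer) registers: the cost depends on the
   access size, with an extra penalty for byte stores from non-Q registers.  */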
18867 switch (GET_MODE_SIZE (mode))
18868 {
18869 case 1:
18870 if (in)
18871 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18872 : ix86_cost->movzbl_load);
18873 else
18874 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18875 : ix86_cost->int_store[0] + 4);
18876 break;
18877 case 2:
18878 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18879 default:
18880 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18881 if (mode == TFmode)
18882 mode = XFmode;
18883 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18884 * (((int) GET_MODE_SIZE (mode)
18885 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18886 }
18887 }
18888
18889 /* Compute a (partial) cost for rtx X. Return true if the complete
18890 cost has been computed, and false if subexpressions should be
18891 scanned. In either case, *TOTAL contains the cost result. */
18892
18893 static bool
18894 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18895 {
18896 enum machine_mode mode = GET_MODE (x);
18897
18898 switch (code)
18899 {
18900 case CONST_INT:
18901 case CONST:
18902 case LABEL_REF:
18903 case SYMBOL_REF:
18904 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18905 *total = 3;
18906 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18907 *total = 2;
18908 else if (flag_pic && SYMBOLIC_CONST (x)
18909 && (!TARGET_64BIT
18910 || (GET_CODE (x) != LABEL_REF
18911 && (GET_CODE (x) != SYMBOL_REF
18912 || !SYMBOL_REF_LOCAL_P (x)))))
18913 *total = 1;
18914 else
18915 *total = 0;
18916 return true;
18917
18918 case CONST_DOUBLE:
18919 if (mode == VOIDmode)
18920 *total = 0;
18921 else
18922 switch (standard_80387_constant_p (x))
18923 {
18924 case 1: /* 0.0 */
18925 *total = 1;
18926 break;
18927 default: /* Other constants */
18928 *total = 2;
18929 break;
18930 case 0:
18931 case -1:
18932 /* Start with (MEM (SYMBOL_REF)), since that's where
18933 it'll probably end up. Add a penalty for size. */
18934 *total = (COSTS_N_INSNS (1)
18935 + (flag_pic != 0 && !TARGET_64BIT)
18936 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18937 break;
18938 }
18939 return true;
18940
18941 case ZERO_EXTEND:
18942 /* The zero extension is often completely free on x86_64, so make
18943 it as cheap as possible. */
18944 if (TARGET_64BIT && mode == DImode
18945 && GET_MODE (XEXP (x, 0)) == SImode)
18946 *total = 1;
18947 else if (TARGET_ZERO_EXTEND_WITH_AND)
18948 *total = ix86_cost->add;
18949 else
18950 *total = ix86_cost->movzx;
18951 return false;
18952
18953 case SIGN_EXTEND:
18954 *total = ix86_cost->movsx;
18955 return false;
18956
18957 case ASHIFT:
18958 if (CONST_INT_P (XEXP (x, 1))
18959 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18960 {
18961 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18962 if (value == 1)
18963 {
18964 *total = ix86_cost->add;
18965 return false;
18966 }
18967 if ((value == 2 || value == 3)
18968 && ix86_cost->lea <= ix86_cost->shift_const)
18969 {
18970 *total = ix86_cost->lea;
18971 return false;
18972 }
18973 }
18974 /* FALLTHRU */
18975
18976 case ROTATE:
18977 case ASHIFTRT:
18978 case LSHIFTRT:
18979 case ROTATERT:
18980 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
18981 {
18982 if (CONST_INT_P (XEXP (x, 1)))
18983 {
18984 if (INTVAL (XEXP (x, 1)) > 32)
18985 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
18986 else
18987 *total = ix86_cost->shift_const * 2;
18988 }
18989 else
18990 {
18991 if (GET_CODE (XEXP (x, 1)) == AND)
18992 *total = ix86_cost->shift_var * 2;
18993 else
18994 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
18995 }
18996 }
18997 else
18998 {
18999 if (CONST_INT_P (XEXP (x, 1)))
19000 *total = ix86_cost->shift_const;
19001 else
19002 *total = ix86_cost->shift_var;
19003 }
19004 return false;
19005
19006 case MULT:
19007 if (FLOAT_MODE_P (mode))
19008 {
19009 *total = ix86_cost->fmul;
19010 return false;
19011 }
19012 else
19013 {
19014 rtx op0 = XEXP (x, 0);
19015 rtx op1 = XEXP (x, 1);
19016 int nbits;
19017 if (CONST_INT_P (XEXP (x, 1)))
19018 {
19019 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
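/* Count the set bits in the constant multiplier; the cost model charges
   mult_bit per set bit.  Clearing the lowest set bit on each iteration
   (value &= value - 1) visits every set bit exactly once.  */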
19020 for (nbits = 0; value != 0; value &= value - 1)
19021 nbits++;
19022 }
19023 else
19024 /* This is arbitrary. */
19025 nbits = 7;
19026
19027 /* Compute costs correctly for widening multiplication. */
19028 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19029 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19030 == GET_MODE_SIZE (mode))
19031 {
19032 int is_mulwiden = 0;
19033 enum machine_mode inner_mode = GET_MODE (op0);
19034
19035 if (GET_CODE (op0) == GET_CODE (op1))
19036 is_mulwiden = 1, op1 = XEXP (op1, 0);
19037 else if (CONST_INT_P (op1))
19038 {
19039 if (GET_CODE (op0) == SIGN_EXTEND)
19040 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19041 == INTVAL (op1);
19042 else
19043 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19044 }
19045
19046 if (is_mulwiden)
19047 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19048 }
19049
19050 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19051 + nbits * ix86_cost->mult_bit
19052 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19053
19054 return true;
19055 }
19056
19057 case DIV:
19058 case UDIV:
19059 case MOD:
19060 case UMOD:
19061 if (FLOAT_MODE_P (mode))
19062 *total = ix86_cost->fdiv;
19063 else
19064 *total = ix86_cost->divide[MODE_INDEX (mode)];
19065 return false;
19066
19067 case PLUS:
19068 if (FLOAT_MODE_P (mode))
19069 *total = ix86_cost->fadd;
19070 else if (GET_MODE_CLASS (mode) == MODE_INT
19071 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19072 {
19073 if (GET_CODE (XEXP (x, 0)) == PLUS
19074 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19075 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19076 && CONSTANT_P (XEXP (x, 1)))
19077 {
19078 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19079 if (val == 2 || val == 4 || val == 8)
19080 {
19081 *total = ix86_cost->lea;
19082 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19083 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19084 outer_code);
19085 *total += rtx_cost (XEXP (x, 1), outer_code);
19086 return true;
19087 }
19088 }
19089 else if (GET_CODE (XEXP (x, 0)) == MULT
19090 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19091 {
19092 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19093 if (val == 2 || val == 4 || val == 8)
19094 {
19095 *total = ix86_cost->lea;
19096 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19097 *total += rtx_cost (XEXP (x, 1), outer_code);
19098 return true;
19099 }
19100 }
19101 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19102 {
19103 *total = ix86_cost->lea;
19104 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19105 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19106 *total += rtx_cost (XEXP (x, 1), outer_code);
19107 return true;
19108 }
19109 }
19110 /* FALLTHRU */
19111
19112 case MINUS:
19113 if (FLOAT_MODE_P (mode))
19114 {
19115 *total = ix86_cost->fadd;
19116 return false;
19117 }
19118 /* FALLTHRU */
19119
19120 case AND:
19121 case IOR:
19122 case XOR:
19123 if (!TARGET_64BIT && mode == DImode)
19124 {
19125 *total = (ix86_cost->add * 2
19126 + (rtx_cost (XEXP (x, 0), outer_code)
19127 << (GET_MODE (XEXP (x, 0)) != DImode))
19128 + (rtx_cost (XEXP (x, 1), outer_code)
19129 << (GET_MODE (XEXP (x, 1)) != DImode)));
19130 return true;
19131 }
19132 /* FALLTHRU */
19133
19134 case NEG:
19135 if (FLOAT_MODE_P (mode))
19136 {
19137 *total = ix86_cost->fchs;
19138 return false;
19139 }
19140 /* FALLTHRU */
19141
19142 case NOT:
19143 if (!TARGET_64BIT && mode == DImode)
19144 *total = ix86_cost->add * 2;
19145 else
19146 *total = ix86_cost->add;
19147 return false;
19148
19149 case COMPARE:
19150 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19151 && XEXP (XEXP (x, 0), 1) == const1_rtx
19152 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19153 && XEXP (x, 1) == const0_rtx)
19154 {
19155 /* This kind of construct is implemented using test[bwl].
19156 Treat it as if we had an AND. */
19157 *total = (ix86_cost->add
19158 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19159 + rtx_cost (const1_rtx, outer_code));
19160 return true;
19161 }
19162 return false;
19163
19164 case FLOAT_EXTEND:
19165 if (!TARGET_SSE_MATH
19166 || mode == XFmode
19167 || (mode == DFmode && !TARGET_SSE2))
19168 *total = 0;
19169 return false;
19170
19171 case ABS:
19172 if (FLOAT_MODE_P (mode))
19173 *total = ix86_cost->fabs;
19174 return false;
19175
19176 case SQRT:
19177 if (FLOAT_MODE_P (mode))
19178 *total = ix86_cost->fsqrt;
19179 return false;
19180
19181 case UNSPEC:
19182 if (XINT (x, 1) == UNSPEC_TP)
19183 *total = 0;
19184 return false;
19185
19186 default:
19187 return false;
19188 }
19189 }
19190
19191 #if TARGET_MACHO
19192
19193 static int current_machopic_label_num;
19194
19195 /* Given a symbol name and its associated stub, write out the
19196 definition of the stub. */
19197
19198 void
19199 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19200 {
19201 unsigned int length;
19202 char *binder_name, *symbol_name, lazy_ptr_name[32];
19203 int label = ++current_machopic_label_num;
19204
19205 /* For 64-bit we shouldn't get here. */
19206 gcc_assert (!TARGET_64BIT);
19207
19208 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19209 symb = (*targetm.strip_name_encoding) (symb);
19210
19211 length = strlen (stub);
19212 binder_name = alloca (length + 32);
19213 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19214
19215 length = strlen (symb);
19216 symbol_name = alloca (length + 32);
19217 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19218
19219 sprintf (lazy_ptr_name, "L%d$lz", label);
19220
19221 if (MACHOPIC_PURE)
19222 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19223 else
19224 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19225
19226 fprintf (file, "%s:\n", stub);
19227 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19228
19229 if (MACHOPIC_PURE)
19230 {
19231 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19232 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19233 fprintf (file, "\tjmp\t*%%edx\n");
19234 }
19235 else
19236 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19237
19238 fprintf (file, "%s:\n", binder_name);
19239
19240 if (MACHOPIC_PURE)
19241 {
19242 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19243 fprintf (file, "\tpushl\t%%eax\n");
19244 }
19245 else
19246 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19247
19248 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19249
19250 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19251 fprintf (file, "%s:\n", lazy_ptr_name);
19252 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19253 fprintf (file, "\t.long %s\n", binder_name);
19254 }
19255
19256 void
19257 darwin_x86_file_end (void)
19258 {
19259 darwin_file_end ();
19260 ix86_file_end ();
19261 }
19262 #endif /* TARGET_MACHO */
19263
19264 /* Order the registers for register allocator. */
19265
19266 void
19267 x86_order_regs_for_local_alloc (void)
19268 {
19269 int pos = 0;
19270 int i;
19271
19272 /* First allocate the local general purpose registers. */
19273 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19274 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19275 reg_alloc_order [pos++] = i;
19276
19277 /* Global general purpose registers. */
19278 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19279 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19280 reg_alloc_order [pos++] = i;
19281
19282 /* x87 registers come first in case we are doing FP math
19283 using them. */
19284 if (!TARGET_SSE_MATH)
19285 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19286 reg_alloc_order [pos++] = i;
19287
19288 /* SSE registers. */
19289 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19290 reg_alloc_order [pos++] = i;
19291 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19292 reg_alloc_order [pos++] = i;
19293
19294 /* x87 registers. */
19295 if (TARGET_SSE_MATH)
19296 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19297 reg_alloc_order [pos++] = i;
19298
19299 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19300 reg_alloc_order [pos++] = i;
19301
19302 /* Initialize the rest of the array, as we do not allocate some registers
19303 at all. */
19304 while (pos < FIRST_PSEUDO_REGISTER)
19305 reg_alloc_order [pos++] = 0;
19306 }
19307
19308 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19309 struct attribute_spec.handler. */
19310 static tree
19311 ix86_handle_struct_attribute (tree *node, tree name,
19312 tree args ATTRIBUTE_UNUSED,
19313 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19314 {
19315 tree *type = NULL;
19316 if (DECL_P (*node))
19317 {
19318 if (TREE_CODE (*node) == TYPE_DECL)
19319 type = &TREE_TYPE (*node);
19320 }
19321 else
19322 type = node;
19323
19324 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19325 || TREE_CODE (*type) == UNION_TYPE)))
19326 {
19327 warning (OPT_Wattributes, "%qs attribute ignored",
19328 IDENTIFIER_POINTER (name));
19329 *no_add_attrs = true;
19330 }
19331
19332 else if ((is_attribute_p ("ms_struct", name)
19333 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19334 || ((is_attribute_p ("gcc_struct", name)
19335 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19336 {
19337 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19338 IDENTIFIER_POINTER (name));
19339 *no_add_attrs = true;
19340 }
19341
19342 return NULL_TREE;
19343 }
19344
19345 static bool
19346 ix86_ms_bitfield_layout_p (tree record_type)
19347 {
19348 return (TARGET_MS_BITFIELD_LAYOUT &&
19349 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19350 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19351 }
19352
19353 /* Returns an expression indicating where the this parameter is
19354 located on entry to the FUNCTION. */
19355
19356 static rtx
19357 x86_this_parameter (tree function)
19358 {
19359 tree type = TREE_TYPE (function);
19360
19361 if (TARGET_64BIT)
19362 {
19363 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19364 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19365 }
19366
19367 if (ix86_function_regparm (type, function) > 0)
19368 {
19369 tree parm;
19370
19371 parm = TYPE_ARG_TYPES (type);
19372 /* Figure out whether or not the function has a variable number of
19373 arguments. */
19374 for (; parm; parm = TREE_CHAIN (parm))
19375 if (TREE_VALUE (parm) == void_type_node)
19376 break;
19377 /* If not, the this parameter is in the first argument. */
19378 if (parm)
19379 {
19380 int regno = 0;
19381 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19382 regno = 2;
19383 return gen_rtx_REG (SImode, regno);
19384 }
19385 }
19386
19387 if (aggregate_value_p (TREE_TYPE (type), type))
19388 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19389 else
19390 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19391 }
19392
19393 /* Determine whether x86_output_mi_thunk can succeed. */
19394
19395 static bool
19396 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19397 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19398 HOST_WIDE_INT vcall_offset, tree function)
19399 {
19400 /* 64-bit can handle anything. */
19401 if (TARGET_64BIT)
19402 return true;
19403
19404 /* For 32-bit, everything's fine if we have one free register. */
19405 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19406 return true;
19407
19408 /* Need a free register for vcall_offset. */
19409 if (vcall_offset)
19410 return false;
19411
19412 /* Need a free register for GOT references. */
19413 if (flag_pic && !(*targetm.binds_local_p) (function))
19414 return false;
19415
19416 /* Otherwise ok. */
19417 return true;
19418 }
19419
19420 /* Output the assembler code for a thunk function. THUNK_DECL is the
19421 declaration for the thunk function itself, FUNCTION is the decl for
19422 the target function. DELTA is an immediate constant offset to be
19423 added to THIS. If VCALL_OFFSET is nonzero, the word at
19424 *(*this + vcall_offset) should be added to THIS. */
19425
19426 static void
19427 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19428 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19429 HOST_WIDE_INT vcall_offset, tree function)
19430 {
19431 rtx xops[3];
19432 rtx this = x86_this_parameter (function);
19433 rtx this_reg, tmp;
19434
19435 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19436 pull it in now and let DELTA benefit. */
19437 if (REG_P (this))
19438 this_reg = this;
19439 else if (vcall_offset)
19440 {
19441 /* Put the this parameter into %eax. */
19442 xops[0] = this;
19443 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19444 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19445 }
19446 else
19447 this_reg = NULL_RTX;
19448
19449 /* Adjust the this parameter by a fixed constant. */
19450 if (delta)
19451 {
19452 xops[0] = GEN_INT (delta);
19453 xops[1] = this_reg ? this_reg : this;
19454 if (TARGET_64BIT)
19455 {
19456 if (!x86_64_general_operand (xops[0], DImode))
19457 {
19458 tmp = gen_rtx_REG (DImode, R10_REG);
19459 xops[1] = tmp;
19460 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19461 xops[0] = tmp;
19462 xops[1] = this;
19463 }
19464 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19465 }
19466 else
19467 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19468 }
19469
19470 /* Adjust the this parameter by a value stored in the vtable. */
19471 if (vcall_offset)
19472 {
19473 if (TARGET_64BIT)
19474 tmp = gen_rtx_REG (DImode, R10_REG);
19475 else
19476 {
19477 int tmp_regno = 2 /* ECX */;
19478 if (lookup_attribute ("fastcall",
19479 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19480 tmp_regno = 0 /* EAX */;
19481 tmp = gen_rtx_REG (SImode, tmp_regno);
19482 }
19483
19484 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19485 xops[1] = tmp;
19486 if (TARGET_64BIT)
19487 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19488 else
19489 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19490
19491 /* Adjust the this parameter. */
19492 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19493 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19494 {
19495 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19496 xops[0] = GEN_INT (vcall_offset);
19497 xops[1] = tmp2;
19498 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19499 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19500 }
19501 xops[1] = this_reg;
19502 if (TARGET_64BIT)
19503 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19504 else
19505 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19506 }
19507
19508 /* If necessary, drop THIS back to its stack slot. */
19509 if (this_reg && this_reg != this)
19510 {
19511 xops[0] = this_reg;
19512 xops[1] = this;
19513 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19514 }
19515
19516 xops[0] = XEXP (DECL_RTL (function), 0);
19517 if (TARGET_64BIT)
19518 {
19519 if (!flag_pic || (*targetm.binds_local_p) (function))
19520 output_asm_insn ("jmp\t%P0", xops);
19521 else
19522 {
19523 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19524 tmp = gen_rtx_CONST (Pmode, tmp);
19525 tmp = gen_rtx_MEM (QImode, tmp);
19526 xops[0] = tmp;
19527 output_asm_insn ("jmp\t%A0", xops);
19528 }
19529 }
19530 else
19531 {
19532 if (!flag_pic || (*targetm.binds_local_p) (function))
19533 output_asm_insn ("jmp\t%P0", xops);
19534 else
19535 #if TARGET_MACHO
19536 if (TARGET_MACHO)
19537 {
19538 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19539 tmp = (gen_rtx_SYMBOL_REF
19540 (Pmode,
19541 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19542 tmp = gen_rtx_MEM (QImode, tmp);
19543 xops[0] = tmp;
19544 output_asm_insn ("jmp\t%0", xops);
19545 }
19546 else
19547 #endif /* TARGET_MACHO */
19548 {
19549 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19550 output_set_got (tmp, NULL_RTX);
19551
19552 xops[1] = tmp;
19553 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19554 output_asm_insn ("jmp\t{*}%1", xops);
19555 }
19556 }
19557 }
19558
19559 static void
19560 x86_file_start (void)
19561 {
19562 default_file_start ();
19563 #if TARGET_MACHO
19564 darwin_file_start ();
19565 #endif
19566 if (X86_FILE_START_VERSION_DIRECTIVE)
19567 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19568 if (X86_FILE_START_FLTUSED)
19569 fputs ("\t.global\t__fltused\n", asm_out_file);
19570 if (ix86_asm_dialect == ASM_INTEL)
19571 fputs ("\t.intel_syntax\n", asm_out_file);
19572 }
19573
19574 int
19575 x86_field_alignment (tree field, int computed)
19576 {
19577 enum machine_mode mode;
19578 tree type = TREE_TYPE (field);
19579
19580 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19581 return computed;
19582 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19583 ? get_inner_array_type (type) : type);
19584 if (mode == DFmode || mode == DCmode
19585 || GET_MODE_CLASS (mode) == MODE_INT
19586 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19587 return MIN (32, computed);
19588 return computed;
19589 }
19590
19591 /* Output assembler code to FILE to increment profiler label # LABELNO
19592 for profiling a function entry. */
19593 void
19594 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19595 {
19596 if (TARGET_64BIT)
19597 if (flag_pic)
19598 {
19599 #ifndef NO_PROFILE_COUNTERS
19600 fprintf (file, "\tleaq\t%sP%d(%%rip),%%r11\n", LPREFIX, labelno);
19601 #endif
19602 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19603 }
19604 else
19605 {
19606 #ifndef NO_PROFILE_COUNTERS
19607 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19608 #endif
19609 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19610 }
19611 else if (flag_pic)
19612 {
19613 #ifndef NO_PROFILE_COUNTERS
19614 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19615 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19616 #endif
19617 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19618 }
19619 else
19620 {
19621 #ifndef NO_PROFILE_COUNTERS
19622 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19623 PROFILE_COUNT_REGISTER);
19624 #endif
19625 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19626 }
19627 }
19628
19629 /* We don't have exact information about the insn sizes, but we may assume
19630 quite safely that we are informed about all 1 byte insns and memory
19631 address sizes. This is enough to eliminate unnecessary padding in
19632 99% of cases. */
19633
19634 static int
19635 min_insn_size (rtx insn)
19636 {
19637 int l = 0;
19638
19639 if (!INSN_P (insn) || !active_insn_p (insn))
19640 return 0;
19641
19642 /* Discard the alignments we've emitted ourselves, and jump tables. */
19643 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19644 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19645 return 0;
19646 if (JUMP_P (insn)
19647 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19648 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19649 return 0;
19650
19651 /* Important case - calls are always 5 bytes.
19652 It is common to have many calls in a row. */
19653 if (CALL_P (insn)
19654 && symbolic_reference_mentioned_p (PATTERN (insn))
19655 && !SIBLING_CALL_P (insn))
19656 return 5;
19657 if (get_attr_length (insn) <= 1)
19658 return 1;
19659
19660 /* For normal instructions we rely on the sizes of addresses
19661 and on the presence of a symbol to require 4 bytes of encoding.
19662 This is not the case for jumps, where references are PC relative. */
19663 if (!JUMP_P (insn))
19664 {
19665 l = get_attr_length_address (insn);
19666 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19667 l = 4;
19668 }
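/* Assume one byte of opcode plus the address bytes when we computed an
   address length; otherwise fall back to a two-byte estimate.  */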
19669 if (l)
19670 return 1+l;
19671 else
19672 return 2;
19673 }
19674
19675 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
19676 16-byte window. */
19677
19678 static void
19679 ix86_avoid_jump_misspredicts (void)
19680 {
19681 rtx insn, start = get_insns ();
19682 int nbytes = 0, njumps = 0;
19683 int isjump = 0;
19684
19685 /* Look for all minimal intervals of instructions containing 4 jumps.
19686 The intervals are bounded by START and INSN. NBYTES is the total
19687 size of the instructions in the interval, including INSN but not
19688 including START. When NBYTES is smaller than 16 bytes, it is possible
19689 that the end of START and INSN end up in the same 16-byte window.
19690
19691 The smallest offset in the window at which INSN can start is the case
19692 where START ends at offset 0. The offset of INSN is then
19693 NBYTES - sizeof (INSN). We add a p2align to the 16-byte window with
19694 maxskip 17 - NBYTES + sizeof (INSN). */
19695 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19696 {
19697
19698 nbytes += min_insn_size (insn);
19699 if (dump_file)
19700 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19701 INSN_UID (insn), min_insn_size (insn));
19702 if ((JUMP_P (insn)
19703 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19704 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19705 || CALL_P (insn))
19706 njumps++;
19707 else
19708 continue;
19709
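/* Too many jumps in the current window; slide START forward until at most
   three jumps remain between START and INSN.  */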
19710 while (njumps > 3)
19711 {
19712 start = NEXT_INSN (start);
19713 if ((JUMP_P (start)
19714 && GET_CODE (PATTERN (start)) != ADDR_VEC
19715 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19716 || CALL_P (start))
19717 njumps--, isjump = 1;
19718 else
19719 isjump = 0;
19720 nbytes -= min_insn_size (start);
19721 }
19722 gcc_assert (njumps >= 0);
19723 if (dump_file)
19724 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19725 INSN_UID (start), INSN_UID (insn), nbytes);
19726
19727 if (njumps == 3 && isjump && nbytes < 16)
19728 {
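/* INSN would become the fourth jump/call in a window smaller than 16 bytes;
   emit padding so that it starts in the next 16-byte block.  */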
19729 int padsize = 15 - nbytes + min_insn_size (insn);
19730
19731 if (dump_file)
19732 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19733 INSN_UID (insn), padsize);
19734 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19735 }
19736 }
19737 }
19738
19739 /* AMD Athlon works faster
19740 when RET is not the destination of a conditional jump or directly preceded
19741 by another jump instruction. We avoid the penalty by inserting NOP just
19742 before the RET instructions in such cases. */
19743 static void
19744 ix86_pad_returns (void)
19745 {
19746 edge e;
19747 edge_iterator ei;
19748
19749 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19750 {
19751 basic_block bb = e->src;
19752 rtx ret = BB_END (bb);
19753 rtx prev;
19754 bool replace = false;
19755
19756 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19757 || !maybe_hot_bb_p (bb))
19758 continue;
19759 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19760 if (active_insn_p (prev) || LABEL_P (prev))
19761 break;
19762 if (prev && LABEL_P (prev))
19763 {
19764 edge e;
19765 edge_iterator ei;
19766
19767 FOR_EACH_EDGE (e, ei, bb->preds)
19768 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19769 && !(e->flags & EDGE_FALLTHRU))
19770 replace = true;
19771 }
19772 if (!replace)
19773 {
19774 prev = prev_active_insn (ret);
19775 if (prev
19776 && ((JUMP_P (prev) && any_condjump_p (prev))
19777 || CALL_P (prev)))
19778 replace = true;
19779 /* Empty functions get a branch mispredict even when the jump destination
19780 is not visible to us. */
19781 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19782 replace = true;
19783 }
19784 if (replace)
19785 {
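/* Replace the plain RET with the padded return_internal_long pattern so the
   return is no longer the direct target of the preceding jump.  */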
19786 emit_insn_before (gen_return_internal_long (), ret);
19787 delete_insn (ret);
19788 }
19789 }
19790 }
19791
19792 /* Implement machine specific optimizations. We implement padding of returns
19793 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
19794 static void
19795 ix86_reorg (void)
19796 {
19797 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19798 ix86_pad_returns ();
19799 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19800 ix86_avoid_jump_misspredicts ();
19801 }
19802
19803 /* Return nonzero when a QImode register that must be represented via a REX
19804 prefix is used. */
19805 bool
19806 x86_extended_QIreg_mentioned_p (rtx insn)
19807 {
19808 int i;
19809 extract_insn_cached (insn);
19810 for (i = 0; i < recog_data.n_operands; i++)
19811 if (REG_P (recog_data.operand[i])
19812 && REGNO (recog_data.operand[i]) >= 4)
19813 return true;
19814 return false;
19815 }
19816
19817 /* Return nonzero when P points to a register encoded via a REX prefix.
19818 Called via for_each_rtx. */
19819 static int
19820 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19821 {
19822 unsigned int regno;
19823 if (!REG_P (*p))
19824 return 0;
19825 regno = REGNO (*p);
19826 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19827 }
19828
19829 /* Return true when INSN mentions register that must be encoded using REX
19830 prefix. */
19831 bool
19832 x86_extended_reg_mentioned_p (rtx insn)
19833 {
19834 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19835 }
19836
19837 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19838 optabs would emit if we didn't have TFmode patterns. */
19839
19840 void
19841 x86_emit_floatuns (rtx operands[2])
19842 {
19843 rtx neglab, donelab, i0, i1, f0, in, out;
19844 enum machine_mode mode, inmode;
19845
19846 inmode = GET_MODE (operands[1]);
19847 gcc_assert (inmode == SImode || inmode == DImode);
19848
19849 out = operands[0];
19850 in = force_reg (inmode, operands[1]);
19851 mode = GET_MODE (out);
19852 neglab = gen_label_rtx ();
19853 donelab = gen_label_rtx ();
19854 f0 = gen_reg_rtx (mode);
19855
19856 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19857
19858 expand_float (out, in, 0);
19859
19860 emit_jump_insn (gen_jump (donelab));
19861 emit_barrier ();
19862
19863 emit_label (neglab);
19864
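/* The input has its sign bit set when viewed as signed, so a plain signed
   conversion would be wrong.  Halve the value, OR the discarded low bit back
   in so the final rounding is unaffected, convert, and double the result.  */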
19865 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19866 1, OPTAB_DIRECT);
19867 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19868 1, OPTAB_DIRECT);
19869 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19870
19871 expand_float (f0, i0, 0);
19872
19873 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19874
19875 emit_label (donelab);
19876 }
19877 \f
19878 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19879 with all elements equal to VAR. Return true if successful. */
19880
19881 static bool
19882 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19883 rtx target, rtx val)
19884 {
19885 enum machine_mode smode, wsmode, wvmode;
19886 rtx x;
19887
19888 switch (mode)
19889 {
19890 case V2SImode:
19891 case V2SFmode:
19892 if (!mmx_ok)
19893 return false;
19894 /* FALLTHRU */
19895
19896 case V2DFmode:
19897 case V2DImode:
19898 case V4SFmode:
19899 case V4SImode:
19900 val = force_reg (GET_MODE_INNER (mode), val);
19901 x = gen_rtx_VEC_DUPLICATE (mode, val);
19902 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19903 return true;
19904
19905 case V4HImode:
19906 if (!mmx_ok)
19907 return false;
19908 if (TARGET_SSE || TARGET_3DNOW_A)
19909 {
19910 val = gen_lowpart (SImode, val);
19911 x = gen_rtx_TRUNCATE (HImode, val);
19912 x = gen_rtx_VEC_DUPLICATE (mode, x);
19913 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19914 return true;
19915 }
19916 else
19917 {
19918 smode = HImode;
19919 wsmode = SImode;
19920 wvmode = V2SImode;
19921 goto widen;
19922 }
19923
19924 case V8QImode:
19925 if (!mmx_ok)
19926 return false;
19927 smode = QImode;
19928 wsmode = HImode;
19929 wvmode = V4HImode;
19930 goto widen;
19931 case V8HImode:
19932 if (TARGET_SSE2)
19933 {
19934 rtx tmp1, tmp2;
19935 /* Extend HImode to SImode using a paradoxical SUBREG. */
19936 tmp1 = gen_reg_rtx (SImode);
19937 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19938 /* Insert the SImode value as low element of V4SImode vector. */
19939 tmp2 = gen_reg_rtx (V4SImode);
19940 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19941 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19942 CONST0_RTX (V4SImode),
19943 const1_rtx);
19944 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19945 /* Cast the V4SImode vector back to a V8HImode vector. */
19946 tmp1 = gen_reg_rtx (V8HImode);
19947 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19948 /* Duplicate the low short through the whole low SImode word. */
19949 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19950 /* Cast the V8HImode vector back to a V4SImode vector. */
19951 tmp2 = gen_reg_rtx (V4SImode);
19952 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19953 /* Replicate the low element of the V4SImode vector. */
19954 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19955 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19956 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19957 return true;
19958 }
19959 smode = HImode;
19960 wsmode = SImode;
19961 wvmode = V4SImode;
19962 goto widen;
19963 case V16QImode:
19964 if (TARGET_SSE2)
19965 {
19966 rtx tmp1, tmp2;
19967 /* Extend QImode to SImode using a paradoxical SUBREG. */
19968 tmp1 = gen_reg_rtx (SImode);
19969 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19970 /* Insert the SImode value as low element of V4SImode vector. */
19971 tmp2 = gen_reg_rtx (V4SImode);
19972 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19973 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19974 CONST0_RTX (V4SImode),
19975 const1_rtx);
19976 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19977 /* Cast the V4SImode vector back to a V16QImode vector. */
19978 tmp1 = gen_reg_rtx (V16QImode);
19979 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
19980 /* Duplicate the low byte through the whole low SImode word. */
19981 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19982 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
19983 /* Cast the V16QImode vector back to a V4SImode vector. */
19984 tmp2 = gen_reg_rtx (V4SImode);
19985 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19986 /* Replicate the low element of the V4SImode vector. */
19987 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19988 /* Cast the V4SImode vector back to V16QImode, and store in target. */
19989 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
19990 return true;
19991 }
19992 smode = QImode;
19993 wsmode = HImode;
19994 wvmode = V8HImode;
19995 goto widen;
19996 widen:
19997 /* Replicate the value once into the next wider mode and recurse. */
19998 val = convert_modes (wsmode, smode, val, true);
19999 x = expand_simple_binop (wsmode, ASHIFT, val,
20000 GEN_INT (GET_MODE_BITSIZE (smode)),
20001 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20002 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20003
20004 x = gen_reg_rtx (wvmode);
20005 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20006 gcc_unreachable ();
20007 emit_move_insn (target, gen_lowpart (mode, x));
20008 return true;
20009
20010 default:
20011 return false;
20012 }
20013 }
20014
20015 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20016 whose ONE_VAR element is VAR, and other elements are zero. Return true
20017 if successful. */
20018
20019 static bool
20020 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20021 rtx target, rtx var, int one_var)
20022 {
20023 enum machine_mode vsimode;
20024 rtx new_target;
20025 rtx x, tmp;
20026
20027 switch (mode)
20028 {
20029 case V2SFmode:
20030 case V2SImode:
20031 if (!mmx_ok)
20032 return false;
20033 /* FALLTHRU */
20034
20035 case V2DFmode:
20036 case V2DImode:
20037 if (one_var != 0)
20038 return false;
20039 var = force_reg (GET_MODE_INNER (mode), var);
20040 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20041 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20042 return true;
20043
20044 case V4SFmode:
20045 case V4SImode:
20046 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20047 new_target = gen_reg_rtx (mode);
20048 else
20049 new_target = target;
20050 var = force_reg (GET_MODE_INNER (mode), var);
20051 x = gen_rtx_VEC_DUPLICATE (mode, var);
20052 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20053 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20054 if (one_var != 0)
20055 {
20056 /* We need to shuffle the value to the correct position, so
20057 create a new pseudo to store the intermediate result. */
20058
20059 /* With SSE2, we can use the integer shuffle insns. */
20060 if (mode != V4SFmode && TARGET_SSE2)
20061 {
20062 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20063 GEN_INT (1),
20064 GEN_INT (one_var == 1 ? 0 : 1),
20065 GEN_INT (one_var == 2 ? 0 : 1),
20066 GEN_INT (one_var == 3 ? 0 : 1)));
20067 if (target != new_target)
20068 emit_move_insn (target, new_target);
20069 return true;
20070 }
20071
20072 /* Otherwise convert the intermediate result to V4SFmode and
20073 use the SSE1 shuffle instructions. */
20074 if (mode != V4SFmode)
20075 {
20076 tmp = gen_reg_rtx (V4SFmode);
20077 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20078 }
20079 else
20080 tmp = new_target;
20081
20082 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20083 GEN_INT (1),
20084 GEN_INT (one_var == 1 ? 0 : 1),
20085 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20086 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20087
20088 if (mode != V4SFmode)
20089 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20090 else if (tmp != target)
20091 emit_move_insn (target, tmp);
20092 }
20093 else if (target != new_target)
20094 emit_move_insn (target, new_target);
20095 return true;
20096
20097 case V8HImode:
20098 case V16QImode:
20099 vsimode = V4SImode;
20100 goto widen;
20101 case V4HImode:
20102 case V8QImode:
20103 if (!mmx_ok)
20104 return false;
20105 vsimode = V2SImode;
20106 goto widen;
20107 widen:
20108 if (one_var != 0)
20109 return false;
20110
20111 /* Zero extend the variable element to SImode and recurse. */
20112 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20113
20114 x = gen_reg_rtx (vsimode);
20115 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20116 var, one_var))
20117 gcc_unreachable ();
20118
20119 emit_move_insn (target, gen_lowpart (mode, x));
20120 return true;
20121
20122 default:
20123 return false;
20124 }
20125 }
20126
20127 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20128 consisting of the values in VALS. It is known that all elements
20129 except ONE_VAR are constants. Return true if successful. */
20130
20131 static bool
20132 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20133 rtx target, rtx vals, int one_var)
20134 {
20135 rtx var = XVECEXP (vals, 0, one_var);
20136 enum machine_mode wmode;
20137 rtx const_vec, x;
20138
20139 const_vec = copy_rtx (vals);
20140 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20141 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20142
20143 switch (mode)
20144 {
20145 case V2DFmode:
20146 case V2DImode:
20147 case V2SFmode:
20148 case V2SImode:
20149 /* For the two element vectors, it's just as easy to use
20150 the general case. */
20151 return false;
20152
20153 case V4SFmode:
20154 case V4SImode:
20155 case V8HImode:
20156 case V4HImode:
20157 break;
20158
20159 case V16QImode:
20160 wmode = V8HImode;
20161 goto widen;
20162 case V8QImode:
20163 wmode = V4HImode;
20164 goto widen;
20165 widen:
20166 /* There's no way to set one QImode entry easily. Combine
20167 the variable value with its adjacent constant value, and
20168 promote to an HImode set. */
20169 x = XVECEXP (vals, 0, one_var ^ 1);
20170 if (one_var & 1)
20171 {
20172 var = convert_modes (HImode, QImode, var, true);
20173 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20174 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20175 x = GEN_INT (INTVAL (x) & 0xff);
20176 }
20177 else
20178 {
20179 var = convert_modes (HImode, QImode, var, true);
20180 x = gen_int_mode (INTVAL (x) << 8, HImode);
20181 }
20182 if (x != const0_rtx)
20183 var = expand_simple_binop (HImode, IOR, var, x, var,
20184 1, OPTAB_LIB_WIDEN);
20185
20186 x = gen_reg_rtx (wmode);
20187 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20188 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20189
20190 emit_move_insn (target, gen_lowpart (mode, x));
20191 return true;
20192
20193 default:
20194 return false;
20195 }
20196
20197 emit_move_insn (target, const_vec);
20198 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20199 return true;
20200 }
20201
20202 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20203 all values variable, and none identical. */
20204
20205 static void
20206 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20207 rtx target, rtx vals)
20208 {
20209 enum machine_mode half_mode = GET_MODE_INNER (mode);
20210 rtx op0 = NULL, op1 = NULL;
20211 bool use_vec_concat = false;
20212
20213 switch (mode)
20214 {
20215 case V2SFmode:
20216 case V2SImode:
20217 if (!mmx_ok && !TARGET_SSE)
20218 break;
20219 /* FALLTHRU */
20220
20221 case V2DFmode:
20222 case V2DImode:
20223 /* For the two element vectors, we always implement VEC_CONCAT. */
20224 op0 = XVECEXP (vals, 0, 0);
20225 op1 = XVECEXP (vals, 0, 1);
20226 use_vec_concat = true;
20227 break;
20228
20229 case V4SFmode:
20230 half_mode = V2SFmode;
20231 goto half;
20232 case V4SImode:
20233 half_mode = V2SImode;
20234 goto half;
20235 half:
20236 {
20237 rtvec v;
20238
20239 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20240 Recurse to load the two halves. */
20241
20242 op0 = gen_reg_rtx (half_mode);
20243 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20244 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20245
20246 op1 = gen_reg_rtx (half_mode);
20247 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20248 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20249
20250 use_vec_concat = true;
20251 }
20252 break;
20253
20254 case V8HImode:
20255 case V16QImode:
20256 case V4HImode:
20257 case V8QImode:
20258 break;
20259
20260 default:
20261 gcc_unreachable ();
20262 }
20263
20264 if (use_vec_concat)
20265 {
20266 if (!register_operand (op0, half_mode))
20267 op0 = force_reg (half_mode, op0);
20268 if (!register_operand (op1, half_mode))
20269 op1 = force_reg (half_mode, op1);
20270
20271 emit_insn (gen_rtx_SET (VOIDmode, target,
20272 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20273 }
20274 else
20275 {
20276 int i, j, n_elts, n_words, n_elt_per_word;
20277 enum machine_mode inner_mode;
20278 rtx words[4], shift;
20279
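/* Build each word-sized chunk of the vector in an integer register by
   shifting the elements in one at a time, then assemble the chunks into
   the vector.  */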
20280 inner_mode = GET_MODE_INNER (mode);
20281 n_elts = GET_MODE_NUNITS (mode);
20282 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20283 n_elt_per_word = n_elts / n_words;
20284 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20285
20286 for (i = 0; i < n_words; ++i)
20287 {
20288 rtx word = NULL_RTX;
20289
20290 for (j = 0; j < n_elt_per_word; ++j)
20291 {
20292 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20293 elt = convert_modes (word_mode, inner_mode, elt, true);
20294
20295 if (j == 0)
20296 word = elt;
20297 else
20298 {
20299 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20300 word, 1, OPTAB_LIB_WIDEN);
20301 word = expand_simple_binop (word_mode, IOR, word, elt,
20302 word, 1, OPTAB_LIB_WIDEN);
20303 }
20304 }
20305
20306 words[i] = word;
20307 }
20308
20309 if (n_words == 1)
20310 emit_move_insn (target, gen_lowpart (mode, words[0]));
20311 else if (n_words == 2)
20312 {
20313 rtx tmp = gen_reg_rtx (mode);
20314 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20315 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20316 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20317 emit_move_insn (target, tmp);
20318 }
20319 else if (n_words == 4)
20320 {
20321 rtx tmp = gen_reg_rtx (V4SImode);
20322 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20323 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20324 emit_move_insn (target, gen_lowpart (mode, tmp));
20325 }
20326 else
20327 gcc_unreachable ();
20328 }
20329 }
20330
20331 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20332 instructions unless MMX_OK is true. */
20333
20334 void
20335 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20336 {
20337 enum machine_mode mode = GET_MODE (target);
20338 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20339 int n_elts = GET_MODE_NUNITS (mode);
20340 int n_var = 0, one_var = -1;
20341 bool all_same = true, all_const_zero = true;
20342 int i;
20343 rtx x;
20344
20345 for (i = 0; i < n_elts; ++i)
20346 {
20347 x = XVECEXP (vals, 0, i);
20348 if (!CONSTANT_P (x))
20349 n_var++, one_var = i;
20350 else if (x != CONST0_RTX (inner_mode))
20351 all_const_zero = false;
20352 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20353 all_same = false;
20354 }
20355
20356 /* Constants are best loaded from the constant pool. */
20357 if (n_var == 0)
20358 {
20359 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20360 return;
20361 }
20362
20363 /* If all values are identical, broadcast the value. */
20364 if (all_same
20365 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20366 XVECEXP (vals, 0, 0)))
20367 return;
20368
20369 /* Values where only one field is non-constant are best loaded from
20370 the pool and overwritten via move later. */
20371 if (n_var == 1)
20372 {
20373 if (all_const_zero
20374 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20375 XVECEXP (vals, 0, one_var),
20376 one_var))
20377 return;
20378
20379 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20380 return;
20381 }
20382
20383 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20384 }
20385
20386 void
20387 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20388 {
20389 enum machine_mode mode = GET_MODE (target);
20390 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20391 bool use_vec_merge = false;
20392 rtx tmp;
20393
20394 switch (mode)
20395 {
20396 case V2SFmode:
20397 case V2SImode:
20398 if (mmx_ok)
20399 {
20400 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20401 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20402 if (elt == 0)
20403 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20404 else
20405 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20406 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20407 return;
20408 }
20409 break;
20410
20411 case V2DFmode:
20412 case V2DImode:
20413 {
20414 rtx op0, op1;
20415
20416 /* For the two element vectors, we implement a VEC_CONCAT with
20417 the extraction of the other element. */
20418
20419 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20420 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20421
20422 if (elt == 0)
20423 op0 = val, op1 = tmp;
20424 else
20425 op0 = tmp, op1 = val;
20426
20427 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20428 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20429 }
20430 return;
20431
20432 case V4SFmode:
20433 switch (elt)
20434 {
20435 case 0:
20436 use_vec_merge = true;
20437 break;
20438
20439 case 1:
20440 /* tmp = target = A B C D */
20441 tmp = copy_to_reg (target);
20442 /* target = A A B B */
20443 emit_insn (gen_sse_unpcklps (target, target, target));
20444 /* target = X A B B */
20445 ix86_expand_vector_set (false, target, val, 0);
20446 /* target = A X C D */
20447 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20448 GEN_INT (1), GEN_INT (0),
20449 GEN_INT (2+4), GEN_INT (3+4)));
20450 return;
20451
20452 case 2:
20453 /* tmp = target = A B C D */
20454 tmp = copy_to_reg (target);
20455 /* tmp = X B C D */
20456 ix86_expand_vector_set (false, tmp, val, 0);
20457 /* target = A B X D */
20458 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20459 GEN_INT (0), GEN_INT (1),
20460 GEN_INT (0+4), GEN_INT (3+4)));
20461 return;
20462
20463 case 3:
20464 /* tmp = target = A B C D */
20465 tmp = copy_to_reg (target);
20466 /* tmp = X B C D */
20467 ix86_expand_vector_set (false, tmp, val, 0);
20468 /* target = A B C X */
20469 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20470 GEN_INT (0), GEN_INT (1),
20471 GEN_INT (2+4), GEN_INT (0+4)));
20472 return;
20473
20474 default:
20475 gcc_unreachable ();
20476 }
20477 break;
20478
20479 case V4SImode:
20480 /* Element 0 handled by vec_merge below. */
20481 if (elt == 0)
20482 {
20483 use_vec_merge = true;
20484 break;
20485 }
20486
20487 if (TARGET_SSE2)
20488 {
20489 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20490 store into element 0, then shuffle them back. */
20491
20492 rtx order[4];
20493
20494 order[0] = GEN_INT (elt);
20495 order[1] = const1_rtx;
20496 order[2] = const2_rtx;
20497 order[3] = GEN_INT (3);
20498 order[elt] = const0_rtx;
20499
20500 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20501 order[1], order[2], order[3]));
20502
20503 ix86_expand_vector_set (false, target, val, 0);
20504
20505 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20506 order[1], order[2], order[3]));
20507 }
20508 else
20509 {
20510 /* For SSE1, we have to reuse the V4SF code. */
20511 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20512 gen_lowpart (SFmode, val), elt);
20513 }
20514 return;
20515
20516 case V8HImode:
20517 use_vec_merge = TARGET_SSE2;
20518 break;
20519 case V4HImode:
20520 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20521 break;
20522
20523 case V16QImode:
20524 case V8QImode:
20525 default:
20526 break;
20527 }
20528
20529 if (use_vec_merge)
20530 {
20531 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20532 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20533 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20534 }
20535 else
20536 {
20537 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20538
20539 emit_move_insn (mem, target);
20540
20541 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20542 emit_move_insn (tmp, val);
20543
20544 emit_move_insn (target, mem);
20545 }
20546 }
20547
20548 void
20549 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20550 {
20551 enum machine_mode mode = GET_MODE (vec);
20552 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20553 bool use_vec_extr = false;
20554 rtx tmp;
20555
20556 switch (mode)
20557 {
20558 case V2SImode:
20559 case V2SFmode:
20560 if (!mmx_ok)
20561 break;
20562 /* FALLTHRU */
20563
20564 case V2DFmode:
20565 case V2DImode:
20566 use_vec_extr = true;
20567 break;
20568
20569 case V4SFmode:
20570 switch (elt)
20571 {
20572 case 0:
20573 tmp = vec;
20574 break;
20575
20576 case 1:
20577 case 3:
20578 tmp = gen_reg_rtx (mode);
20579 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20580 GEN_INT (elt), GEN_INT (elt),
20581 GEN_INT (elt+4), GEN_INT (elt+4)));
20582 break;
20583
20584 case 2:
20585 tmp = gen_reg_rtx (mode);
20586 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20587 break;
20588
20589 default:
20590 gcc_unreachable ();
20591 }
20592 vec = tmp;
20593 use_vec_extr = true;
20594 elt = 0;
20595 break;
20596
20597 case V4SImode:
20598 if (TARGET_SSE2)
20599 {
20600 switch (elt)
20601 {
20602 case 0:
20603 tmp = vec;
20604 break;
20605
20606 case 1:
20607 case 3:
20608 tmp = gen_reg_rtx (mode);
20609 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20610 GEN_INT (elt), GEN_INT (elt),
20611 GEN_INT (elt), GEN_INT (elt)));
20612 break;
20613
20614 case 2:
20615 tmp = gen_reg_rtx (mode);
20616 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20617 break;
20618
20619 default:
20620 gcc_unreachable ();
20621 }
20622 vec = tmp;
20623 use_vec_extr = true;
20624 elt = 0;
20625 }
20626 else
20627 {
20628 /* For SSE1, we have to reuse the V4SF code. */
20629 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20630 gen_lowpart (V4SFmode, vec), elt);
20631 return;
20632 }
20633 break;
20634
20635 case V8HImode:
20636 use_vec_extr = TARGET_SSE2;
20637 break;
20638 case V4HImode:
20639 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20640 break;
20641
20642 case V16QImode:
20643 case V8QImode:
20644 /* ??? Could extract the appropriate HImode element and shift. */
20645 default:
20646 break;
20647 }
20648
20649 if (use_vec_extr)
20650 {
20651 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20652 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20653
20654 /* Let the rtl optimizers know about the zero extension performed. */
20655 if (inner_mode == HImode)
20656 {
20657 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20658 target = gen_lowpart (SImode, target);
20659 }
20660
20661 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20662 }
20663 else
20664 {
20665 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20666
20667 emit_move_insn (mem, vec);
20668
20669 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20670 emit_move_insn (target, tmp);
20671 }
20672 }
20673
20674 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20675 pattern to reduce; DEST is the destination; IN is the input vector. */
20676
20677 void
20678 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20679 {
20680 rtx tmp1, tmp2, tmp3;
20681
20682 tmp1 = gen_reg_rtx (V4SFmode);
20683 tmp2 = gen_reg_rtx (V4SFmode);
20684 tmp3 = gen_reg_rtx (V4SFmode);
20685
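/* Reduce pairwise: fold the high half of IN onto the low half, then fold
   element 1 onto element 0, so DEST holds the full reduction in its low
   element.  */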
20686 emit_insn (gen_sse_movhlps (tmp1, in, in));
20687 emit_insn (fn (tmp2, tmp1, in));
20688
20689 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20690 GEN_INT (1), GEN_INT (1),
20691 GEN_INT (1+4), GEN_INT (1+4)));
20692 emit_insn (fn (dest, tmp2, tmp3));
20693 }
20694 \f
20695 /* Target hook for scalar_mode_supported_p. */
20696 static bool
20697 ix86_scalar_mode_supported_p (enum machine_mode mode)
20698 {
20699 if (DECIMAL_FLOAT_MODE_P (mode))
20700 return true;
20701 else
20702 return default_scalar_mode_supported_p (mode);
20703 }
20704
20705 /* Implements target hook vector_mode_supported_p. */
20706 static bool
20707 ix86_vector_mode_supported_p (enum machine_mode mode)
20708 {
20709 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20710 return true;
20711 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20712 return true;
20713 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20714 return true;
20715 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20716 return true;
20717 return false;
20718 }
20719
20720 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20721
20722 We do this in the new i386 backend to maintain source compatibility
20723 with the old cc0-based compiler. */
20724
20725 static tree
20726 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20727 tree inputs ATTRIBUTE_UNUSED,
20728 tree clobbers)
20729 {
20730 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20731 clobbers);
20732 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20733 clobbers);
20734 return clobbers;
20735 }
20736
20737 /* Return true if this goes in large data/bss.  */
20738
20739 static bool
20740 ix86_in_large_data_p (tree exp)
20741 {
20742 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20743 return false;
20744
20745 /* Functions are never large data. */
20746 if (TREE_CODE (exp) == FUNCTION_DECL)
20747 return false;
20748
20749 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20750 {
20751 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20752 if (strcmp (section, ".ldata") == 0
20753 || strcmp (section, ".lbss") == 0)
20754 return true;
20755 return false;
20756 }
20757 else
20758 {
20759 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20760
20761 /* If this is an incomplete type with size 0, then we can't put it
20762 in data because it might be too big when completed. */
20763 if (!size || size > ix86_section_threshold)
20764 return true;
20765 }
20766
20767 return false;
20768 }
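
/* Encode additional section information: beyond the default processing,
   mark static or external variables that live in the large data or bss
   sections, so that references to them use far (full 64-bit) addressing.  */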
20769 static void
20770 ix86_encode_section_info (tree decl, rtx rtl, int first)
20771 {
20772 default_encode_section_info (decl, rtl, first);
20773
20774 if (TREE_CODE (decl) == VAR_DECL
20775 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20776 && ix86_in_large_data_p (decl))
20777 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20778 }
20779
20780 /* Worker function for REVERSE_CONDITION.  For FP comparison modes use the NaN-aware reversal (e.g. GT becomes UNLE, not LE).  */
20781
20782 enum rtx_code
20783 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20784 {
20785 return (mode != CCFPmode && mode != CCFPUmode
20786 ? reverse_condition (code)
20787 : reverse_condition_maybe_unordered (code));
20788 }
20789
20790 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20791 to OPERANDS[0]. */
20792
20793 const char *
20794 output_387_reg_move (rtx insn, rtx *operands)
20795 {
20796 if (REG_P (operands[1])
20797 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20798 {
20799 if (REGNO (operands[0]) == FIRST_STACK_REG)
20800 return output_387_ffreep (operands, 0);
20801 return "fstp\t%y0";
20802 }
20803 if (STACK_TOP_P (operands[0]))
20804 return "fld%z1\t%y1";
20805 return "fst\t%y0";
20806 }
20807
20808 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20809 FP status register is set. */
20810
20811 void
20812 ix86_emit_fp_unordered_jump (rtx label)
20813 {
20814 rtx reg = gen_reg_rtx (HImode);
20815 rtx temp;
20816
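/* fnstsw places the FP status word in REG; C2 is bit 2 of its high byte
   (mask 0x04).  With SAHF that byte is copied into EFLAGS, where the bit
   becomes PF, so the UNORDERED test below checks it directly; otherwise
   the 0x04 bit of the high byte is tested explicitly.  */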
20817 emit_insn (gen_x86_fnstsw_1 (reg));
20818
20819 if (TARGET_USE_SAHF)
20820 {
20821 emit_insn (gen_x86_sahf_1 (reg));
20822
20823 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20824 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20825 }
20826 else
20827 {
20828 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20829
20830 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20831 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20832 }
20833
20834 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20835 gen_rtx_LABEL_REF (VOIDmode, label),
20836 pc_rtx);
20837 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20838 emit_jump_insn (temp);
20839 }
20840
20841 /* Output code to perform a log1p XFmode calculation. */
20842
20843 void
ix86_emit_i387_log1p (rtx op0, rtx op1)
20844 {
20845 rtx label1 = gen_label_rtx ();
20846 rtx label2 = gen_label_rtx ();
20847
20848 rtx tmp = gen_reg_rtx (XFmode);
20849 rtx tmp2 = gen_reg_rtx (XFmode);
20850
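/* fyl2xp1 computes y * log2 (1 + x) but is only specified for
   |x| < 1 - sqrt(2)/2, about 0.29289 -- the constant tested below.  For
   larger magnitudes the code branches to the fyl2x path, which takes the
   log2 of the explicitly computed sum 1 + x.  */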
20851 emit_insn (gen_absxf2 (tmp, op1));
20852 emit_insn (gen_cmpxf (tmp,
20853 CONST_DOUBLE_FROM_REAL_VALUE (
20854 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20855 XFmode)));
20856 emit_jump_insn (gen_bge (label1));
20857
20858 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20859 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20860 emit_jump (label2);
20861
20862 emit_label (label1);
20863 emit_move_insn (tmp, CONST1_RTX (XFmode));
20864 emit_insn (gen_addxf3 (tmp, op1, tmp));
20865 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20866 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20867
20868 emit_label (label2);
20869 }
20870
20871 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20872
20873 static void
20874 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20875 tree decl)
20876 {
20877 /* With Binutils 2.15, the "@unwind" marker must be specified on
20878 every occurrence of the ".eh_frame" section, not just the first
20879 one. */
20880 if (TARGET_64BIT
20881 && strcmp (name, ".eh_frame") == 0)
20882 {
20883 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20884 flags & SECTION_WRITE ? "aw" : "a");
20885 return;
20886 }
20887 default_elf_asm_named_section (name, flags, decl);
20888 }
20889
20890 /* Return the mangling of TYPE if it is an extended fundamental type. */
20891
20892 static const char *
20893 ix86_mangle_fundamental_type (tree type)
20894 {
20895 switch (TYPE_MODE (type))
20896 {
20897 case TFmode:
20898 /* __float128 is "g". */
20899 return "g";
20900 case XFmode:
20901 /* "long double" or __float80 is "e". */
20902 return "e";
20903 default:
20904 return NULL;
20905 }
20906 }
20907
20908 /* For 32-bit code we can save PIC register setup by using
20909 the hidden function __stack_chk_fail_local instead of calling
20910 __stack_chk_fail directly.  64-bit code doesn't need to set up any PIC
20911 register, so it is better to call __stack_chk_fail directly. */
20912
20913 static tree
20914 ix86_stack_protect_fail (void)
20915 {
20916 return TARGET_64BIT
20917 ? default_external_stack_protect_fail ()
20918 : default_hidden_stack_protect_fail ();
20919 }
20920
20921 /* Select a format to encode pointers in exception handling data. CODE
20922 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20923 true if the symbol may be affected by dynamic relocations.
20924
20925 ??? All x86 object file formats are capable of representing this.
20926 After all, the relocation needed is the same as for the call insn.
20927 Whether or not a particular assembler allows us to enter such, I
20928 guess we'll have to see. */
20929 int
20930 asm_preferred_eh_data_format (int code, int global)
20931 {
20932 if (flag_pic)
20933 {
20934 int type = DW_EH_PE_sdata8;
20935 if (!TARGET_64BIT
20936 || ix86_cmodel == CM_SMALL_PIC
20937 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20938 type = DW_EH_PE_sdata4;
20939 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20940 }
20941 if (ix86_cmodel == CM_SMALL
20942 || (ix86_cmodel == CM_MEDIUM && code))
20943 return DW_EH_PE_udata4;
20944 return DW_EH_PE_absptr;
20945 }
20946 \f
20947 /* Expand copysign from SIGN to the positive value ABS_VALUE
20948 storing in RESULT.  If MASK is nonnull, it is the mask that clears
20949 the sign bit (the fabs mask); otherwise a sign-bit mask is built here.  */
20950 static void
20951 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20952 {
20953 enum machine_mode mode = GET_MODE (sign);
20954 rtx sgn = gen_reg_rtx (mode);
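/* The code below computes RESULT = ABS_VALUE | (SIGN & sign-bit-mask).
   When the caller supplies MASK it is the fabs mask (sign bit clear), so
   it is inverted first; otherwise a fresh sign-bit mask is built, and for
   scalar modes the vector constant is narrowed via VEC_SELECT.  */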
20955 if (mask == NULL_RTX)
20956 {
20957 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20958 if (!VECTOR_MODE_P (mode))
20959 {
20960 /* We need to generate a scalar mode mask in this case. */
20961 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20962 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20963 mask = gen_reg_rtx (mode);
20964 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20965 }
20966 }
20967 else
20968 mask = gen_rtx_NOT (mode, mask);
20969 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20970 gen_rtx_AND (mode, mask, sign)));
20971 emit_insn (gen_rtx_SET (VOIDmode, result,
20972 gen_rtx_IOR (mode, abs_value, sgn)));
20973 }
20974
20975 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20976 mask for masking out the sign-bit is stored in *SMASK, if that is
20977 non-null. */
20978 static rtx
20979 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20980 {
20981 enum machine_mode mode = GET_MODE (op0);
20982 rtx xa, mask;
20983
20984 xa = gen_reg_rtx (mode);
20985 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20986 if (!VECTOR_MODE_P (mode))
20987 {
20988 /* We need to generate a scalar mode mask in this case. */
20989 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20990 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20991 mask = gen_reg_rtx (mode);
20992 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20993 }
20994 emit_insn (gen_rtx_SET (VOIDmode, xa,
20995 gen_rtx_AND (mode, op0, mask)));
20996
20997 if (smask)
20998 *smask = mask;
20999
21000 return xa;
21001 }
21002
21003 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21004 swapping the operands if SWAP_OPERANDS is true. The expanded
21005 code is a forward jump to a newly created label in case the
21006 comparison is true. The generated label rtx is returned. */
21007 static rtx
21008 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21009 bool swap_operands)
21010 {
21011 rtx label, tmp;
21012
21013 if (swap_operands)
21014 {
21015 tmp = op0;
21016 op0 = op1;
21017 op1 = tmp;
21018 }
21019
21020 label = gen_label_rtx ();
21021 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21022 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21023 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21024 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21025 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21026 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21027 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21028 JUMP_LABEL (tmp) = label;
21029
21030 return label;
21031 }
21032
21033 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21034 using comparison code CODE. Operands are swapped for the comparison if
21035 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
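/* The comparison instruction (cmpss/cmpsd) yields an all-ones bit pattern
   in elements where the predicate holds and all-zeros elsewhere; the
   rounding expanders below AND this mask with 1.0 to obtain a 0.0-or-1.0
   adjustment value.  */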
21036 static rtx
21037 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21038 bool swap_operands)
21039 {
21040 enum machine_mode mode = GET_MODE (op0);
21041 rtx mask = gen_reg_rtx (mode);
21042
21043 if (swap_operands)
21044 {
21045 rtx tmp = op0;
21046 op0 = op1;
21047 op1 = tmp;
21048 }
21049
21050 if (mode == DFmode)
21051 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21052 gen_rtx_fmt_ee (code, mode, op0, op1)));
21053 else
21054 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21055 gen_rtx_fmt_ee (code, mode, op0, op1)));
21056
21057 return mask;
21058 }
21059
21060 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21061 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
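/* Adding and then subtracting 2**52 (2**23 for SFmode) forces a value
   whose magnitude is below that bound to be rounded to an integer in the
   current rounding mode, because at and above 2**52 every representable
   double is already integral.  The expanders below rely on this trick.  */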
21062 static rtx
21063 ix86_gen_TWO52 (enum machine_mode mode)
21064 {
21065 REAL_VALUE_TYPE TWO52r;
21066 rtx TWO52;
21067
21068 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21069 TWO52 = const_double_from_real_value (TWO52r, mode);
21070 TWO52 = force_reg (mode, TWO52);
21071
21072 return TWO52;
21073 }
21074
21075 /* Expand SSE sequence for computing lround from OP1 storing
21076 into OP0. */
21077 void
21078 ix86_expand_lround (rtx op0, rtx op1)
21079 {
21080 /* C code for the stuff we're doing below:
21081 tmp = op1 + copysign (nextafter (0.5, 0.0), op1);
21082 return (long)tmp;
21083 */
21084 enum machine_mode mode = GET_MODE (op1);
21085 const struct real_format *fmt;
21086 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21087 rtx adj;
21088
21089 /* load nextafter (0.5, 0.0) */
21090 fmt = REAL_MODE_FORMAT (mode);
21091 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21092 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
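/* Using the largest representable value below 0.5, rather than 0.5 itself,
   avoids rounding inputs that are just below 0.5 up to 1: e.g. the double
   predecessor of 0.5 plus 0.5 rounds to 1.0, which would then truncate to
   1 even though lround of that input must be 0.  */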
21093
21094 /* adj = copysign (0.5, op1) */
21095 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21096 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21097
21098 /* adj = op1 + adj */
21099 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21100
21101 /* op0 = (imode)adj */
21102 expand_fix (op0, adj, 0);
21103 }
21104
21105 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1,
21106 storing the result into OPERAND0.  */
21107 void
21108 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21109 {
21110 /* C code for the stuff we're doing below (for do_floor):
21111 xi = (long)op1;
21112 xi -= (double)xi > op1 ? 1 : 0;
21113 return xi;
21114 */
21115 enum machine_mode fmode = GET_MODE (op1);
21116 enum machine_mode imode = GET_MODE (op0);
21117 rtx ireg, freg, label, tmp;
21118
21119 /* reg = (long)op1 */
21120 ireg = gen_reg_rtx (imode);
21121 expand_fix (ireg, op1, 0);
21122
21123 /* freg = (double)reg */
21124 freg = gen_reg_rtx (fmode);
21125 expand_float (freg, ireg, 0);
21126
21127 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21128 label = ix86_expand_sse_compare_and_jump (UNLE,
21129 freg, op1, !do_floor);
21130 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21131 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21132 emit_move_insn (ireg, tmp);
21133
21134 emit_label (label);
21135 LABEL_NUSES (label) = 1;
21136
21137 emit_move_insn (op0, ireg);
21138 }
21139
21140 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21141 result in OPERAND0. */
21142 void
21143 ix86_expand_rint (rtx operand0, rtx operand1)
21144 {
21145 /* C code for the stuff we're doing below:
21146 xa = fabs (operand1);
21147 if (!isless (xa, 2**52))
21148 return operand1;
21149 xa = xa + 2**52 - 2**52;
21150 return copysign (xa, operand1);
21151 */
21152 enum machine_mode mode = GET_MODE (operand0);
21153 rtx res, xa, label, TWO52, mask;
21154
21155 res = gen_reg_rtx (mode);
21156 emit_move_insn (res, operand1);
21157
21158 /* xa = abs (operand1) */
21159 xa = ix86_expand_sse_fabs (res, &mask);
21160
21161 /* if (!isless (xa, TWO52)) goto label; */
21162 TWO52 = ix86_gen_TWO52 (mode);
21163 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21164
21165 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21166 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21167
21168 ix86_sse_copysign_to_positive (res, xa, res, mask);
21169
21170 emit_label (label);
21171 LABEL_NUSES (label) = 1;
21172
21173 emit_move_insn (operand0, res);
21174 }
21175
21176 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21177 into OPERAND0. */
21178 void
21179 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21180 {
21181 /* C code for the stuff we expand below.
21182 double xa = fabs (x), x2;
21183 if (!isless (xa, TWO52))
21184 return x;
21185 xa = xa + TWO52 - TWO52;
21186 x2 = copysign (xa, x);
21187 Compensate. Floor:
21188 if (x2 > x)
21189 x2 -= 1;
21190 Compensate. Ceil:
21191 if (x2 < x)
21192 x2 -= -1;
21193 return x2;
21194 */
21195 enum machine_mode mode = GET_MODE (operand0);
21196 rtx xa, TWO52, tmp, label, one, res, mask;
21197
21198 TWO52 = ix86_gen_TWO52 (mode);
21199
21200 /* Temporary for holding the result, initialized to the input
21201 operand to ease control flow. */
21202 res = gen_reg_rtx (mode);
21203 emit_move_insn (res, operand1);
21204
21205 /* xa = abs (operand1) */
21206 xa = ix86_expand_sse_fabs (res, &mask);
21207
21208 /* if (!isless (xa, TWO52)) goto label; */
21209 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21210
21211 /* xa = xa + TWO52 - TWO52; */
21212 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21213 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21214
21215 /* xa = copysign (xa, operand1) */
21216 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21217
21218 /* generate 1.0 or -1.0 */
21219 one = force_reg (mode,
21220 const_double_from_real_value (do_floor
21221 ? dconst1 : dconstm1, mode));
21222
21223 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21224 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21225 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21226 gen_rtx_AND (mode, one, tmp)));
21227 /* We always need to subtract here to preserve signed zero. */
21228 tmp = expand_simple_binop (mode, MINUS,
21229 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21230 emit_move_insn (res, tmp);
21231
21232 emit_label (label);
21233 LABEL_NUSES (label) = 1;
21234
21235 emit_move_insn (operand0, res);
21236 }
21237
21238 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21239 into OPERAND0. */
21240 void
21241 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21242 {
21243 /* C code for the stuff we expand below.
21244 double xa = fabs (x), x2;
21245 if (!isless (xa, TWO52))
21246 return x;
21247 x2 = (double)(long)x;
21248 Compensate. Floor:
21249 if (x2 > x)
21250 x2 -= 1;
21251 Compensate. Ceil:
21252 if (x2 < x)
21253 x2 += 1;
21254 if (HONOR_SIGNED_ZEROS (mode))
21255 return copysign (x2, x);
21256 return x2;
21257 */
21258 enum machine_mode mode = GET_MODE (operand0);
21259 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21260
21261 TWO52 = ix86_gen_TWO52 (mode);
21262
21263 /* Temporary for holding the result, initialized to the input
21264 operand to ease control flow. */
21265 res = gen_reg_rtx (mode);
21266 emit_move_insn (res, operand1);
21267
21268 /* xa = abs (operand1) */
21269 xa = ix86_expand_sse_fabs (res, &mask);
21270
21271 /* if (!isless (xa, TWO52)) goto label; */
21272 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21273
21274 /* xa = (double)(long)x */
21275 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21276 expand_fix (xi, res, 0);
21277 expand_float (xa, xi, 0);
21278
21279 /* generate 1.0 */
21280 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21281
21282 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21283 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21284 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21285 gen_rtx_AND (mode, one, tmp)));
21286 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21287 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21288 emit_move_insn (res, tmp);
21289
21290 if (HONOR_SIGNED_ZEROS (mode))
21291 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21292
21293 emit_label (label);
21294 LABEL_NUSES (label) = 1;
21295
21296 emit_move_insn (operand0, res);
21297 }
21298
21299 /* Expand SSE sequence for computing round from OPERAND1 storing
21300 into OPERAND0.  The sequence works without relying on DImode truncation
21301 via cvttsd2siq, which is only available on 64-bit targets.  */
21302 void
21303 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21304 {
21305 /* C code for the stuff we expand below.
21306 double xa = fabs (x), xa2, x2;
21307 if (!isless (xa, TWO52))
21308 return x;
21309 Using the absolute value and copying back sign makes
21310 -0.0 -> -0.0 correct.
21311 xa2 = xa + TWO52 - TWO52;
21312 Compensate.
21313 dxa = xa2 - xa;
21314 if (dxa <= -0.5)
21315 xa2 += 1;
21316 else if (dxa > 0.5)
21317 xa2 -= 1;
21318 x2 = copysign (xa2, x);
21319 return x2;
21320 */
21321 enum machine_mode mode = GET_MODE (operand0);
21322 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21323
21324 TWO52 = ix86_gen_TWO52 (mode);
21325
21326 /* Temporary for holding the result, initialized to the input
21327 operand to ease control flow. */
21328 res = gen_reg_rtx (mode);
21329 emit_move_insn (res, operand1);
21330
21331 /* xa = abs (operand1) */
21332 xa = ix86_expand_sse_fabs (res, &mask);
21333
21334 /* if (!isless (xa, TWO52)) goto label; */
21335 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21336
21337 /* xa2 = xa + TWO52 - TWO52; */
21338 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21339 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21340
21341 /* dxa = xa2 - xa; */
21342 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21343
21344 /* generate 0.5, 1.0 and -0.5 */
21345 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21346 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21347 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21348 0, OPTAB_DIRECT);
21349
21350 /* Compensate. */
21351 tmp = gen_reg_rtx (mode);
21352 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21353 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21354 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21355 gen_rtx_AND (mode, one, tmp)));
21356 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21357 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21358 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21359 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21360 gen_rtx_AND (mode, one, tmp)));
21361 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21362
21363 /* res = copysign (xa2, operand1) */
21364 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21365
21366 emit_label (label);
21367 LABEL_NUSES (label) = 1;
21368
21369 emit_move_insn (operand0, res);
21370 }
21371
21372 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21373 into OPERAND0. */
21374 void
21375 ix86_expand_trunc (rtx operand0, rtx operand1)
21376 {
21377 /* C code for SSE variant we expand below.
21378 double xa = fabs (x), x2;
21379 if (!isless (xa, TWO52))
21380 return x;
21381 x2 = (double)(long)x;
21382 if (HONOR_SIGNED_ZEROS (mode))
21383 return copysign (x2, x);
21384 return x2;
21385 */
21386 enum machine_mode mode = GET_MODE (operand0);
21387 rtx xa, xi, TWO52, label, res, mask;
21388
21389 TWO52 = ix86_gen_TWO52 (mode);
21390
21391 /* Temporary for holding the result, initialized to the input
21392 operand to ease control flow. */
21393 res = gen_reg_rtx (mode);
21394 emit_move_insn (res, operand1);
21395
21396 /* xa = abs (operand1) */
21397 xa = ix86_expand_sse_fabs (res, &mask);
21398
21399 /* if (!isless (xa, TWO52)) goto label; */
21400 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21401
21402 /* x = (double)(long)x */
21403 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21404 expand_fix (xi, res, 0);
21405 expand_float (res, xi, 0);
21406
21407 if (HONOR_SIGNED_ZEROS (mode))
21408 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21409
21410 emit_label (label);
21411 LABEL_NUSES (label) = 1;
21412
21413 emit_move_insn (operand0, res);
21414 }
21415
21416 /* Expand SSE sequence for computing trunc from OPERAND1 into OPERAND0,
21417 without relying on the 64-bit-only cvttsd2siq truncation.  */
21418 void
21419 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21420 {
21421 enum machine_mode mode = GET_MODE (operand0);
21422 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21423
21424 /* C code for SSE variant we expand below.
21425 double xa = fabs (x), x2;
21426 if (!isless (xa, TWO52))
21427 return x;
21428 xa2 = xa + TWO52 - TWO52;
21429 Compensate:
21430 if (xa2 > xa)
21431 xa2 -= 1.0;
21432 x2 = copysign (xa2, x);
21433 return x2;
21434 */
21435
21436 TWO52 = ix86_gen_TWO52 (mode);
21437
21438 /* Temporary for holding the result, initialized to the input
21439 operand to ease control flow. */
21440 res = gen_reg_rtx (mode);
21441 emit_move_insn (res, operand1);
21442
21443 /* xa = abs (operand1) */
21444 xa = ix86_expand_sse_fabs (res, &smask);
21445
21446 /* if (!isless (xa, TWO52)) goto label; */
21447 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21448
21449 /* res = xa + TWO52 - TWO52; */
21450 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21451 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21452 emit_move_insn (res, tmp);
21453
21454 /* generate 1.0 */
21455 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21456
21457 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21458 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21459 emit_insn (gen_rtx_SET (VOIDmode, mask,
21460 gen_rtx_AND (mode, mask, one)));
21461 tmp = expand_simple_binop (mode, MINUS,
21462 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21463 emit_move_insn (res, tmp);
21464
21465 /* res = copysign (res, operand1) */
21466 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21467
21468 emit_label (label);
21469 LABEL_NUSES (label) = 1;
21470
21471 emit_move_insn (operand0, res);
21472 }
21473
21474 /* Expand SSE sequence for computing round from OPERAND1 storing
21475 into OPERAND0. */
21476 void
21477 ix86_expand_round (rtx operand0, rtx operand1)
21478 {
21479 /* C code for the stuff we're doing below:
21480 double xa = fabs (x);
21481 if (!isless (xa, TWO52))
21482 return x;
21483 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21484 return copysign (xa, x);
21485 */
21486 enum machine_mode mode = GET_MODE (operand0);
21487 rtx res, TWO52, xa, label, xi, half, mask;
21488 const struct real_format *fmt;
21489 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21490
21491 /* Temporary for holding the result, initialized to the input
21492 operand to ease control flow. */
21493 res = gen_reg_rtx (mode);
21494 emit_move_insn (res, operand1);
21495
21496 TWO52 = ix86_gen_TWO52 (mode);
21497 xa = ix86_expand_sse_fabs (res, &mask);
21498 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21499
21500 /* load nextafter (0.5, 0.0) */
21501 fmt = REAL_MODE_FORMAT (mode);
21502 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21503 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21504
21505 /* xa = xa + 0.5 */
21506 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21507 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21508
21509 /* xa = (double)(int64_t)xa */
21510 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21511 expand_fix (xi, xa, 0);
21512 expand_float (xa, xi, 0);
21513
21514 /* res = copysign (xa, operand1) */
21515 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21516
21517 emit_label (label);
21518 LABEL_NUSES (label) = 1;
21519
21520 emit_move_insn (operand0, res);
21521 }
21522
21523 #include "gt-i386.h"