1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
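/* For example, MODE_INDEX (SImode) == 2, so the per-mode cost arrays below
   are read as table[MODE_INDEX (mode)]; any mode other than QI/HI/SI/DImode
   falls into the "other" slot (index 4).  */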
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
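/* Given the COSTS_N_INSNS (N) == (N) * 4 assumption above, the two scales stay
   comparable: an add costs COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), i.e. a
   2-byte instruction is "worth" one insn when optimizing for size.  */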
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
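/* A sketch of how to read the stringop_algs initializers below (the exact
   field layout lives in i386.h): the first element is the algorithm used when
   the block size is unknown, followed by {max_size, algorithm} pairs for known
   sizes, where a max_size of -1 means "no upper bound".  Each cost table
   carries such entries for memcpy and memset, with a 32-bit and a 64-bit
   variant; DUMMY_STRINGOP_ALGS (always libcall) fills the variant a given
   tuning never uses.  */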
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
354 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
355 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a more
356 expensive startup time in the CPU, but after 4K the difference is down in the noise.
357 */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches entirely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium sized blocks, but for very small
598 blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches entirely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall can
673 do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
828 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
908 is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
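/* Roughly speaking, override_options later repoints ix86_cost at the table
   matching -mtune (for instance &k8_cost for -mtune=k8) via
   processor_target_table, and the rest of the backend then reads individual
   entries such as ix86_cost->add when pricing instructions.  */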
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be common subset of supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32-bit generic because it does not
1012 work well with PPro based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1031
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1033 on simulation results, but after P4 was made, no performance benefit
1034 was observed from branch hints. They also increase code size.
1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on Generic32 compilation setting as well. However
1052 in current implementation the partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro based chips and is in conflict with partial reg
1056 dependencies used by Athlon/P4 based chips, it is better to leave it off
1057 for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls was more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4 based chips that treat 128bit
1136 SSE registers as single units and K8 based chips that divide SSE
1137 registers into two 64bit halves. This knob promotes all store destinations
1138 to be 128bit so as to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 show that disabling this option on P4 brings over 20% SPECfp regression,
1141 while enabling it on K8 brings roughly 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just the lower part of scalar values in the proper format, leaving the
1151 upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1199
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1202
1203 /* X86_TUNE_SLOW_IMUL_IMM32_MEM (imul of 32-bit constant and memory is vector
1204 path on AMD machines) */
1205 m_K8 | m_GENERIC64 | m_AMDFAM10,
1206
1207 /* X86_TUNE_SLOW_IMUL_IMM8 (imul of 8-bit constant is vector path on AMD
1208 machines) */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210
1211 /* X86_TUNE_MOVE_M1_VIA_OR (on pentiums, it is faster to load -1 via OR than
1212 a MOV) */
1213 m_PENT,
1214
1215 /* X86_TUNE_NOT_UNPAIRABLE (NOT is not pairable on Pentium, while XOR is, but
1216 one byte longer). */
1217 m_PENT,
1218
1219 /* X86_TUNE_NOT_VECTORMODE (On AMD K6, NOT is vector decoded with memory
1220 operand that cannot be represented using a modRM byte. The XOR
1221 replacement is long decoded, so this split helps here as well). */
1222 m_K6,
1223 };
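/* A minimal sketch of how these masks are consumed (the real TARGET_* macros
   live in i386.h): a tuning test is conceptually

     (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune)) != 0

   so, for instance, tuning for Core 2 enables X86_TUNE_USE_LEAVE because
   m_CORE2 is part of that entry above.  */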
1224
1225 /* Feature tests against the various architecture variations. */
1226 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1227 /* X86_ARCH_CMOVE */
1228 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1229
1230 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1231 ~m_386,
1232
1233 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1234 ~(m_386 | m_486),
1235
1236 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1237 ~m_386,
1238
1239 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1240 ~m_386,
1241 };
1242
1243 static const unsigned int x86_accumulate_outgoing_args
1244 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1245
1246 static const unsigned int x86_arch_always_fancy_math_387
1247 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1248 | m_NOCONA | m_CORE2 | m_GENERIC;
1249
1250 static enum stringop_alg stringop_alg = no_stringop;
1251
1252 /* In case the average insn count for single function invocation is
1253 lower than this constant, emit fast (but longer) prologue and
1254 epilogue code. */
1255 #define FAST_PROLOGUE_INSN_COUNT 20
1256
1257 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1258 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1259 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1260 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1261
1262 /* Array of the smallest class containing reg number REGNO, indexed by
1263 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1264
1265 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1266 {
1267 /* ax, dx, cx, bx */
1268 AREG, DREG, CREG, BREG,
1269 /* si, di, bp, sp */
1270 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1271 /* FP registers */
1272 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1273 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1274 /* arg pointer */
1275 NON_Q_REGS,
1276 /* flags, fpsr, fpcr, frame */
1277 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1278 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1279 SSE_REGS, SSE_REGS,
1280 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1281 MMX_REGS, MMX_REGS,
1282 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1283 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1284 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1285 SSE_REGS, SSE_REGS,
1286 };
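/* For example, REGNO_REG_CLASS (1) is DREG (%edx) and REGNO_REG_CLASS (7) is
   NON_Q_REGS (%esp), matching the ax, dx, cx, bx, si, di, bp, sp ordering of
   the first eight hard registers above.  */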
1287
1288 /* The "default" register map used in 32bit mode. */
1289
1290 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1291 {
1292 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1293 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1294 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1295 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1296 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1297 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1298 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1299 };
1300
1301 static int const x86_64_int_parameter_registers[6] =
1302 {
1303 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1304 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1305 };
1306
1307 static int const x86_64_int_return_registers[4] =
1308 {
1309 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1310 };
1311
1312 /* The "default" register map used in 64bit mode. */
1313 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1314 {
1315 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1316 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1317 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1318 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1319 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1320 8,9,10,11,12,13,14,15, /* extended integer registers */
1321 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1322 };
1323
1324 /* Define the register numbers to be used in Dwarf debugging information.
1325 The SVR4 reference port C compiler uses the following register numbers
1326 in its Dwarf output code:
1327 0 for %eax (gcc regno = 0)
1328 1 for %ecx (gcc regno = 2)
1329 2 for %edx (gcc regno = 1)
1330 3 for %ebx (gcc regno = 3)
1331 4 for %esp (gcc regno = 7)
1332 5 for %ebp (gcc regno = 6)
1333 6 for %esi (gcc regno = 4)
1334 7 for %edi (gcc regno = 5)
1335 The following three DWARF register numbers are never generated by
1336 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1337 believes these numbers have these meanings.
1338 8 for %eip (no gcc equivalent)
1339 9 for %eflags (gcc regno = 17)
1340 10 for %trapno (no gcc equivalent)
1341 It is not at all clear how we should number the FP stack registers
1342 for the x86 architecture. If the version of SDB on x86/svr4 were
1343 a bit less brain dead with respect to floating-point then we would
1344 have a precedent to follow with respect to DWARF register numbers
1345 for x86 FP registers, but the SDB on x86/svr4 is so completely
1346 broken with respect to FP registers that it is hardly worth thinking
1347 of it as something to strive for compatibility with.
1348 The version of x86/svr4 SDB I have at the moment does (partially)
1349 seem to believe that DWARF register number 11 is associated with
1350 the x86 register %st(0), but that's about all. Higher DWARF
1351 register numbers don't seem to be associated with anything in
1352 particular, and even for DWARF regno 11, SDB only seems to under-
1353 stand that it should say that a variable lives in %st(0) (when
1354 asked via an `=' command) if we said it was in DWARF regno 11,
1355 but SDB still prints garbage when asked for the value of the
1356 variable in question (via a `/' command).
1357 (Also note that the labels SDB prints for various FP stack regs
1358 when doing an `x' command are all wrong.)
1359 Note that these problems generally don't affect the native SVR4
1360 C compiler because it doesn't allow the use of -O with -g and
1361 because when it is *not* optimizing, it allocates a memory
1362 location for each floating-point variable, and the memory
1363 location is what gets described in the DWARF AT_location
1364 attribute for the variable in question.
1365 Regardless of the severe mental illness of the x86/svr4 SDB, we
1366 do something sensible here and we use the following DWARF
1367 register numbers. Note that these are all stack-top-relative
1368 numbers.
1369 11 for %st(0) (gcc regno = 8)
1370 12 for %st(1) (gcc regno = 9)
1371 13 for %st(2) (gcc regno = 10)
1372 14 for %st(3) (gcc regno = 11)
1373 15 for %st(4) (gcc regno = 12)
1374 16 for %st(5) (gcc regno = 13)
1375 17 for %st(6) (gcc regno = 14)
1376 18 for %st(7) (gcc regno = 15)
1377 */
1378 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1379 {
1380 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1381 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1382 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1383 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1384 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1385 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1386 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1387 };
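/* Example: GCC regno 7 is %esp, and svr4_dbx_register_map[7] == 4, matching
   the "4 for %esp (gcc regno = 7)" row of the numbering described above.  */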
1388
1389 /* Test and compare insns in i386.md store the information needed to
1390 generate branch and scc insns here. */
1391
1392 rtx ix86_compare_op0 = NULL_RTX;
1393 rtx ix86_compare_op1 = NULL_RTX;
1394 rtx ix86_compare_emitted = NULL_RTX;
1395
1396 /* Size of the register save area. */
1397 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1398
1399 /* Define the structure for the machine field in struct function. */
1400
1401 struct stack_local_entry GTY(())
1402 {
1403 unsigned short mode;
1404 unsigned short n;
1405 rtx rtl;
1406 struct stack_local_entry *next;
1407 };
1408
1409 /* Structure describing stack frame layout.
1410 Stack grows downward:
1411
1412 [arguments]
1413 <- ARG_POINTER
1414 saved pc
1415
1416 saved frame pointer if frame_pointer_needed
1417 <- HARD_FRAME_POINTER
1418 [saved regs]
1419
1420 [padding1] \
1421 )
1422 [va_arg registers] (
1423 > to_allocate <- FRAME_POINTER
1424 [frame] (
1425 )
1426 [padding2] /
1427 */
1428 struct ix86_frame
1429 {
1430 int nregs;
1431 int padding1;
1432 int va_arg_size;
1433 HOST_WIDE_INT frame;
1434 int padding2;
1435 int outgoing_arguments_size;
1436 int red_zone_size;
1437
1438 HOST_WIDE_INT to_allocate;
1439 /* The offsets relative to ARG_POINTER. */
1440 HOST_WIDE_INT frame_pointer_offset;
1441 HOST_WIDE_INT hard_frame_pointer_offset;
1442 HOST_WIDE_INT stack_pointer_offset;
1443
1444 /* When save_regs_using_mov is set, emit prologue using
1445 move instead of push instructions. */
1446 bool save_regs_using_mov;
1447 };
1448
1449 /* Code model option. */
1450 enum cmodel ix86_cmodel;
1451 /* Asm dialect. */
1452 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1453 /* TLS dialects. */
1454 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1455
1456 /* Which unit we are generating floating point math for. */
1457 enum fpmath_unit ix86_fpmath;
1458
1459 /* Which CPU we are scheduling for. */
1460 enum processor_type ix86_tune;
1461
1462 /* Which instruction set architecture to use. */
1463 enum processor_type ix86_arch;
1464
1465 /* True if the SSE prefetch instruction is not a NOP. */
1466 int x86_prefetch_sse;
1467
1468 /* ix86_regparm_string as a number */
1469 static int ix86_regparm;
1470
1471 /* -mstackrealign option */
1472 extern int ix86_force_align_arg_pointer;
1473 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1474
1475 /* Preferred alignment for stack boundary in bits. */
1476 unsigned int ix86_preferred_stack_boundary;
1477
1478 /* Values 1-5: see jump.c */
1479 int ix86_branch_cost;
1480
1481 /* Variables which are this size or smaller are put in the data/bss
1482 or ldata/lbss sections. */
1483
1484 int ix86_section_threshold = 65536;
1485
1486 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1487 char internal_label_prefix[16];
1488 int internal_label_prefix_len;
1489 \f
1490 static bool ix86_handle_option (size_t, const char *, int);
1491 static void output_pic_addr_const (FILE *, rtx, int);
1492 static void put_condition_code (enum rtx_code, enum machine_mode,
1493 int, int, FILE *);
1494 static const char *get_some_local_dynamic_name (void);
1495 static int get_some_local_dynamic_name_1 (rtx *, void *);
1496 static rtx ix86_expand_int_compare (enum rtx_code, rtx, rtx);
1497 static enum rtx_code ix86_prepare_fp_compare_args (enum rtx_code, rtx *,
1498 rtx *);
1499 static bool ix86_fixed_condition_code_regs (unsigned int *, unsigned int *);
1500 static enum machine_mode ix86_cc_modes_compatible (enum machine_mode,
1501 enum machine_mode);
1502 static rtx get_thread_pointer (int);
1503 static rtx legitimize_tls_address (rtx, enum tls_model, int);
1504 static void get_pc_thunk_name (char [32], unsigned int);
1505 static rtx gen_push (rtx);
1506 static int ix86_flags_dependent (rtx, rtx, enum attr_type);
1507 static int ix86_agi_dependent (rtx, rtx, enum attr_type);
1508 static struct machine_function * ix86_init_machine_status (void);
1509 static int ix86_split_to_parts (rtx, rtx *, enum machine_mode);
1510 static int ix86_nsaved_regs (void);
1511 static void ix86_emit_save_regs (void);
1512 static void ix86_emit_save_regs_using_mov (rtx, HOST_WIDE_INT);
1513 static void ix86_emit_restore_regs_using_mov (rtx, HOST_WIDE_INT, int);
1514 static void ix86_output_function_epilogue (FILE *, HOST_WIDE_INT);
1515 static HOST_WIDE_INT ix86_GOT_alias_set (void);
1516 static void ix86_adjust_counter (rtx, HOST_WIDE_INT);
1517 static void ix86_expand_strlensi_unroll_1 (rtx, rtx, rtx);
1518 static int ix86_issue_rate (void);
1519 static int ix86_adjust_cost (rtx, rtx, rtx, int);
1520 static int ia32_multipass_dfa_lookahead (void);
1521 static void ix86_init_mmx_sse_builtins (void);
1522 static rtx x86_this_parameter (tree);
1523 static void x86_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
1524 HOST_WIDE_INT, tree);
1525 static bool x86_can_output_mi_thunk (tree, HOST_WIDE_INT, HOST_WIDE_INT, tree);
1526 static void x86_file_start (void);
1527 static void ix86_reorg (void);
1528 static bool ix86_expand_carry_flag_compare (enum rtx_code, rtx, rtx, rtx*);
1529 static tree ix86_build_builtin_va_list (void);
1530 static void ix86_setup_incoming_varargs (CUMULATIVE_ARGS *, enum machine_mode,
1531 tree, int *, int);
1532 static tree ix86_gimplify_va_arg (tree, tree, tree *, tree *);
1533 static bool ix86_scalar_mode_supported_p (enum machine_mode);
1534 static bool ix86_vector_mode_supported_p (enum machine_mode);
1535
1536 static int ix86_address_cost (rtx);
1537 static bool ix86_cannot_force_const_mem (rtx);
1538 static rtx ix86_delegitimize_address (rtx);
1539
1540 static void i386_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
1541
1542 struct builtin_description;
1543 static rtx ix86_expand_sse_comi (const struct builtin_description *,
1544 tree, rtx);
1545 static rtx ix86_expand_sse_compare (const struct builtin_description *,
1546 tree, rtx);
1547 static rtx ix86_expand_unop1_builtin (enum insn_code, tree, rtx);
1548 static rtx ix86_expand_unop_builtin (enum insn_code, tree, rtx, int);
1549 static rtx ix86_expand_binop_builtin (enum insn_code, tree, rtx);
1550 static rtx ix86_expand_store_builtin (enum insn_code, tree);
1551 static rtx safe_vector_operand (rtx, enum machine_mode);
1552 static rtx ix86_expand_fp_compare (enum rtx_code, rtx, rtx, rtx, rtx *, rtx *);
1553 static int ix86_fp_comparison_arithmetics_cost (enum rtx_code code);
1554 static int ix86_fp_comparison_fcomi_cost (enum rtx_code code);
1555 static int ix86_fp_comparison_sahf_cost (enum rtx_code code);
1556 static int ix86_fp_comparison_cost (enum rtx_code code);
1557 static unsigned int ix86_select_alt_pic_regnum (void);
1558 static int ix86_save_reg (unsigned int, int);
1559 static void ix86_compute_frame_layout (struct ix86_frame *);
1560 static int ix86_comp_type_attributes (tree, tree);
1561 static int ix86_function_regparm (tree, tree);
1562 const struct attribute_spec ix86_attribute_table[];
1563 static bool ix86_function_ok_for_sibcall (tree, tree);
1564 static tree ix86_handle_cconv_attribute (tree *, tree, tree, int, bool *);
1565 static int ix86_value_regno (enum machine_mode, tree, tree);
1566 static bool contains_128bit_aligned_vector_p (tree);
1567 static rtx ix86_struct_value_rtx (tree, int);
1568 static bool ix86_ms_bitfield_layout_p (tree);
1569 static tree ix86_handle_struct_attribute (tree *, tree, tree, int, bool *);
1570 static int extended_reg_mentioned_1 (rtx *, void *);
1571 static bool ix86_rtx_costs (rtx, int, int, int *);
1572 static int min_insn_size (rtx);
1573 static tree ix86_md_asm_clobbers (tree outputs, tree inputs, tree clobbers);
1574 static bool ix86_must_pass_in_stack (enum machine_mode mode, tree type);
1575 static bool ix86_pass_by_reference (CUMULATIVE_ARGS *, enum machine_mode,
1576 tree, bool);
1577 static void ix86_init_builtins (void);
1578 static rtx ix86_expand_builtin (tree, rtx, rtx, enum machine_mode, int);
1579 static tree ix86_builtin_vectorized_function (enum built_in_function, tree, tree);
1580 static tree ix86_builtin_conversion (enum tree_code, tree);
1581 static const char *ix86_mangle_fundamental_type (tree);
1582 static tree ix86_stack_protect_fail (void);
1583 static rtx ix86_internal_arg_pointer (void);
1584 static void ix86_dwarf_handle_frame_unspec (const char *, rtx, int);
1585 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1586 rtx, rtx, int);
1587
1588 /* This function is only used on Solaris. */
1589 static void i386_solaris_elf_named_section (const char *, unsigned int, tree)
1590 ATTRIBUTE_UNUSED;
1591
1592 /* Register classes used for passing a given 64-bit part of the argument.
1593 These represent the classes documented by the psABI, with the exception
1594 of the SSESF and SSEDF classes, which are basically the SSE class; GCC just
1595 uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1596
1597 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1598 whenever possible (the upper half then contains only padding).
1599 */
1600 enum x86_64_reg_class
1601 {
1602 X86_64_NO_CLASS,
1603 X86_64_INTEGER_CLASS,
1604 X86_64_INTEGERSI_CLASS,
1605 X86_64_SSE_CLASS,
1606 X86_64_SSESF_CLASS,
1607 X86_64_SSEDF_CLASS,
1608 X86_64_SSEUP_CLASS,
1609 X86_64_X87_CLASS,
1610 X86_64_X87UP_CLASS,
1611 X86_64_COMPLEX_X87_CLASS,
1612 X86_64_MEMORY_CLASS
1613 };
1614 static const char * const x86_64_reg_class_name[] = {
1615 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1616 "sseup", "x87", "x87up", "cplx87", "no"
1617 };
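/* Roughly speaking, under this classification a double ends up in SSEDF,
   a 32-bit int in INTEGERSI, pointers and 64-bit integers in INTEGER, and
   aggregates too large to be passed in registers fall back to MEMORY; these
   examples are only illustrative, the authoritative rules live in the
   classification code later in this file.  */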
1618
1619 #define MAX_CLASSES 4
1620
1621 /* Table of constants used by fldpi, fldln2, etc.... */
1622 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1623 static bool ext_80387_constants_init = 0;
1624 static void init_ext_80387_constants (void);
1625 static bool ix86_in_large_data_p (tree) ATTRIBUTE_UNUSED;
1626 static void ix86_encode_section_info (tree, rtx, int) ATTRIBUTE_UNUSED;
1627 static void x86_64_elf_unique_section (tree decl, int reloc) ATTRIBUTE_UNUSED;
1628 static section *x86_64_elf_select_section (tree decl, int reloc,
1629 unsigned HOST_WIDE_INT align)
1630 ATTRIBUTE_UNUSED;
1631 \f
1632 /* Initialize the GCC target structure. */
1633 #undef TARGET_ATTRIBUTE_TABLE
1634 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
1635 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
1636 # undef TARGET_MERGE_DECL_ATTRIBUTES
1637 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
1638 #endif
1639
1640 #undef TARGET_COMP_TYPE_ATTRIBUTES
1641 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
1642
1643 #undef TARGET_INIT_BUILTINS
1644 #define TARGET_INIT_BUILTINS ix86_init_builtins
1645 #undef TARGET_EXPAND_BUILTIN
1646 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
1647
1648 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
1649 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
1650 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
1651 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
1652
1653 #undef TARGET_ASM_FUNCTION_EPILOGUE
1654 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
1655
1656 #undef TARGET_ENCODE_SECTION_INFO
1657 #ifndef SUBTARGET_ENCODE_SECTION_INFO
1658 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
1659 #else
1660 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
1661 #endif
1662
1663 #undef TARGET_ASM_OPEN_PAREN
1664 #define TARGET_ASM_OPEN_PAREN ""
1665 #undef TARGET_ASM_CLOSE_PAREN
1666 #define TARGET_ASM_CLOSE_PAREN ""
1667
1668 #undef TARGET_ASM_ALIGNED_HI_OP
1669 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
1670 #undef TARGET_ASM_ALIGNED_SI_OP
1671 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
1672 #ifdef ASM_QUAD
1673 #undef TARGET_ASM_ALIGNED_DI_OP
1674 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
1675 #endif
1676
1677 #undef TARGET_ASM_UNALIGNED_HI_OP
1678 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
1679 #undef TARGET_ASM_UNALIGNED_SI_OP
1680 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
1681 #undef TARGET_ASM_UNALIGNED_DI_OP
1682 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
1683
1684 #undef TARGET_SCHED_ADJUST_COST
1685 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
1686 #undef TARGET_SCHED_ISSUE_RATE
1687 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
1688 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
1689 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
1690 ia32_multipass_dfa_lookahead
1691
1692 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
1693 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
1694
1695 #ifdef HAVE_AS_TLS
1696 #undef TARGET_HAVE_TLS
1697 #define TARGET_HAVE_TLS true
1698 #endif
1699 #undef TARGET_CANNOT_FORCE_CONST_MEM
1700 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
1701 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
1702 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
1703
1704 #undef TARGET_DELEGITIMIZE_ADDRESS
1705 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
1706
1707 #undef TARGET_MS_BITFIELD_LAYOUT_P
1708 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
1709
1710 #if TARGET_MACHO
1711 #undef TARGET_BINDS_LOCAL_P
1712 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
1713 #endif
1714
1715 #undef TARGET_ASM_OUTPUT_MI_THUNK
1716 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
1717 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
1718 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
1719
1720 #undef TARGET_ASM_FILE_START
1721 #define TARGET_ASM_FILE_START x86_file_start
1722
1723 #undef TARGET_DEFAULT_TARGET_FLAGS
1724 #define TARGET_DEFAULT_TARGET_FLAGS \
1725 (TARGET_DEFAULT \
1726 | TARGET_64BIT_DEFAULT \
1727 | TARGET_SUBTARGET_DEFAULT \
1728 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
1729
1730 #undef TARGET_HANDLE_OPTION
1731 #define TARGET_HANDLE_OPTION ix86_handle_option
1732
1733 #undef TARGET_RTX_COSTS
1734 #define TARGET_RTX_COSTS ix86_rtx_costs
1735 #undef TARGET_ADDRESS_COST
1736 #define TARGET_ADDRESS_COST ix86_address_cost
1737
1738 #undef TARGET_FIXED_CONDITION_CODE_REGS
1739 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
1740 #undef TARGET_CC_MODES_COMPATIBLE
1741 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
1742
1743 #undef TARGET_MACHINE_DEPENDENT_REORG
1744 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
1745
1746 #undef TARGET_BUILD_BUILTIN_VA_LIST
1747 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
1748
1749 #undef TARGET_MD_ASM_CLOBBERS
1750 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
1751
1752 #undef TARGET_PROMOTE_PROTOTYPES
1753 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
1754 #undef TARGET_STRUCT_VALUE_RTX
1755 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
1756 #undef TARGET_SETUP_INCOMING_VARARGS
1757 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
1758 #undef TARGET_MUST_PASS_IN_STACK
1759 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
1760 #undef TARGET_PASS_BY_REFERENCE
1761 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
1762 #undef TARGET_INTERNAL_ARG_POINTER
1763 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
1764 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
1765 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
1766
1767 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
1768 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
1769
1770 #undef TARGET_SCALAR_MODE_SUPPORTED_P
1771 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
1772
1773 #undef TARGET_VECTOR_MODE_SUPPORTED_P
1774 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
1775
1776 #ifdef HAVE_AS_TLS
1777 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
1778 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
1779 #endif
1780
1781 #ifdef SUBTARGET_INSERT_ATTRIBUTES
1782 #undef TARGET_INSERT_ATTRIBUTES
1783 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
1784 #endif
1785
1786 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
1787 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
1788
1789 #undef TARGET_STACK_PROTECT_FAIL
1790 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
1791
1792 #undef TARGET_FUNCTION_VALUE
1793 #define TARGET_FUNCTION_VALUE ix86_function_value
1794
1795 struct gcc_target targetm = TARGET_INITIALIZER;
1796
1797 \f
1798 /* The svr4 ABI for the i386 says that records and unions are returned
1799 in memory. */
1800 #ifndef DEFAULT_PCC_STRUCT_RETURN
1801 #define DEFAULT_PCC_STRUCT_RETURN 1
1802 #endif
1803
1804 /* Implement TARGET_HANDLE_OPTION. */
1805
1806 static bool
1807 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1808 {
1809 switch (code)
1810 {
1811 case OPT_m3dnow:
1812 if (!value)
1813 {
1814 target_flags &= ~MASK_3DNOW_A;
1815 target_flags_explicit |= MASK_3DNOW_A;
1816 }
1817 return true;
1818
1819 case OPT_mmmx:
1820 if (!value)
1821 {
1822 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1823 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1824 }
1825 return true;
1826
1827 case OPT_msse:
1828 if (!value)
1829 {
1830 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1831 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1832 }
1833 return true;
1834
1835 case OPT_msse2:
1836 if (!value)
1837 {
1838 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1839 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1840 }
1841 return true;
1842
1843 case OPT_msse3:
1844 if (!value)
1845 {
1846 target_flags &= ~MASK_SSE4A;
1847 target_flags_explicit |= MASK_SSE4A;
1848 }
1849 return true;
1850
1851 default:
1852 return true;
1853 }
1854 }
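/* Note the cascading behaviour above: explicitly disabling one ISA level
   also clears everything that depends on it (e.g. -mno-sse clears SSE2,
   SSE3 and SSE4A as well), and the cleared bits are recorded in
   target_flags_explicit so that a later -march= setting in override_options
   does not silently turn them back on.  */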
1855
1856 /* Sometimes certain combinations of command options do not make
1857 sense on a particular target machine. You can define a macro
1858 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1859 defined, is executed once just after all the command options have
1860 been parsed.
1861
1862 Don't use this macro to turn on various extra optimizations for
1863 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1864
1865 void
1866 override_options (void)
1867 {
1868 int i;
1869 int ix86_tune_defaulted = 0;
1870 unsigned int ix86_arch_mask, ix86_tune_mask;
1871
1872 /* Comes from final.c -- no real reason to change it. */
1873 #define MAX_CODE_ALIGN 16
1874
1875 static struct ptt
1876 {
1877 const struct processor_costs *cost; /* Processor costs */
1878 const int target_enable; /* Target flags to enable. */
1879 const int target_disable; /* Target flags to disable. */
1880 const int align_loop; /* Default alignments. */
1881 const int align_loop_max_skip;
1882 const int align_jump;
1883 const int align_jump_max_skip;
1884 const int align_func;
1885 }
1886 const processor_target_table[PROCESSOR_max] =
1887 {
1888 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1889 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1890 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1891 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1892 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1893 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1894 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1895 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1896 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1897 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1898 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1899 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1900 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1901 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1902 };
1903
1904 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1905 static struct pta
1906 {
1907 const char *const name; /* processor name or nickname. */
1908 const enum processor_type processor;
1909 const enum pta_flags
1910 {
1911 PTA_SSE = 1 << 0,
1912 PTA_SSE2 = 1 << 1,
1913 PTA_SSE3 = 1 << 2,
1914 PTA_MMX = 1 << 3,
1915 PTA_PREFETCH_SSE = 1 << 4,
1916 PTA_3DNOW = 1 << 5,
1917 PTA_3DNOW_A = 1 << 6,
1918 PTA_64BIT = 1 << 7,
1919 PTA_SSSE3 = 1 << 8,
1920 PTA_CX16 = 1 << 9,
1921 PTA_POPCNT = 1 << 10,
1922 PTA_ABM = 1 << 11,
1923 PTA_SSE4A = 1 << 12,
1924 PTA_NO_SAHF = 1 << 13
1925 } flags;
1926 }
1927 const processor_alias_table[] =
1928 {
1929 {"i386", PROCESSOR_I386, 0},
1930 {"i486", PROCESSOR_I486, 0},
1931 {"i586", PROCESSOR_PENTIUM, 0},
1932 {"pentium", PROCESSOR_PENTIUM, 0},
1933 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1934 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1935 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1936 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1937 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1938 {"i686", PROCESSOR_PENTIUMPRO, 0},
1939 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1940 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1941 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1942 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1943 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1944 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1945 | PTA_MMX | PTA_PREFETCH_SSE},
1946 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1947 | PTA_MMX | PTA_PREFETCH_SSE},
1948 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1949 | PTA_MMX | PTA_PREFETCH_SSE},
1950 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1951 | PTA_MMX | PTA_PREFETCH_SSE
1952 | PTA_CX16 | PTA_NO_SAHF},
1953 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1954 | PTA_64BIT | PTA_MMX
1955 | PTA_PREFETCH_SSE | PTA_CX16},
1956 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1957 | PTA_3DNOW_A},
1958 {"k6", PROCESSOR_K6, PTA_MMX},
1959 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1960 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1961 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1962 | PTA_3DNOW_A},
1963 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1964 | PTA_3DNOW | PTA_3DNOW_A},
1965 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1966 | PTA_3DNOW_A | PTA_SSE},
1967 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1968 | PTA_3DNOW_A | PTA_SSE},
1969 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1970 | PTA_3DNOW_A | PTA_SSE},
1971 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1972 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1973 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1974 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1975 | PTA_NO_SAHF},
1976 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1977 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1978 | PTA_SSE2 | PTA_NO_SAHF},
1979 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1980 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1981 | PTA_SSE2 | PTA_NO_SAHF},
1982 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1983 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1984 | PTA_SSE2 | PTA_NO_SAHF},
1985 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1986 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1987 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1988 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1989 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1990 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1991 };
1992
1993 int const pta_size = ARRAY_SIZE (processor_alias_table);
1994
1995 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1996 SUBTARGET_OVERRIDE_OPTIONS;
1997 #endif
1998
1999 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
2000 SUBSUBTARGET_OVERRIDE_OPTIONS;
2001 #endif
2002
2003 /* -fPIC is the default for 64-bit Mach-O (Darwin). */
2004 if (TARGET_MACHO && TARGET_64BIT)
2005 flag_pic = 2;
2006
2007 /* Set the default values for switches whose default depends on TARGET_64BIT
2008 in case they weren't overwritten by command line options. */
2009 if (TARGET_64BIT)
2010 {
2011 /* Mach-O doesn't support omitting the frame pointer for now. */
2012 if (flag_omit_frame_pointer == 2)
2013 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
2014 if (flag_asynchronous_unwind_tables == 2)
2015 flag_asynchronous_unwind_tables = 1;
2016 if (flag_pcc_struct_return == 2)
2017 flag_pcc_struct_return = 0;
2018 }
2019 else
2020 {
2021 if (flag_omit_frame_pointer == 2)
2022 flag_omit_frame_pointer = 0;
2023 if (flag_asynchronous_unwind_tables == 2)
2024 flag_asynchronous_unwind_tables = 0;
2025 if (flag_pcc_struct_return == 2)
2026 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
2027 }
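/* For example, when these flags were left at their sentinel value 2 (see
   optimization_options below), a 64-bit compilation defaults to omitting
   the frame pointer (except on Mach-O), emitting asynchronous unwind tables
   and returning small structures in registers, while a 32-bit compilation
   keeps the traditional i386 defaults unless the command line says
   otherwise.  */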
2028
2029 /* Need to check -mtune=generic first. */
2030 if (ix86_tune_string)
2031 {
2032 if (!strcmp (ix86_tune_string, "generic")
2033 || !strcmp (ix86_tune_string, "i686")
2034 /* As special support for cross compilers we read -mtune=native
2035 as -mtune=generic. With native compilers we won't see the
2036 -mtune=native, as it was changed by the driver. */
2037 || !strcmp (ix86_tune_string, "native"))
2038 {
2039 if (TARGET_64BIT)
2040 ix86_tune_string = "generic64";
2041 else
2042 ix86_tune_string = "generic32";
2043 }
2044 else if (!strncmp (ix86_tune_string, "generic", 7))
2045 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2046 }
2047 else
2048 {
2049 if (ix86_arch_string)
2050 ix86_tune_string = ix86_arch_string;
2051 if (!ix86_tune_string)
2052 {
2053 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
2054 ix86_tune_defaulted = 1;
2055 }
2056
2057 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
2058 need to use a sensible tune option. */
2059 if (!strcmp (ix86_tune_string, "generic")
2060 || !strcmp (ix86_tune_string, "x86-64")
2061 || !strcmp (ix86_tune_string, "i686"))
2062 {
2063 if (TARGET_64BIT)
2064 ix86_tune_string = "generic64";
2065 else
2066 ix86_tune_string = "generic32";
2067 }
2068 }
2069 if (ix86_stringop_string)
2070 {
2071 if (!strcmp (ix86_stringop_string, "rep_byte"))
2072 stringop_alg = rep_prefix_1_byte;
2073 else if (!strcmp (ix86_stringop_string, "libcall"))
2074 stringop_alg = libcall;
2075 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
2076 stringop_alg = rep_prefix_4_byte;
2077 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
2078 stringop_alg = rep_prefix_8_byte;
2079 else if (!strcmp (ix86_stringop_string, "byte_loop"))
2080 stringop_alg = loop_1_byte;
2081 else if (!strcmp (ix86_stringop_string, "loop"))
2082 stringop_alg = loop;
2083 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
2084 stringop_alg = unrolled_loop;
2085 else
2086 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
2087 }
2088 if (!strcmp (ix86_tune_string, "x86-64"))
2089 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
2090 "-mtune=generic instead as appropriate.");
2091
2092 if (!ix86_arch_string)
2093 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
2094 if (!strcmp (ix86_arch_string, "generic"))
2095 error ("generic CPU can be used only for -mtune= switch");
2096 if (!strncmp (ix86_arch_string, "generic", 7))
2097 error ("bad value (%s) for -march= switch", ix86_arch_string);
2098
2099 if (ix86_cmodel_string != 0)
2100 {
2101 if (!strcmp (ix86_cmodel_string, "small"))
2102 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2103 else if (!strcmp (ix86_cmodel_string, "medium"))
2104 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2105 else if (!strcmp (ix86_cmodel_string, "large"))
2106 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2107 else if (flag_pic)
2108 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2109 else if (!strcmp (ix86_cmodel_string, "32"))
2110 ix86_cmodel = CM_32;
2111 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2112 ix86_cmodel = CM_KERNEL;
2113 else
2114 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2115 }
2116 else
2117 {
2118 ix86_cmodel = CM_32;
2119 if (TARGET_64BIT)
2120 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2121 }
2122 if (ix86_asm_string != 0)
2123 {
2124 if (! TARGET_MACHO
2125 && !strcmp (ix86_asm_string, "intel"))
2126 ix86_asm_dialect = ASM_INTEL;
2127 else if (!strcmp (ix86_asm_string, "att"))
2128 ix86_asm_dialect = ASM_ATT;
2129 else
2130 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2131 }
2132 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2133 error ("code model %qs not supported in the %s bit mode",
2134 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2135 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
2136 sorry ("%i-bit mode not compiled in",
2137 (target_flags & MASK_64BIT) ? 64 : 32);
2138
2139 for (i = 0; i < pta_size; i++)
2140 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2141 {
2142 ix86_arch = processor_alias_table[i].processor;
2143 /* Default cpu tuning to the architecture. */
2144 ix86_tune = ix86_arch;
2145 if (processor_alias_table[i].flags & PTA_MMX
2146 && !(target_flags_explicit & MASK_MMX))
2147 target_flags |= MASK_MMX;
2148 if (processor_alias_table[i].flags & PTA_3DNOW
2149 && !(target_flags_explicit & MASK_3DNOW))
2150 target_flags |= MASK_3DNOW;
2151 if (processor_alias_table[i].flags & PTA_3DNOW_A
2152 && !(target_flags_explicit & MASK_3DNOW_A))
2153 target_flags |= MASK_3DNOW_A;
2154 if (processor_alias_table[i].flags & PTA_SSE
2155 && !(target_flags_explicit & MASK_SSE))
2156 target_flags |= MASK_SSE;
2157 if (processor_alias_table[i].flags & PTA_SSE2
2158 && !(target_flags_explicit & MASK_SSE2))
2159 target_flags |= MASK_SSE2;
2160 if (processor_alias_table[i].flags & PTA_SSE3
2161 && !(target_flags_explicit & MASK_SSE3))
2162 target_flags |= MASK_SSE3;
2163 if (processor_alias_table[i].flags & PTA_SSSE3
2164 && !(target_flags_explicit & MASK_SSSE3))
2165 target_flags |= MASK_SSSE3;
2166 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
2167 x86_prefetch_sse = true;
2168 if (processor_alias_table[i].flags & PTA_CX16)
2169 x86_cmpxchg16b = true;
2170 if (processor_alias_table[i].flags & PTA_POPCNT
2171 && !(target_flags_explicit & MASK_POPCNT))
2172 target_flags |= MASK_POPCNT;
2173 if (processor_alias_table[i].flags & PTA_ABM
2174 && !(target_flags_explicit & MASK_ABM))
2175 target_flags |= MASK_ABM;
2176 if (processor_alias_table[i].flags & PTA_SSE4A
2177 && !(target_flags_explicit & MASK_SSE4A))
2178 target_flags |= MASK_SSE4A;
2179 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2180 x86_sahf = true;
2181 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2182 error ("CPU you selected does not support x86-64 "
2183 "instruction set");
2184 break;
2185 }
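/* To take one row of the table as an example: -march=amdfam10 enables MMX,
   3DNow!, SSE through SSE3, SSE4A, ABM, POPCNT and CX16 here (unless the
   user explicitly disabled one of them), marks SSE prefetch as usable and,
   because PTA_NO_SAHF is not set, leaves x86_sahf enabled in 64-bit mode.  */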
2186
2187 if (i == pta_size)
2188 error ("bad value (%s) for -march= switch", ix86_arch_string);
2189
2190 ix86_arch_mask = 1u << ix86_arch;
2191 for (i = 0; i < X86_ARCH_LAST; ++i)
2192 ix86_arch_features[i] &= ix86_arch_mask;
2193
2194 for (i = 0; i < pta_size; i++)
2195 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2196 {
2197 ix86_tune = processor_alias_table[i].processor;
2198 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2199 {
2200 if (ix86_tune_defaulted)
2201 {
2202 ix86_tune_string = "x86-64";
2203 for (i = 0; i < pta_size; i++)
2204 if (! strcmp (ix86_tune_string,
2205 processor_alias_table[i].name))
2206 break;
2207 ix86_tune = processor_alias_table[i].processor;
2208 }
2209 else
2210 error ("CPU you selected does not support x86-64 "
2211 "instruction set");
2212 }
2213 /* Intel CPUs have always interpreted SSE prefetch instructions as
2214 NOPs; so, we can enable SSE prefetch instructions even when
2215 -mtune (rather than -march) points us to a processor that has them.
2216 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2217 higher processors. */
2218 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
2219 x86_prefetch_sse = true;
2220 break;
2221 }
2222 if (i == pta_size)
2223 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2224
2225 ix86_tune_mask = 1u << ix86_tune;
2226 for (i = 0; i < X86_TUNE_LAST; ++i)
2227 ix86_tune_features[i] &= ix86_tune_mask;
2228
2229 if (optimize_size)
2230 ix86_cost = &size_cost;
2231 else
2232 ix86_cost = processor_target_table[ix86_tune].cost;
2233 target_flags |= processor_target_table[ix86_tune].target_enable;
2234 target_flags &= ~processor_target_table[ix86_tune].target_disable;
2235
2236 /* Arrange to set up i386_stack_locals for all functions. */
2237 init_machine_status = ix86_init_machine_status;
2238
2239 /* Validate -mregparm= value. */
2240 if (ix86_regparm_string)
2241 {
2242 i = atoi (ix86_regparm_string);
2243 if (i < 0 || i > REGPARM_MAX)
2244 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2245 else
2246 ix86_regparm = i;
2247 }
2248 else
2249 if (TARGET_64BIT)
2250 ix86_regparm = REGPARM_MAX;
2251
2252 /* If the user has provided any of the -malign-* options,
2253 warn and use that value only if -falign-* is not set.
2254 Remove this code in GCC 3.2 or later. */
2255 if (ix86_align_loops_string)
2256 {
2257 warning (0, "-malign-loops is obsolete, use -falign-loops");
2258 if (align_loops == 0)
2259 {
2260 i = atoi (ix86_align_loops_string);
2261 if (i < 0 || i > MAX_CODE_ALIGN)
2262 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2263 else
2264 align_loops = 1 << i;
2265 }
2266 }
2267
2268 if (ix86_align_jumps_string)
2269 {
2270 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2271 if (align_jumps == 0)
2272 {
2273 i = atoi (ix86_align_jumps_string);
2274 if (i < 0 || i > MAX_CODE_ALIGN)
2275 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2276 else
2277 align_jumps = 1 << i;
2278 }
2279 }
2280
2281 if (ix86_align_funcs_string)
2282 {
2283 warning (0, "-malign-functions is obsolete, use -falign-functions");
2284 if (align_functions == 0)
2285 {
2286 i = atoi (ix86_align_funcs_string);
2287 if (i < 0 || i > MAX_CODE_ALIGN)
2288 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2289 else
2290 align_functions = 1 << i;
2291 }
2292 }
2293
2294 /* Default align_* from the processor table. */
2295 if (align_loops == 0)
2296 {
2297 align_loops = processor_target_table[ix86_tune].align_loop;
2298 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2299 }
2300 if (align_jumps == 0)
2301 {
2302 align_jumps = processor_target_table[ix86_tune].align_jump;
2303 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2304 }
2305 if (align_functions == 0)
2306 {
2307 align_functions = processor_target_table[ix86_tune].align_func;
2308 }
2309
2310 /* Validate -mbranch-cost= value, or provide default. */
2311 ix86_branch_cost = ix86_cost->branch_cost;
2312 if (ix86_branch_cost_string)
2313 {
2314 i = atoi (ix86_branch_cost_string);
2315 if (i < 0 || i > 5)
2316 error ("-mbranch-cost=%d is not between 0 and 5", i);
2317 else
2318 ix86_branch_cost = i;
2319 }
2320 if (ix86_section_threshold_string)
2321 {
2322 i = atoi (ix86_section_threshold_string);
2323 if (i < 0)
2324 error ("-mlarge-data-threshold=%d is negative", i);
2325 else
2326 ix86_section_threshold = i;
2327 }
2328
2329 if (ix86_tls_dialect_string)
2330 {
2331 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2332 ix86_tls_dialect = TLS_DIALECT_GNU;
2333 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2334 ix86_tls_dialect = TLS_DIALECT_GNU2;
2335 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2336 ix86_tls_dialect = TLS_DIALECT_SUN;
2337 else
2338 error ("bad value (%s) for -mtls-dialect= switch",
2339 ix86_tls_dialect_string);
2340 }
2341
2342 /* Keep nonleaf frame pointers. */
2343 if (flag_omit_frame_pointer)
2344 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2345 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2346 flag_omit_frame_pointer = 1;
2347
2348 /* If we're doing fast math, we don't care about comparison order
2349 wrt NaNs. This lets us use a shorter comparison sequence. */
2350 if (flag_finite_math_only)
2351 target_flags &= ~MASK_IEEE_FP;
2352
2353 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2354 since the insns won't need emulation. */
2355 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2356 target_flags &= ~MASK_NO_FANCY_MATH_387;
2357
2358 /* Likewise, if the target doesn't have a 387, or we've specified
2359 software floating point, don't use 387 inline intrinsics. */
2360 if (!TARGET_80387)
2361 target_flags |= MASK_NO_FANCY_MATH_387;
2362
2363 /* Turn on SSE3 builtins for -mssse3. */
2364 if (TARGET_SSSE3)
2365 target_flags |= MASK_SSE3;
2366
2367 /* Turn on SSE3 builtins for -msse4a. */
2368 if (TARGET_SSE4A)
2369 target_flags |= MASK_SSE3;
2370
2371 /* Turn on SSE2 builtins for -msse3. */
2372 if (TARGET_SSE3)
2373 target_flags |= MASK_SSE2;
2374
2375 /* Turn on SSE builtins for -msse2. */
2376 if (TARGET_SSE2)
2377 target_flags |= MASK_SSE;
2378
2379 /* Turn on MMX builtins for -msse. */
2380 if (TARGET_SSE)
2381 {
2382 target_flags |= MASK_MMX & ~target_flags_explicit;
2383 x86_prefetch_sse = true;
2384 }
2385
2386 /* Turn on MMX builtins for 3Dnow. */
2387 if (TARGET_3DNOW)
2388 target_flags |= MASK_MMX;
2389
2390 /* Turn on POPCNT builtins for -mabm. */
2391 if (TARGET_ABM)
2392 target_flags |= MASK_POPCNT;
2393
2394 if (TARGET_64BIT)
2395 {
2396 if (TARGET_ALIGN_DOUBLE)
2397 error ("-malign-double makes no sense in the 64bit mode");
2398 if (TARGET_RTD)
2399 error ("-mrtd calling convention not supported in the 64bit mode");
2400
2401 /* Enable by default the SSE and MMX builtins. Do allow the user to
2402 explicitly disable any of these. In particular, disabling SSE and
2403 MMX for kernel code is extremely useful. */
2404 target_flags
2405 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2406 & ~target_flags_explicit);
2407 }
2408 else
2409 {
2410 /* The i386 ABI does not specify a red zone. It still makes sense to use it
2411 when the programmer takes care to keep the stack from being destroyed. */
2412 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2413 target_flags |= MASK_NO_RED_ZONE;
2414 }
2415
2416 /* Validate -mpreferred-stack-boundary= value, or provide default.
2417 The default of 128 bits is for Pentium III's SSE __m128. We can't
2418 change it because of optimize_size. Otherwise, we can't mix object
2419 files compiled with -Os and -On. */
2420 ix86_preferred_stack_boundary = 128;
2421 if (ix86_preferred_stack_boundary_string)
2422 {
2423 i = atoi (ix86_preferred_stack_boundary_string);
2424 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2425 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2426 TARGET_64BIT ? 4 : 2);
2427 else
2428 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2429 }
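/* The value given is the log2 of the byte alignment, so for instance
   -mpreferred-stack-boundary=4 selects (1 << 4) * BITS_PER_UNIT = 128 bits,
   i.e. a 16-byte aligned stack.  */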
2430
2431 /* Accept -msseregparm only if at least SSE support is enabled. */
2432 if (TARGET_SSEREGPARM
2433 && ! TARGET_SSE)
2434 error ("-msseregparm used without SSE enabled");
2435
2436 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2437 if (ix86_fpmath_string != 0)
2438 {
2439 if (! strcmp (ix86_fpmath_string, "387"))
2440 ix86_fpmath = FPMATH_387;
2441 else if (! strcmp (ix86_fpmath_string, "sse"))
2442 {
2443 if (!TARGET_SSE)
2444 {
2445 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2446 ix86_fpmath = FPMATH_387;
2447 }
2448 else
2449 ix86_fpmath = FPMATH_SSE;
2450 }
2451 else if (! strcmp (ix86_fpmath_string, "387,sse")
2452 || ! strcmp (ix86_fpmath_string, "sse,387"))
2453 {
2454 if (!TARGET_SSE)
2455 {
2456 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2457 ix86_fpmath = FPMATH_387;
2458 }
2459 else if (!TARGET_80387)
2460 {
2461 warning (0, "387 instruction set disabled, using SSE arithmetics");
2462 ix86_fpmath = FPMATH_SSE;
2463 }
2464 else
2465 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2466 }
2467 else
2468 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2469 }
2470
2471 /* If the i387 is disabled, then do not return values in it. */
2472 if (!TARGET_80387)
2473 target_flags &= ~MASK_FLOAT_RETURNS;
2474
2475 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2476 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2477 && !optimize_size)
2478 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2479
2480 /* ??? Unwind info is not correct around the CFG unless either a frame
2481 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2482 unwind info generation to be aware of the CFG and propagating states
2483 around edges. */
2484 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2485 || flag_exceptions || flag_non_call_exceptions)
2486 && flag_omit_frame_pointer
2487 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2488 {
2489 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2490 warning (0, "unwind tables currently require either a frame pointer "
2491 "or -maccumulate-outgoing-args for correctness");
2492 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2493 }
2494
2495 /* For sane SSE instruction set generation we need the fcomi instruction.
2496 It is safe to enable all CMOVE instructions. */
2497 if (TARGET_SSE)
2498 TARGET_CMOVE = 1;
2499
2500 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2501 {
2502 char *p;
2503 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2504 p = strchr (internal_label_prefix, 'X');
2505 internal_label_prefix_len = p - internal_label_prefix;
2506 *p = '\0';
2507 }
2508
2509 /* When a scheduling description is not available, disable the scheduler pass
2510 so it won't slow down the compilation and make x87 code slower. */
2511 if (!TARGET_SCHEDULE)
2512 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2513
2514 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2515 set_param_value ("simultaneous-prefetches",
2516 ix86_cost->simultaneous_prefetches);
2517 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2518 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2519 }
2520 \f
2521 /* Switch to the appropriate section for output of DECL.
2522 DECL is either a `VAR_DECL' node or a constant of some sort.
2523 RELOC indicates whether forming the initial value of DECL requires
2524 link-time relocations. */
2525
2526 static section *
2527 x86_64_elf_select_section (tree decl, int reloc,
2528 unsigned HOST_WIDE_INT align)
2529 {
2530 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2531 && ix86_in_large_data_p (decl))
2532 {
2533 const char *sname = NULL;
2534 unsigned int flags = SECTION_WRITE;
2535 switch (categorize_decl_for_section (decl, reloc))
2536 {
2537 case SECCAT_DATA:
2538 sname = ".ldata";
2539 break;
2540 case SECCAT_DATA_REL:
2541 sname = ".ldata.rel";
2542 break;
2543 case SECCAT_DATA_REL_LOCAL:
2544 sname = ".ldata.rel.local";
2545 break;
2546 case SECCAT_DATA_REL_RO:
2547 sname = ".ldata.rel.ro";
2548 break;
2549 case SECCAT_DATA_REL_RO_LOCAL:
2550 sname = ".ldata.rel.ro.local";
2551 break;
2552 case SECCAT_BSS:
2553 sname = ".lbss";
2554 flags |= SECTION_BSS;
2555 break;
2556 case SECCAT_RODATA:
2557 case SECCAT_RODATA_MERGE_STR:
2558 case SECCAT_RODATA_MERGE_STR_INIT:
2559 case SECCAT_RODATA_MERGE_CONST:
2560 sname = ".lrodata";
2561 flags = 0;
2562 break;
2563 case SECCAT_SRODATA:
2564 case SECCAT_SDATA:
2565 case SECCAT_SBSS:
2566 gcc_unreachable ();
2567 case SECCAT_TEXT:
2568 case SECCAT_TDATA:
2569 case SECCAT_TBSS:
2570 /* We don't split these for the medium model. Place them into
2571 default sections and hope for the best. */
2572 break;
2573 }
2574 if (sname)
2575 {
2576 /* We might get called with string constants, but get_named_section
2577 doesn't like them as they are not DECLs. Also, we need to set
2578 flags in that case. */
2579 if (!DECL_P (decl))
2580 return get_section (sname, flags, NULL);
2581 return get_named_section (decl, sname, reloc);
2582 }
2583 }
2584 return default_elf_select_section (decl, reloc, align);
2585 }
2586
2587 /* Build up a unique section name, expressed as a
2588 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2589 RELOC indicates whether the initial value of EXP requires
2590 link-time relocations. */
2591
2592 static void
2593 x86_64_elf_unique_section (tree decl, int reloc)
2594 {
2595 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2596 && ix86_in_large_data_p (decl))
2597 {
2598 const char *prefix = NULL;
2599 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2600 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2601
2602 switch (categorize_decl_for_section (decl, reloc))
2603 {
2604 case SECCAT_DATA:
2605 case SECCAT_DATA_REL:
2606 case SECCAT_DATA_REL_LOCAL:
2607 case SECCAT_DATA_REL_RO:
2608 case SECCAT_DATA_REL_RO_LOCAL:
2609 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2610 break;
2611 case SECCAT_BSS:
2612 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2613 break;
2614 case SECCAT_RODATA:
2615 case SECCAT_RODATA_MERGE_STR:
2616 case SECCAT_RODATA_MERGE_STR_INIT:
2617 case SECCAT_RODATA_MERGE_CONST:
2618 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2619 break;
2620 case SECCAT_SRODATA:
2621 case SECCAT_SDATA:
2622 case SECCAT_SBSS:
2623 gcc_unreachable ();
2624 case SECCAT_TEXT:
2625 case SECCAT_TDATA:
2626 case SECCAT_TBSS:
2627 /* We don't split these for the medium model. Place them into
2628 default sections and hope for the best. */
2629 break;
2630 }
2631 if (prefix)
2632 {
2633 const char *name;
2634 size_t nlen, plen;
2635 char *string;
2636 plen = strlen (prefix);
2637
2638 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2639 name = targetm.strip_name_encoding (name);
2640 nlen = strlen (name);
2641
2642 string = alloca (nlen + plen + 1);
2643 memcpy (string, prefix, plen);
2644 memcpy (string + plen, name, nlen + 1);
2645
2646 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2647 return;
2648 }
2649 }
2650 default_unique_section (decl, reloc);
2651 }
2652
2653 #ifdef COMMON_ASM_OP
2654 /* This says how to output assembler code to declare an
2655 uninitialized external linkage data object.
2656
2657 For medium-model x86-64 we need to use the .largecomm directive for
2658 large objects. */
2659 void
2660 x86_elf_aligned_common (FILE *file,
2661 const char *name, unsigned HOST_WIDE_INT size,
2662 int align)
2663 {
2664 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2665 && size > (unsigned int)ix86_section_threshold)
2666 fprintf (file, ".largecomm\t");
2667 else
2668 fprintf (file, "%s", COMMON_ASM_OP);
2669 assemble_name (file, name);
2670 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2671 size, align / BITS_PER_UNIT);
2672 }
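/* For illustration (assuming the default 65536-byte threshold and
   -mcmodel=medium): a hypothetical 131072-byte object "foo" with 256-bit
   alignment would be announced as
        .largecomm  foo,131072,32
   while smaller objects keep using the ordinary COMMON_ASM_OP form.  */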
2673 #endif
2674 /* Utility function for targets to use in implementing
2675 ASM_OUTPUT_ALIGNED_BSS. */
2676
2677 void
2678 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2679 const char *name, unsigned HOST_WIDE_INT size,
2680 int align)
2681 {
2682 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2683 && size > (unsigned int)ix86_section_threshold)
2684 switch_to_section (get_named_section (decl, ".lbss", 0));
2685 else
2686 switch_to_section (bss_section);
2687 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2688 #ifdef ASM_DECLARE_OBJECT_NAME
2689 last_assemble_variable_decl = decl;
2690 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2691 #else
2692 /* The standard thing is to just output a label for the object. */
2693 ASM_OUTPUT_LABEL (file, name);
2694 #endif /* ASM_DECLARE_OBJECT_NAME */
2695 ASM_OUTPUT_SKIP (file, size ? size : 1);
2696 }
2697 \f
2698 void
2699 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2700 {
2701 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2702 make the problem of not having enough registers even worse. */
2703 #ifdef INSN_SCHEDULING
2704 if (level > 1)
2705 flag_schedule_insns = 0;
2706 #endif
2707
2708 if (TARGET_MACHO)
2709 /* The Darwin libraries never set errno, so we might as well
2710 avoid calling them when that's the only reason we would. */
2711 flag_errno_math = 0;
2712
2713 /* The default values of these switches depend on TARGET_64BIT,
2714 which is not known at this moment. Mark these values with 2 and
2715 let the user override them. In case there is no command line option
2716 specifying them, we will set the defaults in override_options. */
2717 if (optimize >= 1)
2718 flag_omit_frame_pointer = 2;
2719 flag_pcc_struct_return = 2;
2720 flag_asynchronous_unwind_tables = 2;
2721 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2722 SUBTARGET_OPTIMIZATION_OPTIONS;
2723 #endif
2724 }
2725 \f
2726 /* Table of valid machine attributes. */
2727 const struct attribute_spec ix86_attribute_table[] =
2728 {
2729 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
2730 /* Stdcall attribute says callee is responsible for popping arguments
2731 if they are not variable. */
2732 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2733 /* Fastcall attribute says callee is responsible for popping arguments
2734 if they are not variable. */
2735 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2736 /* Cdecl attribute says the callee is a normal C declaration */
2737 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2738 /* Regparm attribute specifies how many integer arguments are to be
2739 passed in registers. */
2740 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
2741 /* Sseregparm attribute says we are using x86_64 calling conventions
2742 for FP arguments. */
2743 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
2744 /* force_align_arg_pointer says this function realigns the stack at entry. */
2745 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
2746 false, true, true, ix86_handle_cconv_attribute },
2747 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2748 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
2749 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
2750 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
2751 #endif
2752 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2753 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
2754 #ifdef SUBTARGET_ATTRIBUTE_TABLE
2755 SUBTARGET_ATTRIBUTE_TABLE,
2756 #endif
2757 { NULL, 0, 0, false, false, false, NULL }
2758 };
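/* Typical uses of these attributes on 32-bit targets, for reference:
        int __attribute__((regparm(3))) f (int a, int b, int c);
        int __attribute__((fastcall)) g (int a, int b);
   regparm(3) passes the first three integer arguments in %eax, %edx and
   %ecx; fastcall passes the first two in %ecx and %edx and, like stdcall,
   makes the callee pop the remaining stack arguments.  */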
2759
2760 /* Decide whether we can make a sibling call to a function. DECL is the
2761 declaration of the function being targeted by the call and EXP is the
2762 CALL_EXPR representing the call. */
2763
2764 static bool
2765 ix86_function_ok_for_sibcall (tree decl, tree exp)
2766 {
2767 tree func;
2768 rtx a, b;
2769
2770 /* If we are generating position-independent code, we cannot sibcall
2771 optimize any indirect call, or a direct call to a global function,
2772 as the PLT requires %ebx be live. */
2773 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2774 return false;
2775
2776 if (decl)
2777 func = decl;
2778 else
2779 {
2780 func = TREE_TYPE (CALL_EXPR_FN (exp));
2781 if (POINTER_TYPE_P (func))
2782 func = TREE_TYPE (func);
2783 }
2784
2785 /* Check that the return value locations are the same. For example,
2786 if we are returning floats on the 80387 register stack, we cannot
2787 make a sibcall from a function that doesn't return a float to a
2788 function that does or, conversely, from a function that does return
2789 a float to a function that doesn't; the necessary stack adjustment
2790 would not be executed. This is also the place where we notice
2791 differences in the return value ABI. Note that it is OK for one
2792 of the functions to have a void return type as long as the return
2793 value of the other is passed in a register. */
2794 a = ix86_function_value (TREE_TYPE (exp), func, false);
2795 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2796 cfun->decl, false);
2797 if (STACK_REG_P (a) || STACK_REG_P (b))
2798 {
2799 if (!rtx_equal_p (a, b))
2800 return false;
2801 }
2802 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2803 ;
2804 else if (!rtx_equal_p (a, b))
2805 return false;
2806
2807 /* If this call is indirect, we'll need to be able to use a call-clobbered
2808 register for the address of the target function. Make sure that all
2809 such registers are not used for passing parameters. */
2810 if (!decl && !TARGET_64BIT)
2811 {
2812 tree type;
2813
2814 /* We're looking at the CALL_EXPR, we need the type of the function. */
2815 type = CALL_EXPR_FN (exp); /* pointer expression */
2816 type = TREE_TYPE (type); /* pointer type */
2817 type = TREE_TYPE (type); /* function type */
2818
2819 if (ix86_function_regparm (type, NULL) >= 3)
2820 {
2821 /* ??? Need to count the actual number of registers to be used,
2822 not the possible number of registers. Fix later. */
2823 return false;
2824 }
2825 }
2826
2827 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2828 /* Dllimport'd functions are also called indirectly. */
2829 if (decl && DECL_DLLIMPORT_P (decl)
2830 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2831 return false;
2832 #endif
2833
2834 /* If we force-aligned the stack, then sibcalling would unalign the
2835 stack, which may break the called function. */
2836 if (cfun->machine->force_align_arg_pointer)
2837 return false;
2838
2839 /* Otherwise okay. That also includes certain types of indirect calls. */
2840 return true;
2841 }
2842
2843 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2844 calling convention attributes;
2845 arguments as in struct attribute_spec.handler. */
2846
2847 static tree
2848 ix86_handle_cconv_attribute (tree *node, tree name,
2849 tree args,
2850 int flags ATTRIBUTE_UNUSED,
2851 bool *no_add_attrs)
2852 {
2853 if (TREE_CODE (*node) != FUNCTION_TYPE
2854 && TREE_CODE (*node) != METHOD_TYPE
2855 && TREE_CODE (*node) != FIELD_DECL
2856 && TREE_CODE (*node) != TYPE_DECL)
2857 {
2858 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2859 IDENTIFIER_POINTER (name));
2860 *no_add_attrs = true;
2861 return NULL_TREE;
2862 }
2863
2864 /* Can combine regparm with all attributes but fastcall. */
2865 if (is_attribute_p ("regparm", name))
2866 {
2867 tree cst;
2868
2869 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2870 {
2871 error ("fastcall and regparm attributes are not compatible");
2872 }
2873
2874 cst = TREE_VALUE (args);
2875 if (TREE_CODE (cst) != INTEGER_CST)
2876 {
2877 warning (OPT_Wattributes,
2878 "%qs attribute requires an integer constant argument",
2879 IDENTIFIER_POINTER (name));
2880 *no_add_attrs = true;
2881 }
2882 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2883 {
2884 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2885 IDENTIFIER_POINTER (name), REGPARM_MAX);
2886 *no_add_attrs = true;
2887 }
2888
2889 if (!TARGET_64BIT
2890 && lookup_attribute (ix86_force_align_arg_pointer_string,
2891 TYPE_ATTRIBUTES (*node))
2892 && compare_tree_int (cst, REGPARM_MAX-1))
2893 {
2894 error ("%s functions limited to %d register parameters",
2895 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2896 }
2897
2898 return NULL_TREE;
2899 }
2900
2901 if (TARGET_64BIT)
2902 {
2903 warning (OPT_Wattributes, "%qs attribute ignored",
2904 IDENTIFIER_POINTER (name));
2905 *no_add_attrs = true;
2906 return NULL_TREE;
2907 }
2908
2909 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2910 if (is_attribute_p ("fastcall", name))
2911 {
2912 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2913 {
2914 error ("fastcall and cdecl attributes are not compatible");
2915 }
2916 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2917 {
2918 error ("fastcall and stdcall attributes are not compatible");
2919 }
2920 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2921 {
2922 error ("fastcall and regparm attributes are not compatible");
2923 }
2924 }
2925
2926 /* Can combine stdcall with fastcall (redundant), regparm and
2927 sseregparm. */
2928 else if (is_attribute_p ("stdcall", name))
2929 {
2930 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2931 {
2932 error ("stdcall and cdecl attributes are not compatible");
2933 }
2934 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2935 {
2936 error ("stdcall and fastcall attributes are not compatible");
2937 }
2938 }
2939
2940 /* Can combine cdecl with regparm and sseregparm. */
2941 else if (is_attribute_p ("cdecl", name))
2942 {
2943 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2944 {
2945 error ("stdcall and cdecl attributes are not compatible");
2946 }
2947 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2948 {
2949 error ("fastcall and cdecl attributes are not compatible");
2950 }
2951 }
2952
2953 /* Can combine sseregparm with all attributes. */
2954
2955 return NULL_TREE;
2956 }
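/* Summarising the checks above: fastcall excludes cdecl, stdcall and
   regparm; stdcall and cdecl exclude each other; sseregparm may be combined
   with any of them; and on 64-bit targets fastcall, stdcall, cdecl and
   sseregparm are ignored with a warning, since the ABI fixes the calling
   convention.  */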
2957
2958 /* Return 0 if the attributes for two types are incompatible, 1 if they
2959 are compatible, and 2 if they are nearly compatible (which causes a
2960 warning to be generated). */
2961
2962 static int
2963 ix86_comp_type_attributes (tree type1, tree type2)
2964 {
2965 /* Check for mismatch of non-default calling convention. */
2966 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2967
2968 if (TREE_CODE (type1) != FUNCTION_TYPE)
2969 return 1;
2970
2971 /* Check for mismatched fastcall/regparm types. */
2972 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2973 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2974 || (ix86_function_regparm (type1, NULL)
2975 != ix86_function_regparm (type2, NULL)))
2976 return 0;
2977
2978 /* Check for mismatched sseregparm types. */
2979 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2980 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2981 return 0;
2982
2983 /* Check for mismatched return types (cdecl vs stdcall). */
2984 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2985 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2986 return 0;
2987
2988 return 1;
2989 }
2990 \f
2991 /* Return the regparm value for a function with the indicated TYPE and DECL.
2992 DECL may be NULL when calling function indirectly
2993 or considering a libcall. */
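/* For illustration (hypothetical declaration): on a 32-bit target a
   prototype such as

       int __attribute__ ((regparm (3))) f (int a, int b, int c);

   makes this function return 3, so A, B and C are expected in
   %eax, %edx and %ecx respectively.  */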
2994
2995 static int
2996 ix86_function_regparm (tree type, tree decl)
2997 {
2998 tree attr;
2999 int regparm = ix86_regparm;
3000 bool user_convention = false;
3001
3002 if (!TARGET_64BIT)
3003 {
3004 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
3005 if (attr)
3006 {
3007 regparm = TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
3008 user_convention = true;
3009 }
3010
3011 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
3012 {
3013 regparm = 2;
3014 user_convention = true;
3015 }
3016
3017 /* Use register calling convention for local functions when possible. */
3018 if (!TARGET_64BIT && !user_convention && decl
3019 && flag_unit_at_a_time && !profile_flag)
3020 {
3021 struct cgraph_local_info *i = cgraph_local_info (decl);
3022 if (i && i->local)
3023 {
3024 int local_regparm, globals = 0, regno;
3025
3026 /* Make sure no regparm register is taken by a global register
3027 variable. */
3028 for (local_regparm = 0; local_regparm < 3; local_regparm++)
3029 if (global_regs[local_regparm])
3030 break;
3031 /* We can't use regparm(3) for nested functions as these use
3032 static chain pointer in third argument. */
3033 if (local_regparm == 3
3034 && decl_function_context (decl)
3035 && !DECL_NO_STATIC_CHAIN (decl))
3036 local_regparm = 2;
3037 /* If the function realigns its stack pointer, the
3038 prologue will clobber %ecx. If we've already
3039 generated code for the callee, the callee
3040 DECL_STRUCT_FUNCTION is gone, so we fall back to
3041 scanning the attributes for the self-realigning
3042 property. */
3043 if ((DECL_STRUCT_FUNCTION (decl)
3044 && DECL_STRUCT_FUNCTION (decl)->machine->force_align_arg_pointer)
3045 || (!DECL_STRUCT_FUNCTION (decl)
3046 && lookup_attribute (ix86_force_align_arg_pointer_string,
3047 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3048 local_regparm = 2;
3049 /* Each global register variable increases register pressure,
3050 so the more global reg vars there are, the less useful the
3051 regparm optimization is, unless requested by the user explicitly. */
3052 for (regno = 0; regno < 6; regno++)
3053 if (global_regs[regno])
3054 globals++;
3055 local_regparm
3056 = globals < local_regparm ? local_regparm - globals : 0;
3057
3058 if (local_regparm > regparm)
3059 regparm = local_regparm;
3060 }
3061 }
3062 }
3063 return regparm;
3064 }
3065
3066 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3067 DFmode (2) arguments in SSE registers for a function with the
3068 indicated TYPE and DECL. DECL may be NULL when calling function
3069 indirectly or considering a libcall. Otherwise return 0. */
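/* For illustration (hypothetical declaration): with -msse2 on a 32-bit
   target, a prototype such as

       double __attribute__ ((sseregparm)) f (double x);

   makes this function return 2, so X is passed in %xmm0 rather than on
   the stack (and, via ix86_value_regno, returned in %xmm0 as well).  */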
3070
3071 static int
3072 ix86_function_sseregparm (tree type, tree decl)
3073 {
3074 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3075 by the sseregparm attribute. */
3076 if (TARGET_SSEREGPARM
3077 || (type
3078 && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3079 {
3080 if (!TARGET_SSE)
3081 {
3082 if (decl)
3083 error ("Calling %qD with attribute sseregparm without "
3084 "SSE/SSE2 enabled", decl);
3085 else
3086 error ("Calling %qT with attribute sseregparm without "
3087 "SSE/SSE2 enabled", type);
3088 return 0;
3089 }
3090
3091 return 2;
3092 }
3093
3094 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3095 (and DFmode for SSE2) arguments in SSE registers,
3096 even for 32-bit targets. */
3097 if (!TARGET_64BIT && decl
3098 && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3099 {
3100 struct cgraph_local_info *i = cgraph_local_info (decl);
3101 if (i && i->local)
3102 return TARGET_SSE2 ? 2 : 1;
3103 }
3104
3105 return 0;
3106 }
3107
3108 /* Return true if EAX is live at the start of the function. Used by
3109 ix86_expand_prologue to determine if we need special help before
3110 calling allocate_stack_worker. */
3111
3112 static bool
3113 ix86_eax_live_at_start_p (void)
3114 {
3115 /* Cheat. Don't bother working forward from ix86_function_regparm
3116 to the function type to whether an actual argument is located in
3117 eax. Instead just look at cfg info, which is still close enough
3118 to correct at this point. This gives false positives for broken
3119 functions that might use uninitialized data that happens to be
3120 allocated in eax, but who cares? */
3121 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
3122 }
3123
3124 /* Value is the number of bytes of arguments automatically
3125 popped when returning from a subroutine call.
3126 FUNDECL is the declaration node of the function (as a tree),
3127 FUNTYPE is the data type of the function (as a tree),
3128 or for a library call it is an identifier node for the subroutine name.
3129 SIZE is the number of bytes of arguments passed on the stack.
3130
3131 On the 80386, the RTD insn may be used to pop them if the number
3132 of args is fixed, but if the number is variable then the caller
3133 must pop them all. RTD can't be used for library calls now
3134 because the library is compiled with the Unix compiler.
3135 Use of RTD is a selectable option, since it is incompatible with
3136 standard Unix calling sequences. If the option is not selected,
3137 the caller must always pop the args.
3138
3139 The attribute stdcall is equivalent to RTD on a per module basis. */
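/* For illustration (hypothetical declaration): for the 32-bit prototype

       int __attribute__ ((stdcall)) f (int a, int b);

   this function returns 8, i.e. the callee pops both stacked arguments
   (a "ret $8"), whereas a plain cdecl function returns 0 and leaves the
   cleanup to the caller.  */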
3140
3141 int
3142 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3143 {
3144 int rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3145
3146 /* Cdecl functions override -mrtd, and never pop the stack. */
3147 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype))) {
3148
3149 /* Stdcall and fastcall functions will pop the stack if not
3150 variable args. */
3151 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3152 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3153 rtd = 1;
3154
3155 if (rtd
3156 && (TYPE_ARG_TYPES (funtype) == NULL_TREE
3157 || (TREE_VALUE (tree_last (TYPE_ARG_TYPES (funtype)))
3158 == void_type_node)))
3159 return size;
3160 }
3161
3162 /* Lose any fake structure return argument if it is passed on the stack. */
3163 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3164 && !TARGET_64BIT
3165 && !KEEP_AGGREGATE_RETURN_POINTER)
3166 {
3167 int nregs = ix86_function_regparm (funtype, fundecl);
3168
3169 if (!nregs)
3170 return GET_MODE_SIZE (Pmode);
3171 }
3172
3173 return 0;
3174 }
3175 \f
3176 /* Argument support functions. */
3177
3178 /* Return true when register may be used to pass function parameters. */
3179 bool
3180 ix86_function_arg_regno_p (int regno)
3181 {
3182 int i;
3183 if (!TARGET_64BIT)
3184 {
3185 if (TARGET_MACHO)
3186 return (regno < REGPARM_MAX
3187 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3188 else
3189 return (regno < REGPARM_MAX
3190 || (TARGET_MMX && MMX_REGNO_P (regno)
3191 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3192 || (TARGET_SSE && SSE_REGNO_P (regno)
3193 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3194 }
3195
3196 if (TARGET_MACHO)
3197 {
3198 if (SSE_REGNO_P (regno) && TARGET_SSE)
3199 return true;
3200 }
3201 else
3202 {
3203 if (TARGET_SSE && SSE_REGNO_P (regno)
3204 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3205 return true;
3206 }
3207 /* RAX is used as hidden argument to va_arg functions. */
3208 if (!regno)
3209 return true;
3210 for (i = 0; i < REGPARM_MAX; i++)
3211 if (regno == x86_64_int_parameter_registers[i])
3212 return true;
3213 return false;
3214 }
3215
3216 /* Return true if we do not know how to pass TYPE solely in registers. */
3217
3218 static bool
3219 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
3220 {
3221 if (must_pass_in_stack_var_size_or_pad (mode, type))
3222 return true;
3223
3224 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3225 The layout_type routine is crafty and tries to trick us into passing
3226 currently unsupported vector types on the stack by using TImode. */
3227 return (!TARGET_64BIT && mode == TImode
3228 && type && TREE_CODE (type) != VECTOR_TYPE);
3229 }
3230
3231 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3232 for a call to a function whose data type is FNTYPE.
3233 For a library call, FNTYPE is 0. */
3234
3235 void
3236 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3237 tree fntype, /* tree ptr for function decl */
3238 rtx libname, /* SYMBOL_REF of library name or 0 */
3239 tree fndecl)
3240 {
3241 static CUMULATIVE_ARGS zero_cum;
3242 tree param, next_param;
3243
3244 if (TARGET_DEBUG_ARG)
3245 {
3246 fprintf (stderr, "\ninit_cumulative_args (");
3247 if (fntype)
3248 fprintf (stderr, "fntype code = %s, ret code = %s",
3249 tree_code_name[(int) TREE_CODE (fntype)],
3250 tree_code_name[(int) TREE_CODE (TREE_TYPE (fntype))]);
3251 else
3252 fprintf (stderr, "no fntype");
3253
3254 if (libname)
3255 fprintf (stderr, ", libname = %s", XSTR (libname, 0));
3256 }
3257
3258 *cum = zero_cum;
3259
3260 /* Set up the number of registers to use for passing arguments. */
3261 cum->nregs = ix86_regparm;
3262 if (TARGET_SSE)
3263 cum->sse_nregs = SSE_REGPARM_MAX;
3264 if (TARGET_MMX)
3265 cum->mmx_nregs = MMX_REGPARM_MAX;
3266 cum->warn_sse = true;
3267 cum->warn_mmx = true;
3268 cum->maybe_vaarg = false;
3269
3270 /* Use ecx and edx registers if function has fastcall attribute,
3271 else look for regparm information. */
3272 if (fntype && !TARGET_64BIT)
3273 {
3274 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3275 {
3276 cum->nregs = 2;
3277 cum->fastcall = 1;
3278 }
3279 else
3280 cum->nregs = ix86_function_regparm (fntype, fndecl);
3281 }
3282
3283 /* Set up the number of SSE registers used for passing SFmode
3284 and DFmode arguments. Warn for mismatching ABI. */
3285 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3286
3287 /* Determine if this function has variable arguments. This is
3288 indicated by the last argument being 'void_type_node' if there
3289 are no variable arguments. If there are variable arguments, then
3290 we won't pass anything in registers in 32-bit mode. */
3291
3292 if (cum->nregs || cum->mmx_nregs || cum->sse_nregs)
3293 {
3294 for (param = (fntype) ? TYPE_ARG_TYPES (fntype) : 0;
3295 param != 0; param = next_param)
3296 {
3297 next_param = TREE_CHAIN (param);
3298 if (next_param == 0 && TREE_VALUE (param) != void_type_node)
3299 {
3300 if (!TARGET_64BIT)
3301 {
3302 cum->nregs = 0;
3303 cum->sse_nregs = 0;
3304 cum->mmx_nregs = 0;
3305 cum->warn_sse = 0;
3306 cum->warn_mmx = 0;
3307 cum->fastcall = 0;
3308 cum->float_in_sse = 0;
3309 }
3310 cum->maybe_vaarg = true;
3311 }
3312 }
3313 }
3314 if ((!fntype && !libname)
3315 || (fntype && !TYPE_ARG_TYPES (fntype)))
3316 cum->maybe_vaarg = true;
3317
3318 if (TARGET_DEBUG_ARG)
3319 fprintf (stderr, ", nregs=%d )\n", cum->nregs);
3320
3321 return;
3322 }
3323
3324 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3325 But in the case of vector types, it is some vector mode.
3326
3327 When we have only some of our vector isa extensions enabled, then there
3328 are some modes for which vector_mode_supported_p is false. For these
3329 modes, the generic vector support in gcc will choose some non-vector mode
3330 in order to implement the type. By computing the natural mode, we'll
3331 select the proper ABI location for the operand and not depend on whatever
3332 the middle-end decides to do with these vector types. */
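/* For illustration (hypothetical typedef): for a type declared as

       typedef int v2si __attribute__ ((vector_size (8)));

   this function computes V2SImode even when MMX is disabled and the
   middle-end has therefore laid the type out in some non-vector mode
   (typically DImode), so the argument still lands in the ABI slot a
   vector belongs in.  */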
3333
3334 static enum machine_mode
3335 type_natural_mode (tree type)
3336 {
3337 enum machine_mode mode = TYPE_MODE (type);
3338
3339 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3340 {
3341 HOST_WIDE_INT size = int_size_in_bytes (type);
3342 if ((size == 8 || size == 16)
3343 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3344 && TYPE_VECTOR_SUBPARTS (type) > 1)
3345 {
3346 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3347
3348 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3349 mode = MIN_MODE_VECTOR_FLOAT;
3350 else
3351 mode = MIN_MODE_VECTOR_INT;
3352
3353 /* Get the mode which has this inner mode and number of units. */
3354 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3355 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3356 && GET_MODE_INNER (mode) == innermode)
3357 return mode;
3358
3359 gcc_unreachable ();
3360 }
3361 }
3362
3363 return mode;
3364 }
3365
3366 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3367 this may not agree with the mode that the type system has chosen for the
3368 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3369 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3370
3371 static rtx
3372 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3373 unsigned int regno)
3374 {
3375 rtx tmp;
3376
3377 if (orig_mode != BLKmode)
3378 tmp = gen_rtx_REG (orig_mode, regno);
3379 else
3380 {
3381 tmp = gen_rtx_REG (mode, regno);
3382 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3383 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3384 }
3385
3386 return tmp;
3387 }
3388
3389 /* x86-64 register passing implementation. See x86-64 ABI for details. The
3390 goal of this code is to classify each 8-byte chunk of an incoming argument
3391 by register class and assign registers accordingly. */
3392
3393 /* Return the union class of CLASS1 and CLASS2.
3394 See the x86-64 PS ABI for details. */
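/* For illustration: merging X86_64_INTEGERSI_CLASS with X86_64_SSESF_CLASS
   yields X86_64_INTEGERSI_CLASS (rule #4 below), whereas merging
   X86_64_SSESF_CLASS with X86_64_SSEDF_CLASS falls through to rule #6
   and yields X86_64_SSE_CLASS.  */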
3395
3396 static enum x86_64_reg_class
3397 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3398 {
3399 /* Rule #1: If both classes are equal, this is the resulting class. */
3400 if (class1 == class2)
3401 return class1;
3402
3403 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3404 the other class. */
3405 if (class1 == X86_64_NO_CLASS)
3406 return class2;
3407 if (class2 == X86_64_NO_CLASS)
3408 return class1;
3409
3410 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3411 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3412 return X86_64_MEMORY_CLASS;
3413
3414 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3415 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3416 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3417 return X86_64_INTEGERSI_CLASS;
3418 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3419 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3420 return X86_64_INTEGER_CLASS;
3421
3422 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3423 MEMORY is used. */
3424 if (class1 == X86_64_X87_CLASS
3425 || class1 == X86_64_X87UP_CLASS
3426 || class1 == X86_64_COMPLEX_X87_CLASS
3427 || class2 == X86_64_X87_CLASS
3428 || class2 == X86_64_X87UP_CLASS
3429 || class2 == X86_64_COMPLEX_X87_CLASS)
3430 return X86_64_MEMORY_CLASS;
3431
3432 /* Rule #6: Otherwise class SSE is used. */
3433 return X86_64_SSE_CLASS;
3434 }
3435
3436 /* Classify the argument of type TYPE and mode MODE.
3437 CLASSES will be filled by the register class used to pass each word
3438 of the operand. The number of words is returned. In case the parameter
3439 should be passed in memory, 0 is returned. As a special case for zero
3440 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3441
3442 BIT_OFFSET is used internally for handling records; it is the bit offset
3443 of the value being classified within the argument, modulo 256, to avoid overflow cases.
3444
3445 See the x86-64 PS ABI for details.
3446 */
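/* For illustration (hypothetical type): a structure such as

       struct s { double d; long l; };

   occupies two eightbytes; the first is classified X86_64_SSEDF_CLASS and
   the second X86_64_INTEGER_CLASS, so the argument is passed in one SSE
   and one integer register. Anything larger than 16 bytes is classified
   as memory and 0 is returned.  */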
3447
3448 static int
3449 classify_argument (enum machine_mode mode, tree type,
3450 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3451 {
3452 HOST_WIDE_INT bytes =
3453 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3454 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3455
3456 /* Variable sized entities are always passed/returned in memory. */
3457 if (bytes < 0)
3458 return 0;
3459
3460 if (mode != VOIDmode
3461 && targetm.calls.must_pass_in_stack (mode, type))
3462 return 0;
3463
3464 if (type && AGGREGATE_TYPE_P (type))
3465 {
3466 int i;
3467 tree field;
3468 enum x86_64_reg_class subclasses[MAX_CLASSES];
3469
3470 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3471 if (bytes > 16)
3472 return 0;
3473
3474 for (i = 0; i < words; i++)
3475 classes[i] = X86_64_NO_CLASS;
3476
3477 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3478 signal the memory class, so handle this as a special case. */
3479 if (!words)
3480 {
3481 classes[0] = X86_64_NO_CLASS;
3482 return 1;
3483 }
3484
3485 /* Classify each field of record and merge classes. */
3486 switch (TREE_CODE (type))
3487 {
3488 case RECORD_TYPE:
3489 /* And now merge the fields of structure. */
3490 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3491 {
3492 if (TREE_CODE (field) == FIELD_DECL)
3493 {
3494 int num;
3495
3496 if (TREE_TYPE (field) == error_mark_node)
3497 continue;
3498
3499 /* Bitfields are always classified as integer. Handle them
3500 early, since later code would consider them to be
3501 misaligned integers. */
3502 if (DECL_BIT_FIELD (field))
3503 {
3504 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3505 i < ((int_bit_position (field) + (bit_offset % 64))
3506 + tree_low_cst (DECL_SIZE (field), 0)
3507 + 63) / 8 / 8; i++)
3508 classes[i] =
3509 merge_classes (X86_64_INTEGER_CLASS,
3510 classes[i]);
3511 }
3512 else
3513 {
3514 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3515 TREE_TYPE (field), subclasses,
3516 (int_bit_position (field)
3517 + bit_offset) % 256);
3518 if (!num)
3519 return 0;
3520 for (i = 0; i < num; i++)
3521 {
3522 int pos =
3523 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3524 classes[i + pos] =
3525 merge_classes (subclasses[i], classes[i + pos]);
3526 }
3527 }
3528 }
3529 }
3530 break;
3531
3532 case ARRAY_TYPE:
3533 /* Arrays are handled as small records. */
3534 {
3535 int num;
3536 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3537 TREE_TYPE (type), subclasses, bit_offset);
3538 if (!num)
3539 return 0;
3540
3541 /* The partial classes are now full classes. */
3542 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3543 subclasses[0] = X86_64_SSE_CLASS;
3544 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3545 subclasses[0] = X86_64_INTEGER_CLASS;
3546
3547 for (i = 0; i < words; i++)
3548 classes[i] = subclasses[i % num];
3549
3550 break;
3551 }
3552 case UNION_TYPE:
3553 case QUAL_UNION_TYPE:
3554 /* Unions are similar to RECORD_TYPE but offset is always 0.
3555 */
3556 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3557 {
3558 if (TREE_CODE (field) == FIELD_DECL)
3559 {
3560 int num;
3561
3562 if (TREE_TYPE (field) == error_mark_node)
3563 continue;
3564
3565 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3566 TREE_TYPE (field), subclasses,
3567 bit_offset);
3568 if (!num)
3569 return 0;
3570 for (i = 0; i < num; i++)
3571 classes[i] = merge_classes (subclasses[i], classes[i]);
3572 }
3573 }
3574 break;
3575
3576 default:
3577 gcc_unreachable ();
3578 }
3579
3580 /* Final merger cleanup. */
3581 for (i = 0; i < words; i++)
3582 {
3583 /* If one class is MEMORY, everything should be passed in
3584 memory. */
3585 if (classes[i] == X86_64_MEMORY_CLASS)
3586 return 0;
3587
3588 /* The X86_64_SSEUP_CLASS should be always preceded by
3589 X86_64_SSE_CLASS. */
3590 if (classes[i] == X86_64_SSEUP_CLASS
3591 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3592 classes[i] = X86_64_SSE_CLASS;
3593
3594 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3595 if (classes[i] == X86_64_X87UP_CLASS
3596 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3597 classes[i] = X86_64_SSE_CLASS;
3598 }
3599 return words;
3600 }
3601
3602 /* Compute alignment needed. We align all types to their natural boundaries,
3603 with the exception of XFmode, which is aligned to 128 bits. */
3604 if (mode != VOIDmode && mode != BLKmode)
3605 {
3606 int mode_alignment = GET_MODE_BITSIZE (mode);
3607
3608 if (mode == XFmode)
3609 mode_alignment = 128;
3610 else if (mode == XCmode)
3611 mode_alignment = 256;
3612 if (COMPLEX_MODE_P (mode))
3613 mode_alignment /= 2;
3614 /* Misaligned fields are always returned in memory. */
3615 if (bit_offset % mode_alignment)
3616 return 0;
3617 }
3618
3619 /* for V1xx modes, just use the base mode */
3620 if (VECTOR_MODE_P (mode)
3621 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3622 mode = GET_MODE_INNER (mode);
3623
3624 /* Classification of atomic types. */
3625 switch (mode)
3626 {
3627 case SDmode:
3628 case DDmode:
3629 classes[0] = X86_64_SSE_CLASS;
3630 return 1;
3631 case TDmode:
3632 classes[0] = X86_64_SSE_CLASS;
3633 classes[1] = X86_64_SSEUP_CLASS;
3634 return 2;
3635 case DImode:
3636 case SImode:
3637 case HImode:
3638 case QImode:
3639 case CSImode:
3640 case CHImode:
3641 case CQImode:
3642 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3643 classes[0] = X86_64_INTEGERSI_CLASS;
3644 else
3645 classes[0] = X86_64_INTEGER_CLASS;
3646 return 1;
3647 case CDImode:
3648 case TImode:
3649 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3650 return 2;
3651 case CTImode:
3652 return 0;
3653 case SFmode:
3654 if (!(bit_offset % 64))
3655 classes[0] = X86_64_SSESF_CLASS;
3656 else
3657 classes[0] = X86_64_SSE_CLASS;
3658 return 1;
3659 case DFmode:
3660 classes[0] = X86_64_SSEDF_CLASS;
3661 return 1;
3662 case XFmode:
3663 classes[0] = X86_64_X87_CLASS;
3664 classes[1] = X86_64_X87UP_CLASS;
3665 return 2;
3666 case TFmode:
3667 classes[0] = X86_64_SSE_CLASS;
3668 classes[1] = X86_64_SSEUP_CLASS;
3669 return 2;
3670 case SCmode:
3671 classes[0] = X86_64_SSE_CLASS;
3672 return 1;
3673 case DCmode:
3674 classes[0] = X86_64_SSEDF_CLASS;
3675 classes[1] = X86_64_SSEDF_CLASS;
3676 return 2;
3677 case XCmode:
3678 classes[0] = X86_64_COMPLEX_X87_CLASS;
3679 return 1;
3680 case TCmode:
3681 /* This mode is larger than 16 bytes. */
3682 return 0;
3683 case V4SFmode:
3684 case V4SImode:
3685 case V16QImode:
3686 case V8HImode:
3687 case V2DFmode:
3688 case V2DImode:
3689 classes[0] = X86_64_SSE_CLASS;
3690 classes[1] = X86_64_SSEUP_CLASS;
3691 return 2;
3692 case V2SFmode:
3693 case V2SImode:
3694 case V4HImode:
3695 case V8QImode:
3696 classes[0] = X86_64_SSE_CLASS;
3697 return 1;
3698 case BLKmode:
3699 case VOIDmode:
3700 return 0;
3701 default:
3702 gcc_assert (VECTOR_MODE_P (mode));
3703
3704 if (bytes > 16)
3705 return 0;
3706
3707 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3708
3709 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3710 classes[0] = X86_64_INTEGERSI_CLASS;
3711 else
3712 classes[0] = X86_64_INTEGER_CLASS;
3713 classes[1] = X86_64_INTEGER_CLASS;
3714 return 1 + (bytes > 8);
3715 }
3716 }
3717
3718 /* Examine the argument and set the number of registers required in each
3719 class. Return 0 iff the parameter should be passed in memory. */
3720 static int
3721 examine_argument (enum machine_mode mode, tree type, int in_return,
3722 int *int_nregs, int *sse_nregs)
3723 {
3724 enum x86_64_reg_class class[MAX_CLASSES];
3725 int n = classify_argument (mode, type, class, 0);
3726
3727 *int_nregs = 0;
3728 *sse_nregs = 0;
3729 if (!n)
3730 return 0;
3731 for (n--; n >= 0; n--)
3732 switch (class[n])
3733 {
3734 case X86_64_INTEGER_CLASS:
3735 case X86_64_INTEGERSI_CLASS:
3736 (*int_nregs)++;
3737 break;
3738 case X86_64_SSE_CLASS:
3739 case X86_64_SSESF_CLASS:
3740 case X86_64_SSEDF_CLASS:
3741 (*sse_nregs)++;
3742 break;
3743 case X86_64_NO_CLASS:
3744 case X86_64_SSEUP_CLASS:
3745 break;
3746 case X86_64_X87_CLASS:
3747 case X86_64_X87UP_CLASS:
3748 if (!in_return)
3749 return 0;
3750 break;
3751 case X86_64_COMPLEX_X87_CLASS:
3752 return in_return ? 2 : 0;
3753 case X86_64_MEMORY_CLASS:
3754 gcc_unreachable ();
3755 }
3756 return 1;
3757 }
3758
3759 /* Construct container for the argument used by GCC interface. See
3760 FUNCTION_ARG for the detailed description. */
3761
3762 static rtx
3763 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3764 tree type, int in_return, int nintregs, int nsseregs,
3765 const int *intreg, int sse_regno)
3766 {
3767 /* The following variables hold the static issued_error state. */
3768 static bool issued_sse_arg_error;
3769 static bool issued_sse_ret_error;
3770 static bool issued_x87_ret_error;
3771
3772 enum machine_mode tmpmode;
3773 int bytes =
3774 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3775 enum x86_64_reg_class class[MAX_CLASSES];
3776 int n;
3777 int i;
3778 int nexps = 0;
3779 int needed_sseregs, needed_intregs;
3780 rtx exp[MAX_CLASSES];
3781 rtx ret;
3782
3783 n = classify_argument (mode, type, class, 0);
3784 if (TARGET_DEBUG_ARG)
3785 {
3786 if (!n)
3787 fprintf (stderr, "Memory class\n");
3788 else
3789 {
3790 fprintf (stderr, "Classes:");
3791 for (i = 0; i < n; i++)
3792 {
3793 fprintf (stderr, " %s", x86_64_reg_class_name[class[i]]);
3794 }
3795 fprintf (stderr, "\n");
3796 }
3797 }
3798 if (!n)
3799 return NULL;
3800 if (!examine_argument (mode, type, in_return, &needed_intregs,
3801 &needed_sseregs))
3802 return NULL;
3803 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3804 return NULL;
3805
3806 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3807 some less clueful developer tries to use floating-point anyway. */
3808 if (needed_sseregs && !TARGET_SSE)
3809 {
3810 if (in_return)
3811 {
3812 if (!issued_sse_ret_error)
3813 {
3814 error ("SSE register return with SSE disabled");
3815 issued_sse_ret_error = true;
3816 }
3817 }
3818 else if (!issued_sse_arg_error)
3819 {
3820 error ("SSE register argument with SSE disabled");
3821 issued_sse_arg_error = true;
3822 }
3823 return NULL;
3824 }
3825
3826 /* Likewise, error if the ABI requires us to return values in the
3827 x87 registers and the user specified -mno-80387. */
3828 if (!TARGET_80387 && in_return)
3829 for (i = 0; i < n; i++)
3830 if (class[i] == X86_64_X87_CLASS
3831 || class[i] == X86_64_X87UP_CLASS
3832 || class[i] == X86_64_COMPLEX_X87_CLASS)
3833 {
3834 if (!issued_x87_ret_error)
3835 {
3836 error ("x87 register return with x87 disabled");
3837 issued_x87_ret_error = true;
3838 }
3839 return NULL;
3840 }
3841
3842 /* First construct simple cases. Avoid SCmode, since we want to use
3843 a single register to pass this type. */
3844 if (n == 1 && mode != SCmode)
3845 switch (class[0])
3846 {
3847 case X86_64_INTEGER_CLASS:
3848 case X86_64_INTEGERSI_CLASS:
3849 return gen_rtx_REG (mode, intreg[0]);
3850 case X86_64_SSE_CLASS:
3851 case X86_64_SSESF_CLASS:
3852 case X86_64_SSEDF_CLASS:
3853 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3854 case X86_64_X87_CLASS:
3855 case X86_64_COMPLEX_X87_CLASS:
3856 return gen_rtx_REG (mode, FIRST_STACK_REG);
3857 case X86_64_NO_CLASS:
3858 /* Zero sized array, struct or class. */
3859 return NULL;
3860 default:
3861 gcc_unreachable ();
3862 }
3863 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3864 && mode != BLKmode)
3865 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3866 if (n == 2
3867 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3868 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3869 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3870 && class[1] == X86_64_INTEGER_CLASS
3871 && (mode == CDImode || mode == TImode || mode == TFmode)
3872 && intreg[0] + 1 == intreg[1])
3873 return gen_rtx_REG (mode, intreg[0]);
3874
3875 /* Otherwise figure out the entries of the PARALLEL. */
3876 for (i = 0; i < n; i++)
3877 {
3878 switch (class[i])
3879 {
3880 case X86_64_NO_CLASS:
3881 break;
3882 case X86_64_INTEGER_CLASS:
3883 case X86_64_INTEGERSI_CLASS:
3884 /* Merge TImodes on aligned occasions here too. */
3885 if (i * 8 + 8 > bytes)
3886 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3887 else if (class[i] == X86_64_INTEGERSI_CLASS)
3888 tmpmode = SImode;
3889 else
3890 tmpmode = DImode;
3891 /* We've requested 24 bytes we don't have a mode for. Use DImode. */
3892 if (tmpmode == BLKmode)
3893 tmpmode = DImode;
3894 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3895 gen_rtx_REG (tmpmode, *intreg),
3896 GEN_INT (i*8));
3897 intreg++;
3898 break;
3899 case X86_64_SSESF_CLASS:
3900 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3901 gen_rtx_REG (SFmode,
3902 SSE_REGNO (sse_regno)),
3903 GEN_INT (i*8));
3904 sse_regno++;
3905 break;
3906 case X86_64_SSEDF_CLASS:
3907 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3908 gen_rtx_REG (DFmode,
3909 SSE_REGNO (sse_regno)),
3910 GEN_INT (i*8));
3911 sse_regno++;
3912 break;
3913 case X86_64_SSE_CLASS:
3914 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3915 tmpmode = TImode;
3916 else
3917 tmpmode = DImode;
3918 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3919 gen_rtx_REG (tmpmode,
3920 SSE_REGNO (sse_regno)),
3921 GEN_INT (i*8));
3922 if (tmpmode == TImode)
3923 i++;
3924 sse_regno++;
3925 break;
3926 default:
3927 gcc_unreachable ();
3928 }
3929 }
3930
3931 /* Empty aligned struct, union or class. */
3932 if (nexps == 0)
3933 return NULL;
3934
3935 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3936 for (i = 0; i < nexps; i++)
3937 XVECEXP (ret, 0, i) = exp [i];
3938 return ret;
3939 }
3940
3941 /* Update the data in CUM to advance over an argument
3942 of mode MODE and data type TYPE.
3943 (TYPE is null for libcalls where that information may not be available.) */
3944
3945 void
3946 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3947 tree type, int named)
3948 {
3949 int bytes =
3950 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3951 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3952
3953 if (type)
3954 mode = type_natural_mode (type);
3955
3956 if (TARGET_DEBUG_ARG)
3957 fprintf (stderr, "function_adv (sz=%d, wds=%2d, nregs=%d, ssenregs=%d, "
3958 "mode=%s, named=%d)\n\n",
3959 words, cum->words, cum->nregs, cum->sse_nregs,
3960 GET_MODE_NAME (mode), named);
3961
3962 if (TARGET_64BIT)
3963 {
3964 int int_nregs, sse_nregs;
3965 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3966 cum->words += words;
3967 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3968 {
3969 cum->nregs -= int_nregs;
3970 cum->sse_nregs -= sse_nregs;
3971 cum->regno += int_nregs;
3972 cum->sse_regno += sse_nregs;
3973 }
3974 else
3975 cum->words += words;
3976 }
3977 else
3978 {
3979 switch (mode)
3980 {
3981 default:
3982 break;
3983
3984 case BLKmode:
3985 if (bytes < 0)
3986 break;
3987 /* FALLTHRU */
3988
3989 case DImode:
3990 case SImode:
3991 case HImode:
3992 case QImode:
3993 cum->words += words;
3994 cum->nregs -= words;
3995 cum->regno += words;
3996
3997 if (cum->nregs <= 0)
3998 {
3999 cum->nregs = 0;
4000 cum->regno = 0;
4001 }
4002 break;
4003
4004 case DFmode:
4005 if (cum->float_in_sse < 2)
4006 break;
4007 case SFmode:
4008 if (cum->float_in_sse < 1)
4009 break;
4010 /* FALLTHRU */
4011
4012 case TImode:
4013 case V16QImode:
4014 case V8HImode:
4015 case V4SImode:
4016 case V2DImode:
4017 case V4SFmode:
4018 case V2DFmode:
4019 if (!type || !AGGREGATE_TYPE_P (type))
4020 {
4021 cum->sse_words += words;
4022 cum->sse_nregs -= 1;
4023 cum->sse_regno += 1;
4024 if (cum->sse_nregs <= 0)
4025 {
4026 cum->sse_nregs = 0;
4027 cum->sse_regno = 0;
4028 }
4029 }
4030 break;
4031
4032 case V8QImode:
4033 case V4HImode:
4034 case V2SImode:
4035 case V2SFmode:
4036 if (!type || !AGGREGATE_TYPE_P (type))
4037 {
4038 cum->mmx_words += words;
4039 cum->mmx_nregs -= 1;
4040 cum->mmx_regno += 1;
4041 if (cum->mmx_nregs <= 0)
4042 {
4043 cum->mmx_nregs = 0;
4044 cum->mmx_regno = 0;
4045 }
4046 }
4047 break;
4048 }
4049 }
4050 }
4051
4052 /* Define where to put the arguments to a function.
4053 Value is zero to push the argument on the stack,
4054 or a hard register in which to store the argument.
4055
4056 MODE is the argument's machine mode.
4057 TYPE is the data type of the argument (as a tree).
4058 This is null for libcalls where that information may
4059 not be available.
4060 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4061 the preceding args and about the function being called.
4062 NAMED is nonzero if this argument is a named parameter
4063 (otherwise it is an extra parameter matching an ellipsis). */
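/* For illustration (hypothetical declaration): for the 32-bit prototype

       void __attribute__ ((fastcall)) f (int a, int b, int c);

   this function returns a REG rtx for %ecx for A and %edx for B, and
   NULL_RTX for C, which therefore goes on the stack.  */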
4064
4065 rtx
4066 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode orig_mode,
4067 tree type, int named)
4068 {
4069 enum machine_mode mode = orig_mode;
4070 rtx ret = NULL_RTX;
4071 int bytes =
4072 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
4073 int words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4074 static bool warnedsse, warnedmmx;
4075
4076 /* To simplify the code below, represent vector types with a vector mode
4077 even if MMX/SSE are not active. */
4078 if (type && TREE_CODE (type) == VECTOR_TYPE)
4079 mode = type_natural_mode (type);
4080
4081 /* Handle a hidden AL argument containing the number of registers for varargs
4082 x86-64 functions. For i386 ABI just return constm1_rtx to avoid
4083 any AL settings. */
4084 if (mode == VOIDmode)
4085 {
4086 if (TARGET_64BIT)
4087 return GEN_INT (cum->maybe_vaarg
4088 ? (cum->sse_nregs < 0
4089 ? SSE_REGPARM_MAX
4090 : cum->sse_regno)
4091 : -1);
4092 else
4093 return constm1_rtx;
4094 }
4095 if (TARGET_64BIT)
4096 ret = construct_container (mode, orig_mode, type, 0, cum->nregs,
4097 cum->sse_nregs,
4098 &x86_64_int_parameter_registers [cum->regno],
4099 cum->sse_regno);
4100 else
4101 switch (mode)
4102 {
4103 /* For now, pass fp/complex values on the stack. */
4104 default:
4105 break;
4106
4107 case BLKmode:
4108 if (bytes < 0)
4109 break;
4110 /* FALLTHRU */
4111 case DImode:
4112 case SImode:
4113 case HImode:
4114 case QImode:
4115 if (words <= cum->nregs)
4116 {
4117 int regno = cum->regno;
4118
4119 /* Fastcall allocates the first two DWORD (SImode) or
4120 smaller arguments to ECX and EDX. */
4121 if (cum->fastcall)
4122 {
4123 if (mode == BLKmode || mode == DImode)
4124 break;
4125
4126 /* ECX, not EAX, is the first allocated register. */
4127 if (regno == 0)
4128 regno = 2;
4129 }
4130 ret = gen_rtx_REG (mode, regno);
4131 }
4132 break;
4133 case DFmode:
4134 if (cum->float_in_sse < 2)
4135 break;
4136 case SFmode:
4137 if (cum->float_in_sse < 1)
4138 break;
4139 /* FALLTHRU */
4140 case TImode:
4141 case V16QImode:
4142 case V8HImode:
4143 case V4SImode:
4144 case V2DImode:
4145 case V4SFmode:
4146 case V2DFmode:
4147 if (!type || !AGGREGATE_TYPE_P (type))
4148 {
4149 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4150 {
4151 warnedsse = true;
4152 warning (0, "SSE vector argument without SSE enabled "
4153 "changes the ABI");
4154 }
4155 if (cum->sse_nregs)
4156 ret = gen_reg_or_parallel (mode, orig_mode,
4157 cum->sse_regno + FIRST_SSE_REG);
4158 }
4159 break;
4160 case V8QImode:
4161 case V4HImode:
4162 case V2SImode:
4163 case V2SFmode:
4164 if (!type || !AGGREGATE_TYPE_P (type))
4165 {
4166 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4167 {
4168 warnedmmx = true;
4169 warning (0, "MMX vector argument without MMX enabled "
4170 "changes the ABI");
4171 }
4172 if (cum->mmx_nregs)
4173 ret = gen_reg_or_parallel (mode, orig_mode,
4174 cum->mmx_regno + FIRST_MMX_REG);
4175 }
4176 break;
4177 }
4178
4179 if (TARGET_DEBUG_ARG)
4180 {
4181 fprintf (stderr,
4182 "function_arg (size=%d, wds=%2d, nregs=%d, mode=%4s, named=%d, ",
4183 words, cum->words, cum->nregs, GET_MODE_NAME (mode), named);
4184
4185 if (ret)
4186 print_simple_rtl (stderr, ret);
4187 else
4188 fprintf (stderr, ", stack");
4189
4190 fprintf (stderr, " )\n");
4191 }
4192
4193 return ret;
4194 }
4195
4196 /* A C expression that indicates when an argument must be passed by
4197 reference. If nonzero for an argument, a copy of that argument is
4198 made in memory and a pointer to the argument is passed instead of
4199 the argument itself. The pointer is passed in whatever way is
4200 appropriate for passing a pointer to that type. */
4201
4202 static bool
4203 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4204 enum machine_mode mode ATTRIBUTE_UNUSED,
4205 tree type, bool named ATTRIBUTE_UNUSED)
4206 {
4207 if (!TARGET_64BIT)
4208 return 0;
4209
4210 if (type && int_size_in_bytes (type) == -1)
4211 {
4212 if (TARGET_DEBUG_ARG)
4213 fprintf (stderr, "function_arg_pass_by_reference\n");
4214 return 1;
4215 }
4216
4217 return 0;
4218 }
4219
4220 /* Return true when TYPE should be 128-bit aligned for the 32-bit argument
4221 passing ABI. Only called if TARGET_SSE. */
4222 static bool
4223 contains_128bit_aligned_vector_p (tree type)
4224 {
4225 enum machine_mode mode = TYPE_MODE (type);
4226 if (SSE_REG_MODE_P (mode)
4227 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4228 return true;
4229 if (TYPE_ALIGN (type) < 128)
4230 return false;
4231
4232 if (AGGREGATE_TYPE_P (type))
4233 {
4234 /* Walk the aggregates recursively. */
4235 switch (TREE_CODE (type))
4236 {
4237 case RECORD_TYPE:
4238 case UNION_TYPE:
4239 case QUAL_UNION_TYPE:
4240 {
4241 tree field;
4242
4243 /* Walk all the structure fields. */
4244 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4245 {
4246 if (TREE_CODE (field) == FIELD_DECL
4247 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4248 return true;
4249 }
4250 break;
4251 }
4252
4253 case ARRAY_TYPE:
4254 /* Just for use if some languages pass arrays by value. */
4255 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4256 return true;
4257 break;
4258
4259 default:
4260 gcc_unreachable ();
4261 }
4262 }
4263 return false;
4264 }
4265
4266 /* Gives the alignment boundary, in bits, of an argument with the
4267 specified mode and type. */
4268
4269 int
4270 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4271 {
4272 int align;
4273 if (type)
4274 align = TYPE_ALIGN (type);
4275 else
4276 align = GET_MODE_ALIGNMENT (mode);
4277 if (align < PARM_BOUNDARY)
4278 align = PARM_BOUNDARY;
4279 if (!TARGET_64BIT)
4280 {
4281 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4282 make an exception for SSE modes since these require 128bit
4283 alignment.
4284
4285 The handling here differs from field_alignment. ICC aligns MMX
4286 arguments to 4 byte boundaries, while structure fields are aligned
4287 to 8 byte boundaries. */
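/* For illustration: on a 32-bit SSE target an argument of type __m128
   keeps its 128-bit alignment here, while a plain double is knocked
   down to the 32-bit PARM_BOUNDARY even though its natural alignment
   is 64 bits.  */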
4288 if (!TARGET_SSE)
4289 align = PARM_BOUNDARY;
4290 else if (!type)
4291 {
4292 if (!SSE_REG_MODE_P (mode))
4293 align = PARM_BOUNDARY;
4294 }
4295 else
4296 {
4297 if (!contains_128bit_aligned_vector_p (type))
4298 align = PARM_BOUNDARY;
4299 }
4300 }
4301 if (align > 128)
4302 align = 128;
4303 return align;
4304 }
4305
4306 /* Return true if N is a possible register number of function value. */
4307 bool
4308 ix86_function_value_regno_p (int regno)
4309 {
4310 if (TARGET_MACHO)
4311 {
4312 if (!TARGET_64BIT)
4313 {
4314 return ((regno) == 0
4315 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4316 || ((regno) == FIRST_SSE_REG && TARGET_SSE));
4317 }
4318 return ((regno) == 0 || (regno) == FIRST_FLOAT_REG
4319 || ((regno) == FIRST_SSE_REG && TARGET_SSE)
4320 || ((regno) == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387));
4321 }
4322 else
4323 {
4324 if (regno == 0
4325 || (regno == FIRST_FLOAT_REG && TARGET_FLOAT_RETURNS_IN_80387)
4326 || (regno == FIRST_SSE_REG && TARGET_SSE))
4327 return true;
4328
4329 if (!TARGET_64BIT
4330 && (regno == FIRST_MMX_REG && TARGET_MMX))
4331 return true;
4332
4333 return false;
4334 }
4335 }
4336
4337 /* Define how to find the value returned by a function.
4338 VALTYPE is the data type of the value (as a tree).
4339 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4340 otherwise, FUNC is 0. */
4341 rtx
4342 ix86_function_value (tree valtype, tree fntype_or_decl,
4343 bool outgoing ATTRIBUTE_UNUSED)
4344 {
4345 enum machine_mode natmode = type_natural_mode (valtype);
4346
4347 if (TARGET_64BIT)
4348 {
4349 rtx ret = construct_container (natmode, TYPE_MODE (valtype), valtype,
4350 1, REGPARM_MAX, SSE_REGPARM_MAX,
4351 x86_64_int_return_registers, 0);
4352 /* For zero sized structures, construct_container returns NULL, but we
4353 need to keep the rest of the compiler happy by returning a meaningful value. */
4354 if (!ret)
4355 ret = gen_rtx_REG (TYPE_MODE (valtype), 0);
4356 return ret;
4357 }
4358 else
4359 {
4360 tree fn = NULL_TREE, fntype;
4361 if (fntype_or_decl
4362 && DECL_P (fntype_or_decl))
4363 fn = fntype_or_decl;
4364 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4365 return gen_rtx_REG (TYPE_MODE (valtype),
4366 ix86_value_regno (natmode, fn, fntype));
4367 }
4368 }
4369
4370 /* Return true iff type is returned in memory. */
4371 int
4372 ix86_return_in_memory (tree type)
4373 {
4374 int needed_intregs, needed_sseregs, size;
4375 enum machine_mode mode = type_natural_mode (type);
4376
4377 if (TARGET_64BIT)
4378 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4379
4380 if (mode == BLKmode)
4381 return 1;
4382
4383 size = int_size_in_bytes (type);
4384
4385 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4386 return 0;
4387
4388 if (VECTOR_MODE_P (mode) || mode == TImode)
4389 {
4390 /* User-created vectors small enough to fit in EAX. */
4391 if (size < 8)
4392 return 0;
4393
4394 /* MMX/3dNow values are returned in MM0,
4395 except when it doesn't exist. */
4396 if (size == 8)
4397 return (TARGET_MMX ? 0 : 1);
4398
4399 /* SSE values are returned in XMM0, except when it doesn't exist. */
4400 if (size == 16)
4401 return (TARGET_SSE ? 0 : 1);
4402 }
4403
4404 if (mode == XFmode)
4405 return 0;
4406
4407 if (mode == TDmode)
4408 return 1;
4409
4410 if (size > 12)
4411 return 1;
4412 return 0;
4413 }
4414
4415 /* When returning SSE vector types, we have a choice of either
4416 (1) being ABI incompatible with a -march switch, or
4417 (2) generating an error.
4418 Given no good solution, I think the safest thing is one warning.
4419 The user won't be able to use -Werror, but....
4420
4421 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4422 called in response to actually generating a caller or callee that
4423 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4424 via aggregate_value_p for general type probing from tree-ssa. */
4425
4426 static rtx
4427 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4428 {
4429 static bool warnedsse, warnedmmx;
4430
4431 if (type)
4432 {
4433 /* Look at the return type of the function, not the function type. */
4434 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4435
4436 if (!TARGET_SSE && !warnedsse)
4437 {
4438 if (mode == TImode
4439 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4440 {
4441 warnedsse = true;
4442 warning (0, "SSE vector return without SSE enabled "
4443 "changes the ABI");
4444 }
4445 }
4446
4447 if (!TARGET_MMX && !warnedmmx)
4448 {
4449 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4450 {
4451 warnedmmx = true;
4452 warning (0, "MMX vector return without MMX enabled "
4453 "changes the ABI");
4454 }
4455 }
4456 }
4457
4458 return NULL;
4459 }
4460
4461 /* Define how to find the value returned by a library function
4462 assuming the value has mode MODE. */
4463 rtx
4464 ix86_libcall_value (enum machine_mode mode)
4465 {
4466 if (TARGET_64BIT)
4467 {
4468 switch (mode)
4469 {
4470 case SFmode:
4471 case SCmode:
4472 case DFmode:
4473 case DCmode:
4474 case TFmode:
4475 case SDmode:
4476 case DDmode:
4477 case TDmode:
4478 return gen_rtx_REG (mode, FIRST_SSE_REG);
4479 case XFmode:
4480 case XCmode:
4481 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4482 case TCmode:
4483 return NULL;
4484 default:
4485 return gen_rtx_REG (mode, 0);
4486 }
4487 }
4488 else
4489 return gen_rtx_REG (mode, ix86_value_regno (mode, NULL, NULL));
4490 }
4491
4492 /* Given a mode, return the register to use for a return value. */
4493
4494 static int
4495 ix86_value_regno (enum machine_mode mode, tree func, tree fntype)
4496 {
4497 gcc_assert (!TARGET_64BIT);
4498
4499 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4500 we normally prevent this case when mmx is not available. However
4501 some ABIs may require the result to be returned like DImode. */
4502 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4503 return TARGET_MMX ? FIRST_MMX_REG : 0;
4504
4505 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4506 we prevent this case when sse is not available. However some ABIs
4507 may require the result to be returned like integer TImode. */
4508 if (mode == TImode || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4509 return TARGET_SSE ? FIRST_SSE_REG : 0;
4510
4511 /* Decimal floating point values can go in %eax, unlike other float modes. */
4512 if (DECIMAL_FLOAT_MODE_P (mode))
4513 return 0;
4514
4515 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4516 if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4517 return 0;
4518
4519 /* Floating point return values in %st(0), except for local functions when
4520 SSE math is enabled or for functions with sseregparm attribute. */
4521 if ((func || fntype)
4522 && (mode == SFmode || mode == DFmode))
4523 {
4524 int sse_level = ix86_function_sseregparm (fntype, func);
4525 if ((sse_level >= 1 && mode == SFmode)
4526 || (sse_level == 2 && mode == DFmode))
4527 return FIRST_SSE_REG;
4528 }
4529
4530 return FIRST_FLOAT_REG;
4531 }
4532 \f
4533 /* Create the va_list data type. */
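/* On 64-bit targets the record built below corresponds to the C type

       typedef struct {
         unsigned int gp_offset;
         unsigned int fp_offset;
         void *overflow_arg_area;
         void *reg_save_area;
       } __va_list_tag[1];

   from the x86-64 psABI; 32-bit targets simply use a plain char *.  */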
4534
4535 static tree
4536 ix86_build_builtin_va_list (void)
4537 {
4538 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4539
4540 /* For i386 we use a plain pointer to the argument area. */
4541 if (!TARGET_64BIT)
4542 return build_pointer_type (char_type_node);
4543
4544 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4545 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4546
4547 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4548 unsigned_type_node);
4549 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4550 unsigned_type_node);
4551 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4552 ptr_type_node);
4553 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4554 ptr_type_node);
4555
4556 va_list_gpr_counter_field = f_gpr;
4557 va_list_fpr_counter_field = f_fpr;
4558
4559 DECL_FIELD_CONTEXT (f_gpr) = record;
4560 DECL_FIELD_CONTEXT (f_fpr) = record;
4561 DECL_FIELD_CONTEXT (f_ovf) = record;
4562 DECL_FIELD_CONTEXT (f_sav) = record;
4563
4564 TREE_CHAIN (record) = type_decl;
4565 TYPE_NAME (record) = type_decl;
4566 TYPE_FIELDS (record) = f_gpr;
4567 TREE_CHAIN (f_gpr) = f_fpr;
4568 TREE_CHAIN (f_fpr) = f_ovf;
4569 TREE_CHAIN (f_ovf) = f_sav;
4570
4571 layout_type (record);
4572
4573 /* The correct type is an array type of one element. */
4574 return build_array_type (record, build_index_type (size_zero_node));
4575 }
4576
4577 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4578
4579 static void
4580 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4581 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4582 int no_rtl)
4583 {
4584 CUMULATIVE_ARGS next_cum;
4585 rtx save_area = NULL_RTX, mem;
4586 rtx label;
4587 rtx label_ref;
4588 rtx tmp_reg;
4589 rtx nsse_reg;
4590 int set;
4591 tree fntype;
4592 int stdarg_p;
4593 int i;
4594
4595 if (!TARGET_64BIT)
4596 return;
4597
4598 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4599 return;
4600
4601 /* Indicate to allocate space on the stack for varargs save area. */
4602 ix86_save_varrargs_registers = 1;
4603
4604 cfun->stack_alignment_needed = 128;
4605
4606 fntype = TREE_TYPE (current_function_decl);
4607 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4608 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4609 != void_type_node));
4610
4611 /* For varargs, we do not want to skip the dummy va_dcl argument.
4612 For stdargs, we do want to skip the last named argument. */
4613 next_cum = *cum;
4614 if (stdarg_p)
4615 function_arg_advance (&next_cum, mode, type, 1);
4616
4617 if (!no_rtl)
4618 save_area = frame_pointer_rtx;
4619
4620 set = get_varargs_alias_set ();
4621
4622 for (i = next_cum.regno;
4623 i < ix86_regparm
4624 && i < next_cum.regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4625 i++)
4626 {
4627 mem = gen_rtx_MEM (Pmode,
4628 plus_constant (save_area, i * UNITS_PER_WORD));
4629 MEM_NOTRAP_P (mem) = 1;
4630 set_mem_alias_set (mem, set);
4631 emit_move_insn (mem, gen_rtx_REG (Pmode,
4632 x86_64_int_parameter_registers[i]));
4633 }
4634
4635 if (next_cum.sse_nregs && cfun->va_list_fpr_size)
4636 {
4637 /* Now emit code to save SSE registers. The AX parameter contains the
4638 number of SSE parameter registers used to call this function. We use
4639 the sse_prologue_save insn template, which produces a computed jump
4640 across the SSE saves. We need some preparation work to get this working. */
4641
4642 label = gen_label_rtx ();
4643 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4644
4645 /* Compute the address to jump to:
4646 label - 4*eax + nnamed_sse_arguments*4. */
4647 tmp_reg = gen_reg_rtx (Pmode);
4648 nsse_reg = gen_reg_rtx (Pmode);
4649 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4650 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4651 gen_rtx_MULT (Pmode, nsse_reg,
4652 GEN_INT (4))));
4653 if (next_cum.sse_regno)
4654 emit_move_insn
4655 (nsse_reg,
4656 gen_rtx_CONST (DImode,
4657 gen_rtx_PLUS (DImode,
4658 label_ref,
4659 GEN_INT (next_cum.sse_regno * 4))));
4660 else
4661 emit_move_insn (nsse_reg, label_ref);
4662 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4663
4664 /* Compute the address of the memory block we save into. We always use a
4665 pointer pointing 127 bytes after the first byte to store - this is
4666 needed to keep the instruction size limited to 4 bytes. */
4667 tmp_reg = gen_reg_rtx (Pmode);
4668 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4669 plus_constant (save_area,
4670 8 * REGPARM_MAX + 127)));
4671 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4672 MEM_NOTRAP_P (mem) = 1;
4673 set_mem_alias_set (mem, set);
4674 set_mem_align (mem, BITS_PER_WORD);
4675
4676 /* And finally do the dirty job! */
4677 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4678 GEN_INT (next_cum.sse_regno), label));
4679 }
4680
4681 }
4682
4683 /* Implement va_start. */
4684
4685 void
4686 ix86_va_start (tree valist, rtx nextarg)
4687 {
4688 HOST_WIDE_INT words, n_gpr, n_fpr;
4689 tree f_gpr, f_fpr, f_ovf, f_sav;
4690 tree gpr, fpr, ovf, sav, t;
4691 tree type;
4692
4693 /* Only 64bit target needs something special. */
4694 if (!TARGET_64BIT)
4695 {
4696 std_expand_builtin_va_start (valist, nextarg);
4697 return;
4698 }
4699
4700 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4701 f_fpr = TREE_CHAIN (f_gpr);
4702 f_ovf = TREE_CHAIN (f_fpr);
4703 f_sav = TREE_CHAIN (f_ovf);
4704
4705 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4706 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4707 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4708 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4709 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4710
4711 /* Count number of gp and fp argument registers used. */
4712 words = current_function_args_info.words;
4713 n_gpr = current_function_args_info.regno;
4714 n_fpr = current_function_args_info.sse_regno;
4715
4716 if (TARGET_DEBUG_ARG)
4717 fprintf (stderr, "va_start: words = %d, n_gpr = %d, n_fpr = %d\n",
4718 (int) words, (int) n_gpr, (int) n_fpr);
4719
4720 if (cfun->va_list_gpr_size)
4721 {
4722 type = TREE_TYPE (gpr);
4723 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4724 build_int_cst (type, n_gpr * 8));
4725 TREE_SIDE_EFFECTS (t) = 1;
4726 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4727 }
4728
4729 if (cfun->va_list_fpr_size)
4730 {
4731 type = TREE_TYPE (fpr);
4732 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4733 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4734 TREE_SIDE_EFFECTS (t) = 1;
4735 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4736 }
4737
4738 /* Find the overflow area. */
4739 type = TREE_TYPE (ovf);
4740 t = make_tree (type, virtual_incoming_args_rtx);
4741 if (words != 0)
4742 t = build2 (PLUS_EXPR, type, t,
4743 build_int_cst (type, words * UNITS_PER_WORD));
4744 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4745 TREE_SIDE_EFFECTS (t) = 1;
4746 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4747
4748 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4749 {
4750 /* Find the register save area.
4751 The function prologue saves it right above the stack frame. */
4752 type = TREE_TYPE (sav);
4753 t = make_tree (type, frame_pointer_rtx);
4754 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4755 TREE_SIDE_EFFECTS (t) = 1;
4756 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4757 }
4758 }
4759
4760 /* Implement va_arg. */
4761
4762 tree
4763 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4764 {
4765 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4766 tree f_gpr, f_fpr, f_ovf, f_sav;
4767 tree gpr, fpr, ovf, sav, t;
4768 int size, rsize;
4769 tree lab_false, lab_over = NULL_TREE;
4770 tree addr, t2;
4771 rtx container;
4772 int indirect_p = 0;
4773 tree ptrtype;
4774 enum machine_mode nat_mode;
4775
4776 /* Only 64bit target needs something special. */
4777 if (!TARGET_64BIT)
4778 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4779
4780 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4781 f_fpr = TREE_CHAIN (f_gpr);
4782 f_ovf = TREE_CHAIN (f_fpr);
4783 f_sav = TREE_CHAIN (f_ovf);
4784
4785 valist = build_va_arg_indirect_ref (valist);
4786 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4787 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4788 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4789 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4790
4791 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4792 if (indirect_p)
4793 type = build_pointer_type (type);
4794 size = int_size_in_bytes (type);
4795 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4796
4797 nat_mode = type_natural_mode (type);
4798 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4799 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4800
4801 /* Pull the value out of the saved registers. */
4802
4803 addr = create_tmp_var (ptr_type_node, "addr");
4804 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4805
4806 if (container)
4807 {
4808 int needed_intregs, needed_sseregs;
4809 bool need_temp;
4810 tree int_addr, sse_addr;
4811
4812 lab_false = create_artificial_label ();
4813 lab_over = create_artificial_label ();
4814
4815 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4816
4817 need_temp = (!REG_P (container)
4818 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4819 || TYPE_ALIGN (type) > 128));
4820
4821 /* In case we are passing a structure, verify that it is a consecutive
4822 block in the register save area. If not, we need to do moves. */
4823 if (!need_temp && !REG_P (container))
4824 {
4825 /* Verify that all registers are strictly consecutive. */
4826 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4827 {
4828 int i;
4829
4830 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4831 {
4832 rtx slot = XVECEXP (container, 0, i);
4833 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4834 || INTVAL (XEXP (slot, 1)) != i * 16)
4835 need_temp = 1;
4836 }
4837 }
4838 else
4839 {
4840 int i;
4841
4842 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4843 {
4844 rtx slot = XVECEXP (container, 0, i);
4845 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4846 || INTVAL (XEXP (slot, 1)) != i * 8)
4847 need_temp = 1;
4848 }
4849 }
4850 }
4851 if (!need_temp)
4852 {
4853 int_addr = addr;
4854 sse_addr = addr;
4855 }
4856 else
4857 {
4858 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4859 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4860 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4861 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4862 }
4863
4864 /* First ensure that we fit completely in registers. */
4865 if (needed_intregs)
4866 {
4867 t = build_int_cst (TREE_TYPE (gpr),
4868 (REGPARM_MAX - needed_intregs + 1) * 8);
4869 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4870 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4871 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4872 gimplify_and_add (t, pre_p);
4873 }
4874 if (needed_sseregs)
4875 {
4876 t = build_int_cst (TREE_TYPE (fpr),
4877 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4878 + REGPARM_MAX * 8);
4879 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4880 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4881 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4882 gimplify_and_add (t, pre_p);
4883 }
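/* The two range checks above follow from the layout of the x86-64
   register save area: the first REGPARM_MAX * 8 == 48 bytes hold the
   integer argument registers and the next SSE_REGPARM_MAX * 16 == 128
   bytes hold the SSE argument registers.  gpr and fpr are byte offsets
   into that area, so e.g. gpr == 48 means every integer register is
   already used and the value must come from the overflow area. */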
4884
4885 /* Compute index to start of area used for integer regs. */
4886 if (needed_intregs)
4887 {
4888 /* int_addr = gpr + sav; */
4889 t = fold_convert (ptr_type_node, gpr);
4890 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4891 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4892 gimplify_and_add (t, pre_p);
4893 }
4894 if (needed_sseregs)
4895 {
4896 /* sse_addr = fpr + sav; */
4897 t = fold_convert (ptr_type_node, fpr);
4898 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4899 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4900 gimplify_and_add (t, pre_p);
4901 }
4902 if (need_temp)
4903 {
4904 int i;
4905 tree temp = create_tmp_var (type, "va_arg_tmp");
4906
4907 /* addr = &temp; */
4908 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4909 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4910 gimplify_and_add (t, pre_p);
4911
4912 for (i = 0; i < XVECLEN (container, 0); i++)
4913 {
4914 rtx slot = XVECEXP (container, 0, i);
4915 rtx reg = XEXP (slot, 0);
4916 enum machine_mode mode = GET_MODE (reg);
4917 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4918 tree addr_type = build_pointer_type (piece_type);
4919 tree src_addr, src;
4920 int src_offset;
4921 tree dest_addr, dest;
4922
4923 if (SSE_REGNO_P (REGNO (reg)))
4924 {
4925 src_addr = sse_addr;
4926 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4927 }
4928 else
4929 {
4930 src_addr = int_addr;
4931 src_offset = REGNO (reg) * 8;
4932 }
4933 src_addr = fold_convert (addr_type, src_addr);
4934 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4935 size_int (src_offset));
4936 src = build_va_arg_indirect_ref (src_addr);
4937
4938 dest_addr = fold_convert (addr_type, addr);
4939 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4940 size_int (INTVAL (XEXP (slot, 1))));
4941 dest = build_va_arg_indirect_ref (dest_addr);
4942
4943 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4944 gimplify_and_add (t, pre_p);
4945 }
4946 }
4947
4948 if (needed_intregs)
4949 {
4950 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4951 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4952 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4953 gimplify_and_add (t, pre_p);
4954 }
4955 if (needed_sseregs)
4956 {
4957 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4958 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4959 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4960 gimplify_and_add (t, pre_p);
4961 }
4962
4963 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4964 gimplify_and_add (t, pre_p);
4965
4966 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4967 append_to_statement_list (t, pre_p);
4968 }
4969
4970 /* ... otherwise out of the overflow area. */
4971
4972 /* Care for on-stack alignment if needed. */
4973 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4974 || integer_zerop (TYPE_SIZE (type)))
4975 t = ovf;
4976 else
4977 {
4978 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4979 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4980 build_int_cst (TREE_TYPE (ovf), align - 1));
4981 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4982 build_int_cst (TREE_TYPE (t), -align));
4983 }
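/* The expression above is the usual round-up-to-alignment idiom,
   t = (ovf + align - 1) & -align; e.g. with align == 16 an overflow
   pointer ending in 0x28 is rounded up to the next boundary, 0x30. */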
4984 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4985
4986 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4987 gimplify_and_add (t2, pre_p);
4988
4989 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4990 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4991 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4992 gimplify_and_add (t, pre_p);
4993
4994 if (container)
4995 {
4996 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4997 append_to_statement_list (t, pre_p);
4998 }
4999
5000 ptrtype = build_pointer_type (type);
5001 addr = fold_convert (ptrtype, addr);
5002
5003 if (indirect_p)
5004 addr = build_va_arg_indirect_ref (addr);
5005 return build_va_arg_indirect_ref (addr);
5006 }
5007 \f
5008 /* Return nonzero if OPNUM's MEM should be matched
5009 in movabs* patterns. */
5010
5011 int
5012 ix86_check_movabs (rtx insn, int opnum)
5013 {
5014 rtx set, mem;
5015
5016 set = PATTERN (insn);
5017 if (GET_CODE (set) == PARALLEL)
5018 set = XVECEXP (set, 0, 0);
5019 gcc_assert (GET_CODE (set) == SET);
5020 mem = XEXP (set, opnum);
5021 while (GET_CODE (mem) == SUBREG)
5022 mem = SUBREG_REG (mem);
5023 gcc_assert (MEM_P (mem));
5024 return (volatile_ok || !MEM_VOLATILE_P (mem));
5025 }
5026 \f
5027 /* Initialize the table of extra 80387 mathematical constants. */
5028
5029 static void
5030 init_ext_80387_constants (void)
5031 {
5032 static const char * cst[5] =
5033 {
5034 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5035 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5036 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5037 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5038 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5039 };
5040 int i;
5041
5042 for (i = 0; i < 5; i++)
5043 {
5044 real_from_string (&ext_80387_constants_table[i], cst[i]);
5045 /* Ensure each constant is rounded to XFmode precision. */
5046 real_convert (&ext_80387_constants_table[i],
5047 XFmode, &ext_80387_constants_table[i]);
5048 }
5049
5050 ext_80387_constants_init = 1;
5051 }
5052
5053 /* Return true if the constant is something that can be loaded with
5054 a special instruction. */
5055
5056 int
5057 standard_80387_constant_p (rtx x)
5058 {
5059 REAL_VALUE_TYPE r;
5060
5061 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
5062 return -1;
5063
5064 if (x == CONST0_RTX (GET_MODE (x)))
5065 return 1;
5066 if (x == CONST1_RTX (GET_MODE (x)))
5067 return 2;
5068
5069 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5070
5071 /* For XFmode constants, try to find a special 80387 instruction when
5072 optimizing for size or on those CPUs that benefit from them. */
5073 if (GET_MODE (x) == XFmode
5074 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5075 {
5076 int i;
5077
5078 if (! ext_80387_constants_init)
5079 init_ext_80387_constants ();
5080
5081 for (i = 0; i < 5; i++)
5082 if (real_identical (&r, &ext_80387_constants_table[i]))
5083 return i + 3;
5084 }
5085
5086 /* A load of the constant -0.0 or -1.0 will be split into an
5087 fldz;fchs or fld1;fchs sequence. */
5088 if (real_isnegzero (&r))
5089 return 8;
5090 if (real_identical (&r, &dconstm1))
5091 return 9;
5092
5093 return 0;
5094 }
5095
5096 /* Return the opcode of the special instruction to be used to load
5097 the constant X. */
5098
5099 const char *
5100 standard_80387_constant_opcode (rtx x)
5101 {
5102 switch (standard_80387_constant_p (x))
5103 {
5104 case 1:
5105 return "fldz";
5106 case 2:
5107 return "fld1";
5108 case 3:
5109 return "fldlg2";
5110 case 4:
5111 return "fldln2";
5112 case 5:
5113 return "fldl2e";
5114 case 6:
5115 return "fldl2t";
5116 case 7:
5117 return "fldpi";
5118 case 8:
5119 case 9:
5120 return "#";
5121 default:
5122 gcc_unreachable ();
5123 }
5124 }
5125
5126 /* Return the CONST_DOUBLE representing the 80387 constant that is
5127 loaded by the specified special instruction. The argument IDX
5128 matches the return value from standard_80387_constant_p. */
5129
5130 rtx
5131 standard_80387_constant_rtx (int idx)
5132 {
5133 int i;
5134
5135 if (! ext_80387_constants_init)
5136 init_ext_80387_constants ();
5137
5138 switch (idx)
5139 {
5140 case 3:
5141 case 4:
5142 case 5:
5143 case 6:
5144 case 7:
5145 i = idx - 3;
5146 break;
5147
5148 default:
5149 gcc_unreachable ();
5150 }
5151
5152 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5153 XFmode);
5154 }
5155
5156 /* Return 1 if MODE is a valid mode for SSE. */
5157 static int
5158 standard_sse_mode_p (enum machine_mode mode)
5159 {
5160 switch (mode)
5161 {
5162 case V16QImode:
5163 case V8HImode:
5164 case V4SImode:
5165 case V2DImode:
5166 case V4SFmode:
5167 case V2DFmode:
5168 return 1;
5169
5170 default:
5171 return 0;
5172 }
5173 }
5174
5175 /* Return 1 if X is an FP constant we can load into an SSE register
5176 without using memory. */
5177 int
5178 standard_sse_constant_p (rtx x)
5179 {
5180 enum machine_mode mode = GET_MODE (x);
5181
5182 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5183 return 1;
5184 if (vector_all_ones_operand (x, mode)
5185 && standard_sse_mode_p (mode))
5186 return TARGET_SSE2 ? 2 : -1;
5187
5188 return 0;
5189 }
5190
5191 /* Return the opcode of the special instruction to be used to load
5192 the constant X. */
5193
5194 const char *
5195 standard_sse_constant_opcode (rtx insn, rtx x)
5196 {
5197 switch (standard_sse_constant_p (x))
5198 {
5199 case 1:
5200 if (get_attr_mode (insn) == MODE_V4SF)
5201 return "xorps\t%0, %0";
5202 else if (get_attr_mode (insn) == MODE_V2DF)
5203 return "xorpd\t%0, %0";
5204 else
5205 return "pxor\t%0, %0";
5206 case 2:
5207 return "pcmpeqd\t%0, %0";
5208 }
5209 gcc_unreachable ();
5210 }
5211
5212 /* Returns 1 if OP contains a symbol reference. */
5213
5214 int
5215 symbolic_reference_mentioned_p (rtx op)
5216 {
5217 const char *fmt;
5218 int i;
5219
5220 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5221 return 1;
5222
5223 fmt = GET_RTX_FORMAT (GET_CODE (op));
5224 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5225 {
5226 if (fmt[i] == 'E')
5227 {
5228 int j;
5229
5230 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5231 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5232 return 1;
5233 }
5234
5235 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5236 return 1;
5237 }
5238
5239 return 0;
5240 }
5241
5242 /* Return 1 if it is appropriate to emit `ret' instructions in the
5243 body of a function. Do this only if the epilogue is simple, needing a
5244 couple of insns. Prior to reloading, we can't tell how many registers
5245 must be saved, so return 0 then. Return 0 if there is no frame
5246 marker to de-allocate. */
5247
5248 int
5249 ix86_can_use_return_insn_p (void)
5250 {
5251 struct ix86_frame frame;
5252
5253 if (! reload_completed || frame_pointer_needed)
5254 return 0;
5255
5256 /* Don't allow more than 32k bytes to be popped, since that's all we
5257 can do with one instruction. */
5258 if (current_function_pops_args
5259 && current_function_args_size >= 32768)
5260 return 0;
5261
5262 ix86_compute_frame_layout (&frame);
5263 return frame.to_allocate == 0 && frame.nregs == 0;
5264 }
5265 \f
5266 /* Value should be nonzero if functions must have frame pointers.
5267 Zero means the frame pointer need not be set up (and parms may
5268 be accessed via the stack pointer) in functions that seem suitable. */
5269
5270 int
5271 ix86_frame_pointer_required (void)
5272 {
5273 /* If we accessed previous frames, then the generated code expects
5274 to be able to access the saved ebp value in our frame. */
5275 if (cfun->machine->accesses_prev_frame)
5276 return 1;
5277
5278 /* Several x86 OSes need a frame pointer for other reasons,
5279 usually pertaining to setjmp. */
5280 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5281 return 1;
5282
5283 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5284 the frame pointer by default. Turn it back on now if we've not
5285 got a leaf function. */
5286 if (TARGET_OMIT_LEAF_FRAME_POINTER
5287 && (!current_function_is_leaf
5288 || ix86_current_function_calls_tls_descriptor))
5289 return 1;
5290
5291 if (current_function_profile)
5292 return 1;
5293
5294 return 0;
5295 }
5296
5297 /* Record that the current function accesses previous call frames. */
5298
5299 void
5300 ix86_setup_frame_addresses (void)
5301 {
5302 cfun->machine->accesses_prev_frame = 1;
5303 }
5304 \f
5305 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5306 # define USE_HIDDEN_LINKONCE 1
5307 #else
5308 # define USE_HIDDEN_LINKONCE 0
5309 #endif
5310
5311 static int pic_labels_used;
5312
5313 /* Fills in the label name that should be used for a pc thunk for
5314 the given register. */
5315
5316 static void
5317 get_pc_thunk_name (char name[32], unsigned int regno)
5318 {
5319 gcc_assert (!TARGET_64BIT);
5320
5321 if (USE_HIDDEN_LINKONCE)
5322 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5323 else
5324 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5325 }
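/* For instance, the thunk that loads %ebx is given the well-known name
   "__i686.get_pc_thunk.bx" when hidden linkonce sections are usable. */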
5326
5327
5328 /* Emit the pc thunks used by -fpic code: each one loads its register
5329 with the return address of the caller and then returns. */
5330
5331 void
5332 ix86_file_end (void)
5333 {
5334 rtx xops[2];
5335 int regno;
5336
5337 for (regno = 0; regno < 8; ++regno)
5338 {
5339 char name[32];
5340
5341 if (! ((pic_labels_used >> regno) & 1))
5342 continue;
5343
5344 get_pc_thunk_name (name, regno);
5345
5346 #if TARGET_MACHO
5347 if (TARGET_MACHO)
5348 {
5349 switch_to_section (darwin_sections[text_coal_section]);
5350 fputs ("\t.weak_definition\t", asm_out_file);
5351 assemble_name (asm_out_file, name);
5352 fputs ("\n\t.private_extern\t", asm_out_file);
5353 assemble_name (asm_out_file, name);
5354 fputs ("\n", asm_out_file);
5355 ASM_OUTPUT_LABEL (asm_out_file, name);
5356 }
5357 else
5358 #endif
5359 if (USE_HIDDEN_LINKONCE)
5360 {
5361 tree decl;
5362
5363 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5364 error_mark_node);
5365 TREE_PUBLIC (decl) = 1;
5366 TREE_STATIC (decl) = 1;
5367 DECL_ONE_ONLY (decl) = 1;
5368
5369 (*targetm.asm_out.unique_section) (decl, 0);
5370 switch_to_section (get_named_section (decl, NULL, 0));
5371
5372 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5373 fputs ("\t.hidden\t", asm_out_file);
5374 assemble_name (asm_out_file, name);
5375 fputc ('\n', asm_out_file);
5376 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5377 }
5378 else
5379 {
5380 switch_to_section (text_section);
5381 ASM_OUTPUT_LABEL (asm_out_file, name);
5382 }
5383
5384 xops[0] = gen_rtx_REG (SImode, regno);
5385 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5386 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5387 output_asm_insn ("ret", xops);
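/* Each thunk body emitted above is simply "movl (%esp), %reg; ret":
   the word at the top of the stack is the caller's return address,
   i.e. the address of the instruction following the call. */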
5388 }
5389
5390 if (NEED_INDICATE_EXEC_STACK)
5391 file_end_indicate_exec_stack ();
5392 }
5393
5394 /* Emit code for the SET_GOT patterns. */
5395
5396 const char *
5397 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5398 {
5399 rtx xops[3];
5400
5401 xops[0] = dest;
5402
5403 if (TARGET_VXWORKS_RTP && flag_pic)
5404 {
5405 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5406 xops[2] = gen_rtx_MEM (Pmode,
5407 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5408 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5409
5410 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5411 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5412 an unadorned address. */
5413 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5414 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5415 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5416 return "";
5417 }
5418
5419 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5420
5421 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5422 {
5423 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5424
5425 if (!flag_pic)
5426 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5427 else
5428 output_asm_insn ("call\t%a2", xops);
5429
5430 #if TARGET_MACHO
5431 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5432 is what will be referenced by the Mach-O PIC subsystem. */
5433 if (!label)
5434 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5435 #endif
5436
5437 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5438 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5439
5440 if (flag_pic)
5441 output_asm_insn ("pop{l}\t%0", xops);
5442 }
5443 else
5444 {
5445 char name[32];
5446 get_pc_thunk_name (name, REGNO (dest));
5447 pic_labels_used |= 1 << REGNO (dest);
5448
5449 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5450 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5451 output_asm_insn ("call\t%X2", xops);
5452 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5453 is what will be referenced by the Mach-O PIC subsystem. */
5454 #if TARGET_MACHO
5455 if (!label)
5456 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5457 else
5458 targetm.asm_out.internal_label (asm_out_file, "L",
5459 CODE_LABEL_NUMBER (label));
5460 #endif
5461 }
5462
5463 if (TARGET_MACHO)
5464 return "";
5465
5466 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5467 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5468 else
5469 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5470
5471 return "";
5472 }
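/* Roughly, the deep-branch-prediction variant emits
      call  __i686.get_pc_thunk.bx
      addl  $_GLOBAL_OFFSET_TABLE_, %ebx
   while the fallback variant emits "call 1f; 1: popl %ebx" followed by
   the add with the [.-label] adjustment; both leave the GOT address in
   the destination register. */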
5473
5474 /* Generate a "push" pattern for input ARG. */
5475
5476 static rtx
5477 gen_push (rtx arg)
5478 {
5479 return gen_rtx_SET (VOIDmode,
5480 gen_rtx_MEM (Pmode,
5481 gen_rtx_PRE_DEC (Pmode,
5482 stack_pointer_rtx)),
5483 arg);
5484 }
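/* The RTL built here, (set (mem (pre_dec (reg sp))) arg), is what the
   push patterns in i386.md match, so it assembles to a single push. */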
5485
5486 /* Return the number of an unused call-clobbered register if one is
5487 available for the entire function, otherwise INVALID_REGNUM. */
5488
5489 static unsigned int
5490 ix86_select_alt_pic_regnum (void)
5491 {
5492 if (current_function_is_leaf && !current_function_profile
5493 && !ix86_current_function_calls_tls_descriptor)
5494 {
5495 int i;
5496 for (i = 2; i >= 0; --i)
5497 if (!regs_ever_live[i])
5498 return i;
5499 }
5500
5501 return INVALID_REGNUM;
5502 }
5503
5504 /* Return 1 if we need to save REGNO. */
5505 static int
5506 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5507 {
5508 if (pic_offset_table_rtx
5509 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5510 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5511 || current_function_profile
5512 || current_function_calls_eh_return
5513 || current_function_uses_const_pool))
5514 {
5515 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5516 return 0;
5517 return 1;
5518 }
5519
5520 if (current_function_calls_eh_return && maybe_eh_return)
5521 {
5522 unsigned i;
5523 for (i = 0; ; i++)
5524 {
5525 unsigned test = EH_RETURN_DATA_REGNO (i);
5526 if (test == INVALID_REGNUM)
5527 break;
5528 if (test == regno)
5529 return 1;
5530 }
5531 }
5532
5533 if (cfun->machine->force_align_arg_pointer
5534 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5535 return 1;
5536
5537 return (regs_ever_live[regno]
5538 && !call_used_regs[regno]
5539 && !fixed_regs[regno]
5540 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5541 }
5542
5543 /* Return number of registers to be saved on the stack. */
5544
5545 static int
5546 ix86_nsaved_regs (void)
5547 {
5548 int nregs = 0;
5549 int regno;
5550
5551 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5552 if (ix86_save_reg (regno, true))
5553 nregs++;
5554 return nregs;
5555 }
5556
5557 /* Return the offset between two registers, one to be eliminated, and the other
5558 its replacement, at the start of a routine. */
5559
5560 HOST_WIDE_INT
5561 ix86_initial_elimination_offset (int from, int to)
5562 {
5563 struct ix86_frame frame;
5564 ix86_compute_frame_layout (&frame);
5565
5566 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5567 return frame.hard_frame_pointer_offset;
5568 else if (from == FRAME_POINTER_REGNUM
5569 && to == HARD_FRAME_POINTER_REGNUM)
5570 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5571 else
5572 {
5573 gcc_assert (to == STACK_POINTER_REGNUM);
5574
5575 if (from == ARG_POINTER_REGNUM)
5576 return frame.stack_pointer_offset;
5577
5578 gcc_assert (from == FRAME_POINTER_REGNUM);
5579 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5580 }
5581 }
5582
5583 /* Fill the ix86_frame structure describing the frame of the function being compiled. */
5584
5585 static void
5586 ix86_compute_frame_layout (struct ix86_frame *frame)
5587 {
5588 HOST_WIDE_INT total_size;
5589 unsigned int stack_alignment_needed;
5590 HOST_WIDE_INT offset;
5591 unsigned int preferred_alignment;
5592 HOST_WIDE_INT size = get_frame_size ();
5593
5594 frame->nregs = ix86_nsaved_regs ();
5595 total_size = size;
5596
5597 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5598 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5599
5600 /* During reload iteration the number of registers saved can change.
5601 Recompute the value as needed. Do not recompute when the number of registers
5602 didn't change, as reload does multiple calls to the function and does not
5603 expect the decision to change within a single iteration. */
5604 if (!optimize_size
5605 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5606 {
5607 int count = frame->nregs;
5608
5609 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5610 /* The fast prologue uses move instead of push to save registers. This
5611 is significantly longer, but also executes faster as modern hardware
5612 can execute the moves in parallel, but can't do that for push/pop.
5613
5614 Be careful about choosing which prologue to emit: when the function takes
5615 many instructions to execute we may use the slow version, as well as when
5616 the function is known to be outside a hot spot (this is known with
5617 feedback only). Weight the size of the function by the number of registers
5618 to save, as it is cheap to use one or two push instructions but very
5619 slow to use many of them. */
5620 if (count)
5621 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5622 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5623 || (flag_branch_probabilities
5624 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5625 cfun->machine->use_fast_prologue_epilogue = false;
5626 else
5627 cfun->machine->use_fast_prologue_epilogue
5628 = !expensive_function_p (count);
5629 }
5630 if (TARGET_PROLOGUE_USING_MOVE
5631 && cfun->machine->use_fast_prologue_epilogue)
5632 frame->save_regs_using_mov = true;
5633 else
5634 frame->save_regs_using_mov = false;
5635
5636
5637 /* Skip return address and saved base pointer. */
5638 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5639
5640 frame->hard_frame_pointer_offset = offset;
5641
5642 /* Do some sanity checking of stack_alignment_needed and
5643 preferred_alignment, since the i386 port is the only one using these
5644 features, and they may break easily. */
5645
5646 gcc_assert (!size || stack_alignment_needed);
5647 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5648 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5649 gcc_assert (stack_alignment_needed
5650 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5651
5652 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5653 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5654
5655 /* Register save area */
5656 offset += frame->nregs * UNITS_PER_WORD;
5657
5658 /* Va-arg area */
5659 if (ix86_save_varrargs_registers)
5660 {
5661 offset += X86_64_VARARGS_SIZE;
5662 frame->va_arg_size = X86_64_VARARGS_SIZE;
5663 }
5664 else
5665 frame->va_arg_size = 0;
5666
5667 /* Align start of frame for local function. */
5668 frame->padding1 = ((offset + stack_alignment_needed - 1)
5669 & -stack_alignment_needed) - offset;
5670
5671 offset += frame->padding1;
5672
5673 /* Frame pointer points here. */
5674 frame->frame_pointer_offset = offset;
5675
5676 offset += size;
5677
5678 /* Add the outgoing arguments area. It can be skipped if we eliminated
5679 all the function calls as dead code.
5680 Skipping is, however, impossible when the function calls alloca: the alloca
5681 expander assumes that the last current_function_outgoing_args_size bytes
5682 of the stack frame are unused. */
5683 if (ACCUMULATE_OUTGOING_ARGS
5684 && (!current_function_is_leaf || current_function_calls_alloca
5685 || ix86_current_function_calls_tls_descriptor))
5686 {
5687 offset += current_function_outgoing_args_size;
5688 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5689 }
5690 else
5691 frame->outgoing_arguments_size = 0;
5692
5693 /* Align stack boundary. Only needed if we're calling another function
5694 or using alloca. */
5695 if (!current_function_is_leaf || current_function_calls_alloca
5696 || ix86_current_function_calls_tls_descriptor)
5697 frame->padding2 = ((offset + preferred_alignment - 1)
5698 & -preferred_alignment) - offset;
5699 else
5700 frame->padding2 = 0;
5701
5702 offset += frame->padding2;
5703
5704 /* We've reached end of stack frame. */
5705 frame->stack_pointer_offset = offset;
5706
5707 /* Size the prologue needs to allocate. */
5708 frame->to_allocate =
5709 (size + frame->padding1 + frame->padding2
5710 + frame->outgoing_arguments_size + frame->va_arg_size);
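/* At this point the frame, from higher to lower addresses, is roughly:
   return address, saved %ebp (if any), register save area, va-arg save
   area, padding1, local variables, outgoing arguments, padding2.
   to_allocate is everything below the register save area. */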
5711
5712 if ((!frame->to_allocate && frame->nregs <= 1)
5713 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5714 frame->save_regs_using_mov = false;
5715
5716 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5717 && current_function_is_leaf
5718 && !ix86_current_function_calls_tls_descriptor)
5719 {
5720 frame->red_zone_size = frame->to_allocate;
5721 if (frame->save_regs_using_mov)
5722 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5723 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5724 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5725 }
5726 else
5727 frame->red_zone_size = 0;
5728 frame->to_allocate -= frame->red_zone_size;
5729 frame->stack_pointer_offset -= frame->red_zone_size;
5730 #if 0
5731 fprintf (stderr, "\n");
5732 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5733 fprintf (stderr, "size: %ld\n", (long)size);
5734 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5735 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5736 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5737 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5738 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5739 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5740 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5741 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5742 (long)frame->hard_frame_pointer_offset);
5743 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5744 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5745 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5746 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5747 #endif
5748 }
5749
5750 /* Emit code to save registers in the prologue. */
5751
5752 static void
5753 ix86_emit_save_regs (void)
5754 {
5755 unsigned int regno;
5756 rtx insn;
5757
5758 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5759 if (ix86_save_reg (regno, true))
5760 {
5761 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5762 RTX_FRAME_RELATED_P (insn) = 1;
5763 }
5764 }
5765
5766 /* Emit code to save registers using MOV insns. The first register
5767 is saved at POINTER + OFFSET. */
5768 static void
5769 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5770 {
5771 unsigned int regno;
5772 rtx insn;
5773
5774 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5775 if (ix86_save_reg (regno, true))
5776 {
5777 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5778 Pmode, offset),
5779 gen_rtx_REG (Pmode, regno));
5780 RTX_FRAME_RELATED_P (insn) = 1;
5781 offset += UNITS_PER_WORD;
5782 }
5783 }
5784
5785 /* Expand prologue or epilogue stack adjustment.
5786 The pattern exists to put a dependency on all ebp-based memory accesses.
5787 STYLE should be negative if instructions should be marked as frame related,
5788 zero if %r11 register is live and cannot be freely used and positive
5789 otherwise. */
5790
5791 static void
5792 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5793 {
5794 rtx insn;
5795
5796 if (! TARGET_64BIT)
5797 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5798 else if (x86_64_immediate_operand (offset, DImode))
5799 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5800 else
5801 {
5802 rtx r11;
5803 /* r11 is used by indirect sibcall return as well, set before the
5804 epilogue and used after the epilogue. ATM indirect sibcall
5805 shouldn't be used together with huge frame sizes in one
5806 function because of the frame_size check in sibcall.c. */
5807 gcc_assert (style);
5808 r11 = gen_rtx_REG (DImode, R11_REG);
5809 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5810 if (style < 0)
5811 RTX_FRAME_RELATED_P (insn) = 1;
5812 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5813 offset));
5814 }
5815 if (style < 0)
5816 RTX_FRAME_RELATED_P (insn) = 1;
5817 }
5818
5819 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5820
5821 static rtx
5822 ix86_internal_arg_pointer (void)
5823 {
5824 bool has_force_align_arg_pointer =
5825 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5826 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5827 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5828 && DECL_NAME (current_function_decl)
5829 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5830 && DECL_FILE_SCOPE_P (current_function_decl))
5831 || ix86_force_align_arg_pointer
5832 || has_force_align_arg_pointer)
5833 {
5834 /* Nested functions can't realign the stack due to a register
5835 conflict. */
5836 if (DECL_CONTEXT (current_function_decl)
5837 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5838 {
5839 if (ix86_force_align_arg_pointer)
5840 warning (0, "-mstackrealign ignored for nested functions");
5841 if (has_force_align_arg_pointer)
5842 error ("%s not supported for nested functions",
5843 ix86_force_align_arg_pointer_string);
5844 return virtual_incoming_args_rtx;
5845 }
5846 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5847 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5848 }
5849 else
5850 return virtual_incoming_args_rtx;
5851 }
5852
5853 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5854 This is called from dwarf2out.c to emit call frame instructions
5855 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5856 static void
5857 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5858 {
5859 rtx unspec = SET_SRC (pattern);
5860 gcc_assert (GET_CODE (unspec) == UNSPEC);
5861
5862 switch (index)
5863 {
5864 case UNSPEC_REG_SAVE:
5865 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5866 SET_DEST (pattern));
5867 break;
5868 case UNSPEC_DEF_CFA:
5869 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5870 INTVAL (XVECEXP (unspec, 0, 0)));
5871 break;
5872 default:
5873 gcc_unreachable ();
5874 }
5875 }
5876
5877 /* Expand the prologue into a bunch of separate insns. */
5878
5879 void
5880 ix86_expand_prologue (void)
5881 {
5882 rtx insn;
5883 bool pic_reg_used;
5884 struct ix86_frame frame;
5885 HOST_WIDE_INT allocate;
5886
5887 ix86_compute_frame_layout (&frame);
5888
5889 if (cfun->machine->force_align_arg_pointer)
5890 {
5891 rtx x, y;
5892
5893 /* Grab the argument pointer. */
5894 x = plus_constant (stack_pointer_rtx, 4);
5895 y = cfun->machine->force_align_arg_pointer;
5896 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5897 RTX_FRAME_RELATED_P (insn) = 1;
5898
5899 /* The unwind info consists of two parts: install the fafp as the cfa,
5900 and record the fafp as the "save register" of the stack pointer.
5901 The latter is there so that the unwinder can see where it
5902 should restore the stack pointer across the and insn. */
5903 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5904 x = gen_rtx_SET (VOIDmode, y, x);
5905 RTX_FRAME_RELATED_P (x) = 1;
5906 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5907 UNSPEC_REG_SAVE);
5908 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5909 RTX_FRAME_RELATED_P (y) = 1;
5910 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5911 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5912 REG_NOTES (insn) = x;
5913
5914 /* Align the stack. */
5915 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5916 GEN_INT (-16)));
5917
5918 /* And here we cheat like madmen with the unwind info. We force the
5919 cfa register back to sp+4, which is exactly what it was at the
5920 start of the function. Re-pushing the return address results in
5921 the return at the same spot relative to the cfa, and thus is
5922 correct wrt the unwind info. */
5923 x = cfun->machine->force_align_arg_pointer;
5924 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5925 insn = emit_insn (gen_push (x));
5926 RTX_FRAME_RELATED_P (insn) = 1;
5927
5928 x = GEN_INT (4);
5929 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5930 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5931 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5932 REG_NOTES (insn) = x;
5933 }
5934
5935 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5936 slower on all targets. Also sdb doesn't like it. */
5937
5938 if (frame_pointer_needed)
5939 {
5940 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5941 RTX_FRAME_RELATED_P (insn) = 1;
5942
5943 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5944 RTX_FRAME_RELATED_P (insn) = 1;
5945 }
5946
5947 allocate = frame.to_allocate;
5948
5949 if (!frame.save_regs_using_mov)
5950 ix86_emit_save_regs ();
5951 else
5952 allocate += frame.nregs * UNITS_PER_WORD;
5953
5954 /* When using the red zone we may start saving registers before allocating
5955 the stack frame, saving one cycle of the prologue. */
5956 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5957 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5958 : stack_pointer_rtx,
5959 -frame.nregs * UNITS_PER_WORD);
5960
5961 if (allocate == 0)
5962 ;
5963 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5964 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5965 GEN_INT (-allocate), -1);
5966 else
5967 {
5968 /* Only valid for Win32. */
5969 rtx eax = gen_rtx_REG (SImode, 0);
5970 bool eax_live = ix86_eax_live_at_start_p ();
5971 rtx t;
5972
5973 gcc_assert (!TARGET_64BIT);
5974
5975 if (eax_live)
5976 {
5977 emit_insn (gen_push (eax));
5978 allocate -= 4;
5979 }
5980
5981 emit_move_insn (eax, GEN_INT (allocate));
5982
5983 insn = emit_insn (gen_allocate_stack_worker (eax));
5984 RTX_FRAME_RELATED_P (insn) = 1;
5985 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5986 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5987 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5988 t, REG_NOTES (insn));
5989
5990 if (eax_live)
5991 {
5992 if (frame_pointer_needed)
5993 t = plus_constant (hard_frame_pointer_rtx,
5994 allocate
5995 - frame.to_allocate
5996 - frame.nregs * UNITS_PER_WORD);
5997 else
5998 t = plus_constant (stack_pointer_rtx, allocate);
5999 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
6000 }
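/* If eax was live it was pushed above to free it for the probe, and the
   push became part of the allocation (allocate -= 4), so its original
   value is reloaded here from the slot just above the new area. */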
6001 }
6002
6003 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6004 {
6005 if (!frame_pointer_needed || !frame.to_allocate)
6006 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6007 else
6008 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6009 -frame.nregs * UNITS_PER_WORD);
6010 }
6011
6012 pic_reg_used = false;
6013 if (pic_offset_table_rtx
6014 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
6015 || current_function_profile))
6016 {
6017 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6018
6019 if (alt_pic_reg_used != INVALID_REGNUM)
6020 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
6021
6022 pic_reg_used = true;
6023 }
6024
6025 if (pic_reg_used)
6026 {
6027 if (TARGET_64BIT)
6028 {
6029 if (ix86_cmodel == CM_LARGE_PIC)
6030 {
6031 rtx tmp_reg = gen_rtx_REG (DImode,
6032 FIRST_REX_INT_REG + 3 /* R11 */);
6033 rtx label = gen_label_rtx ();
6034 emit_label (label);
6035 LABEL_PRESERVE_P (label) = 1;
6036 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6037 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6038 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6039 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6040 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6041 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6042 pic_offset_table_rtx, tmp_reg));
6043 }
6044 else
6045 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6046 }
6047 else
6048 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6049
6050 /* Even with accurate pre-reload life analysis, we can wind up
6051 deleting all references to the pic register after reload.
6052 Consider if cross-jumping unifies two sides of a branch
6053 controlled by a comparison vs the only read from a global.
6054 In which case, allow the set_got to be deleted, though we're
6055 too late to do anything about the ebx save in the prologue. */
6056 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
6057 }
6058
6059 /* Prevent function calls from being scheduled before the call to mcount.
6060 In the pic_reg_used case, make sure that the got load isn't deleted. */
6061 if (current_function_profile)
6062 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
6063 }
6064
6065 /* Emit code to restore saved registers using MOV insns. First register
6066 is restored from POINTER + OFFSET. */
6067 static void
6068 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6069 int maybe_eh_return)
6070 {
6071 int regno;
6072 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6073
6074 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6075 if (ix86_save_reg (regno, maybe_eh_return))
6076 {
6077 /* Ensure that adjust_address won't be forced to produce a pointer
6078 outside the range allowed by the x86-64 instruction set. */
6079 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6080 {
6081 rtx r11;
6082
6083 r11 = gen_rtx_REG (DImode, R11_REG);
6084 emit_move_insn (r11, GEN_INT (offset));
6085 emit_insn (gen_adddi3 (r11, r11, pointer));
6086 base_address = gen_rtx_MEM (Pmode, r11);
6087 offset = 0;
6088 }
6089 emit_move_insn (gen_rtx_REG (Pmode, regno),
6090 adjust_address (base_address, Pmode, offset));
6091 offset += UNITS_PER_WORD;
6092 }
6093 }
6094
6095 /* Restore function stack, frame, and registers. */
6096
6097 void
6098 ix86_expand_epilogue (int style)
6099 {
6100 int regno;
6101 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6102 struct ix86_frame frame;
6103 HOST_WIDE_INT offset;
6104
6105 ix86_compute_frame_layout (&frame);
6106
6107 /* Calculate start of saved registers relative to ebp. Special care
6108 must be taken for the normal return case of a function using
6109 eh_return: the eax and edx registers are marked as saved, but not
6110 restored along this path. */
6111 offset = frame.nregs;
6112 if (current_function_calls_eh_return && style != 2)
6113 offset -= 2;
6114 offset *= -UNITS_PER_WORD;
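/* offset is now the (negative) byte displacement from the hard frame
   pointer down to the lowest-addressed saved register, e.g.
   -3 * UNITS_PER_WORD when three registers were saved and all of them
   are restored on this path. */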
6115
6116 /* If we're only restoring one register and sp is not valid then
6117 use a move instruction to restore the register, since it's
6118 less work than reloading sp and popping the register.
6119 
6120 The default code results in a stack adjustment using an add/lea instruction,
6121 while this code results in a LEAVE instruction (or discrete equivalent),
6122 so it is profitable in some other cases as well, especially when there
6123 are no registers to restore. We also use this code when TARGET_USE_LEAVE
6124 and there is exactly one register to pop. This heuristic may need some
6125 tuning in the future. */
6126 if ((!sp_valid && frame.nregs <= 1)
6127 || (TARGET_EPILOGUE_USING_MOVE
6128 && cfun->machine->use_fast_prologue_epilogue
6129 && (frame.nregs > 1 || frame.to_allocate))
6130 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6131 || (frame_pointer_needed && TARGET_USE_LEAVE
6132 && cfun->machine->use_fast_prologue_epilogue
6133 && frame.nregs == 1)
6134 || current_function_calls_eh_return)
6135 {
6136 /* Restore registers. We can use ebp or esp to address the memory
6137 locations. If both are available, default to ebp, since offsets
6138 are known to be small. The only exception is esp pointing directly to the
6139 end of the block of saved registers, where we may simplify the addressing
6140 mode. */
6141
6142 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6143 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6144 frame.to_allocate, style == 2);
6145 else
6146 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6147 offset, style == 2);
6148
6149 /* eh_return epilogues need %ecx added to the stack pointer. */
6150 if (style == 2)
6151 {
6152 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6153
6154 if (frame_pointer_needed)
6155 {
6156 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6157 tmp = plus_constant (tmp, UNITS_PER_WORD);
6158 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6159
6160 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6161 emit_move_insn (hard_frame_pointer_rtx, tmp);
6162
6163 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6164 const0_rtx, style);
6165 }
6166 else
6167 {
6168 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6169 tmp = plus_constant (tmp, (frame.to_allocate
6170 + frame.nregs * UNITS_PER_WORD));
6171 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6172 }
6173 }
6174 else if (!frame_pointer_needed)
6175 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6176 GEN_INT (frame.to_allocate
6177 + frame.nregs * UNITS_PER_WORD),
6178 style);
6179 /* If not an i386, mov & pop is faster than "leave". */
6180 else if (TARGET_USE_LEAVE || optimize_size
6181 || !cfun->machine->use_fast_prologue_epilogue)
6182 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6183 else
6184 {
6185 pro_epilogue_adjust_stack (stack_pointer_rtx,
6186 hard_frame_pointer_rtx,
6187 const0_rtx, style);
6188 if (TARGET_64BIT)
6189 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6190 else
6191 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6192 }
6193 }
6194 else
6195 {
6196 /* First step is to deallocate the stack frame so that we can
6197 pop the registers. */
6198 if (!sp_valid)
6199 {
6200 gcc_assert (frame_pointer_needed);
6201 pro_epilogue_adjust_stack (stack_pointer_rtx,
6202 hard_frame_pointer_rtx,
6203 GEN_INT (offset), style);
6204 }
6205 else if (frame.to_allocate)
6206 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6207 GEN_INT (frame.to_allocate), style);
6208
6209 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6210 if (ix86_save_reg (regno, false))
6211 {
6212 if (TARGET_64BIT)
6213 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6214 else
6215 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6216 }
6217 if (frame_pointer_needed)
6218 {
6219 /* Leave results in shorter dependency chains on CPUs that are
6220 able to grok it fast. */
6221 if (TARGET_USE_LEAVE)
6222 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6223 else if (TARGET_64BIT)
6224 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6225 else
6226 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6227 }
6228 }
6229
6230 if (cfun->machine->force_align_arg_pointer)
6231 {
6232 emit_insn (gen_addsi3 (stack_pointer_rtx,
6233 cfun->machine->force_align_arg_pointer,
6234 GEN_INT (-4)));
6235 }
6236
6237 /* Sibcall epilogues don't want a return instruction. */
6238 if (style == 0)
6239 return;
6240
6241 if (current_function_pops_args && current_function_args_size)
6242 {
6243 rtx popc = GEN_INT (current_function_pops_args);
6244
6245 /* i386 can only pop 64K bytes. If asked to pop more, pop
6246 return address, do explicit add, and jump indirectly to the
6247 caller. */
6248
6249 if (current_function_pops_args >= 65536)
6250 {
6251 rtx ecx = gen_rtx_REG (SImode, 2);
6252
6253 /* There is no "pascal" calling convention in 64bit ABI. */
6254 gcc_assert (!TARGET_64BIT);
6255
6256 emit_insn (gen_popsi1 (ecx));
6257 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6258 emit_jump_insn (gen_return_indirect_internal (ecx));
6259 }
6260 else
6261 emit_jump_insn (gen_return_pop_internal (popc));
6262 }
6263 else
6264 emit_jump_insn (gen_return_internal ());
6265 }
6266
6267 /* Reset from the function's potential modifications. */
6268
6269 static void
6270 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6271 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6272 {
6273 if (pic_offset_table_rtx)
6274 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6275 #if TARGET_MACHO
6276 /* Mach-O doesn't support labels at the end of objects, so if
6277 it looks like we might want one, insert a NOP. */
6278 {
6279 rtx insn = get_last_insn ();
6280 while (insn
6281 && NOTE_P (insn)
6282 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6283 insn = PREV_INSN (insn);
6284 if (insn
6285 && (LABEL_P (insn)
6286 || (NOTE_P (insn)
6287 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6288 fputs ("\tnop\n", file);
6289 }
6290 #endif
6291
6292 }
6293 \f
6294 /* Extract the parts of an RTL expression that is a valid memory address
6295 for an instruction. Return 0 if the structure of the address is
6296 grossly off. Return -1 if the address contains ASHIFT, so it is not
6297 strictly valid but is still used for computing the length of the lea instruction. */
6298
6299 int
6300 ix86_decompose_address (rtx addr, struct ix86_address *out)
6301 {
6302 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6303 rtx base_reg, index_reg;
6304 HOST_WIDE_INT scale = 1;
6305 rtx scale_rtx = NULL_RTX;
6306 int retval = 1;
6307 enum ix86_address_seg seg = SEG_DEFAULT;
6308
6309 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6310 base = addr;
6311 else if (GET_CODE (addr) == PLUS)
6312 {
6313 rtx addends[4], op;
6314 int n = 0, i;
6315
6316 op = addr;
6317 do
6318 {
6319 if (n >= 4)
6320 return 0;
6321 addends[n++] = XEXP (op, 1);
6322 op = XEXP (op, 0);
6323 }
6324 while (GET_CODE (op) == PLUS);
6325 if (n >= 4)
6326 return 0;
6327 addends[n] = op;
6328
6329 for (i = n; i >= 0; --i)
6330 {
6331 op = addends[i];
6332 switch (GET_CODE (op))
6333 {
6334 case MULT:
6335 if (index)
6336 return 0;
6337 index = XEXP (op, 0);
6338 scale_rtx = XEXP (op, 1);
6339 break;
6340
6341 case UNSPEC:
6342 if (XINT (op, 1) == UNSPEC_TP
6343 && TARGET_TLS_DIRECT_SEG_REFS
6344 && seg == SEG_DEFAULT)
6345 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6346 else
6347 return 0;
6348 break;
6349
6350 case REG:
6351 case SUBREG:
6352 if (!base)
6353 base = op;
6354 else if (!index)
6355 index = op;
6356 else
6357 return 0;
6358 break;
6359
6360 case CONST:
6361 case CONST_INT:
6362 case SYMBOL_REF:
6363 case LABEL_REF:
6364 if (disp)
6365 return 0;
6366 disp = op;
6367 break;
6368
6369 default:
6370 return 0;
6371 }
6372 }
6373 }
6374 else if (GET_CODE (addr) == MULT)
6375 {
6376 index = XEXP (addr, 0); /* index*scale */
6377 scale_rtx = XEXP (addr, 1);
6378 }
6379 else if (GET_CODE (addr) == ASHIFT)
6380 {
6381 rtx tmp;
6382
6383 /* We're called for lea too, which implements ashift on occasion. */
6384 index = XEXP (addr, 0);
6385 tmp = XEXP (addr, 1);
6386 if (!CONST_INT_P (tmp))
6387 return 0;
6388 scale = INTVAL (tmp);
6389 if ((unsigned HOST_WIDE_INT) scale > 3)
6390 return 0;
6391 scale = 1 << scale;
6392 retval = -1;
6393 }
6394 else
6395 disp = addr; /* displacement */
6396
6397 /* Extract the integral value of scale. */
6398 if (scale_rtx)
6399 {
6400 if (!CONST_INT_P (scale_rtx))
6401 return 0;
6402 scale = INTVAL (scale_rtx);
6403 }
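/* By now an address such as (plus (mult (reg) (const_int 4)) (symbol_ref))
   has been split into index == reg, scale == 4 and disp == the symbol,
   i.e. the base + index*scale + disp form the hardware can encode;
   an ASHIFT by 2 was likewise turned into scale == 4 above. */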
6404
6405 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6406 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6407
6408 /* Allow the arg pointer and stack pointer as index if there is no scaling. */
6409 if (base_reg && index_reg && scale == 1
6410 && (index_reg == arg_pointer_rtx
6411 || index_reg == frame_pointer_rtx
6412 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6413 {
6414 rtx tmp;
6415 tmp = base, base = index, index = tmp;
6416 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6417 }
6418
6419 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6420 if ((base_reg == hard_frame_pointer_rtx
6421 || base_reg == frame_pointer_rtx
6422 || base_reg == arg_pointer_rtx) && !disp)
6423 disp = const0_rtx;
6424
6425 /* Special case: on K6, [%esi] forces the instruction to be vector decoded.
6426 Avoid this by transforming it to [%esi+0]. */
6427 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6428 && base_reg && !index_reg && !disp
6429 && REG_P (base_reg)
6430 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6431 disp = const0_rtx;
6432
6433 /* Special case: encode reg+reg instead of reg*2. */
6434 if (!base && index && scale && scale == 2)
6435 base = index, base_reg = index_reg, scale = 1;
6436
6437 /* Special case: scaling cannot be encoded without base or displacement. */
6438 if (!base && !disp && index && scale != 1)
6439 disp = const0_rtx;
6440
6441 out->base = base;
6442 out->index = index;
6443 out->disp = disp;
6444 out->scale = scale;
6445 out->seg = seg;
6446
6447 return retval;
6448 }
6449 \f
6450 /* Return the cost of the memory address X.
6451 For i386, it is better to use a complex address than let gcc copy
6452 the address into a reg and make a new pseudo. But not if the address
6453 requires two regs - that would mean more pseudos with longer
6454 lifetimes. */
6455 static int
6456 ix86_address_cost (rtx x)
6457 {
6458 struct ix86_address parts;
6459 int cost = 1;
6460 int ok = ix86_decompose_address (x, &parts);
6461
6462 gcc_assert (ok);
6463
6464 if (parts.base && GET_CODE (parts.base) == SUBREG)
6465 parts.base = SUBREG_REG (parts.base);
6466 if (parts.index && GET_CODE (parts.index) == SUBREG)
6467 parts.index = SUBREG_REG (parts.index);
6468
6469 /* More complex memory references are better. */
6470 if (parts.disp && parts.disp != const0_rtx)
6471 cost--;
6472 if (parts.seg != SEG_DEFAULT)
6473 cost--;
6474
6475 /* Attempt to minimize number of registers in the address. */
6476 if ((parts.base
6477 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6478 || (parts.index
6479 && (!REG_P (parts.index)
6480 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6481 cost++;
6482
6483 if (parts.base
6484 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6485 && parts.index
6486 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6487 && parts.base != parts.index)
6488 cost++;
6489
6490 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6491 since its predecode logic can't detect the length of such instructions
6492 and decoding degenerates to vector decoding. Increase the cost of such
6493 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6494 to split such addresses or even refuse them entirely.
6495 
6496 The following addressing modes are affected:
6497 [base+scale*index]
6498 [scale*index+disp]
6499 [base+index]
6500 
6501 The first and last cases may be avoidable by explicitly coding the zero in
6502 the memory address, but I don't have an AMD-K6 machine handy to check this
6503 theory. */
6504
6505 if (TARGET_K6
6506 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6507 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6508 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6509 cost += 10;
6510
6511 return cost;
6512 }
6513 \f
6514 /* If X is a machine specific address (i.e. a symbol or label being
6515 referenced as a displacement from the GOT implemented using an
6516 UNSPEC), then return the base term. Otherwise return X. */
6517
6518 rtx
6519 ix86_find_base_term (rtx x)
6520 {
6521 rtx term;
6522
6523 if (TARGET_64BIT)
6524 {
6525 if (GET_CODE (x) != CONST)
6526 return x;
6527 term = XEXP (x, 0);
6528 if (GET_CODE (term) == PLUS
6529 && (CONST_INT_P (XEXP (term, 1))
6530 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
6531 term = XEXP (term, 0);
6532 if (GET_CODE (term) != UNSPEC
6533 || XINT (term, 1) != UNSPEC_GOTPCREL)
6534 return x;
6535
6536 term = XVECEXP (term, 0, 0);
6537
6538 if (GET_CODE (term) != SYMBOL_REF
6539 && GET_CODE (term) != LABEL_REF)
6540 return x;
6541
6542 return term;
6543 }
6544
6545 term = ix86_delegitimize_address (x);
6546
6547 if (GET_CODE (term) != SYMBOL_REF
6548 && GET_CODE (term) != LABEL_REF)
6549 return x;
6550
6551 return term;
6552 }
6553
6554 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6555 this is used to form addresses to local data when -fPIC is in
6556 use. */
6557
6558 static bool
6559 darwin_local_data_pic (rtx disp)
6560 {
6561 if (GET_CODE (disp) == MINUS)
6562 {
6563 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6564 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6565 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6566 {
6567 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6568 if (! strcmp (sym_name, "<pic base>"))
6569 return true;
6570 }
6571 }
6572
6573 return false;
6574 }
6575 \f
6576 /* Determine if a given RTX is a valid constant. We already know this
6577 satisfies CONSTANT_P. */
6578
6579 bool
6580 legitimate_constant_p (rtx x)
6581 {
6582 switch (GET_CODE (x))
6583 {
6584 case CONST:
6585 x = XEXP (x, 0);
6586
6587 if (GET_CODE (x) == PLUS)
6588 {
6589 if (!CONST_INT_P (XEXP (x, 1)))
6590 return false;
6591 x = XEXP (x, 0);
6592 }
6593
6594 if (TARGET_MACHO && darwin_local_data_pic (x))
6595 return true;
6596
6597 /* Only some unspecs are valid as "constants". */
6598 if (GET_CODE (x) == UNSPEC)
6599 switch (XINT (x, 1))
6600 {
6601 case UNSPEC_GOT:
6602 case UNSPEC_GOTOFF:
6603 case UNSPEC_PLTOFF:
6604 return TARGET_64BIT;
6605 case UNSPEC_TPOFF:
6606 case UNSPEC_NTPOFF:
6607 x = XVECEXP (x, 0, 0);
6608 return (GET_CODE (x) == SYMBOL_REF
6609 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6610 case UNSPEC_DTPOFF:
6611 x = XVECEXP (x, 0, 0);
6612 return (GET_CODE (x) == SYMBOL_REF
6613 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6614 default:
6615 return false;
6616 }
6617
6618 /* We must have drilled down to a symbol. */
6619 if (GET_CODE (x) == LABEL_REF)
6620 return true;
6621 if (GET_CODE (x) != SYMBOL_REF)
6622 return false;
6623 /* FALLTHRU */
6624
6625 case SYMBOL_REF:
6626 /* TLS symbols are never valid. */
6627 if (SYMBOL_REF_TLS_MODEL (x))
6628 return false;
6629 break;
6630
6631 case CONST_DOUBLE:
6632 if (GET_MODE (x) == TImode
6633 && x != CONST0_RTX (TImode)
6634 && !TARGET_64BIT)
6635 return false;
6636 break;
6637
6638 case CONST_VECTOR:
6639 if (x == CONST0_RTX (GET_MODE (x)))
6640 return true;
6641 return false;
6642
6643 default:
6644 break;
6645 }
6646
6647 /* Otherwise we handle everything else in the move patterns. */
6648 return true;
6649 }
6650
6651 /* Determine if it's legal to put X into the constant pool. This
6652 is not possible for the address of thread-local symbols, which
6653 is checked above. */
6654
6655 static bool
6656 ix86_cannot_force_const_mem (rtx x)
6657 {
6658 /* We can always put integral constants and vectors in memory. */
6659 switch (GET_CODE (x))
6660 {
6661 case CONST_INT:
6662 case CONST_DOUBLE:
6663 case CONST_VECTOR:
6664 return false;
6665
6666 default:
6667 break;
6668 }
6669 return !legitimate_constant_p (x);
6670 }
6671
6672 /* Determine if a given RTX is a valid constant address. */
6673
6674 bool
6675 constant_address_p (rtx x)
6676 {
6677 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6678 }
6679
6680 /* Nonzero if the constant value X is a legitimate general operand
6681 when generating PIC code. It is given that flag_pic is on and
6682 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6683
6684 bool
6685 legitimate_pic_operand_p (rtx x)
6686 {
6687 rtx inner;
6688
6689 switch (GET_CODE (x))
6690 {
6691 case CONST:
6692 inner = XEXP (x, 0);
6693 if (GET_CODE (inner) == PLUS
6694 && CONST_INT_P (XEXP (inner, 1)))
6695 inner = XEXP (inner, 0);
6696
6697 /* Only some unspecs are valid as "constants". */
6698 if (GET_CODE (inner) == UNSPEC)
6699 switch (XINT (inner, 1))
6700 {
6701 case UNSPEC_GOT:
6702 case UNSPEC_GOTOFF:
6703 case UNSPEC_PLTOFF:
6704 return TARGET_64BIT;
6705 case UNSPEC_TPOFF:
6706 x = XVECEXP (inner, 0, 0);
6707 return (GET_CODE (x) == SYMBOL_REF
6708 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6709 default:
6710 return false;
6711 }
6712 /* FALLTHRU */
6713
6714 case SYMBOL_REF:
6715 case LABEL_REF:
6716 return legitimate_pic_address_disp_p (x);
6717
6718 default:
6719 return true;
6720 }
6721 }
6722
6723 /* Determine if a given CONST RTX is a valid memory displacement
6724 in PIC mode. */
6725
6726 int
6727 legitimate_pic_address_disp_p (rtx disp)
6728 {
6729 bool saw_plus;
6730
6731 /* In 64bit mode we can allow direct addresses of symbols and labels
6732 when they are not dynamic symbols. */
6733 if (TARGET_64BIT)
6734 {
6735 rtx op0 = disp, op1;
6736
6737 switch (GET_CODE (disp))
6738 {
6739 case LABEL_REF:
6740 return true;
6741
6742 case CONST:
6743 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6744 break;
6745 op0 = XEXP (XEXP (disp, 0), 0);
6746 op1 = XEXP (XEXP (disp, 0), 1);
6747 if (!CONST_INT_P (op1)
6748 || INTVAL (op1) >= 16*1024*1024
6749 || INTVAL (op1) < -16*1024*1024)
6750 break;
6751 if (GET_CODE (op0) == LABEL_REF)
6752 return true;
6753 if (GET_CODE (op0) != SYMBOL_REF)
6754 break;
6755 /* FALLTHRU */
6756
6757 case SYMBOL_REF:
6758 /* TLS references should always be enclosed in UNSPEC. */
6759 if (SYMBOL_REF_TLS_MODEL (op0))
6760 return false;
6761 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6762 && ix86_cmodel != CM_LARGE_PIC)
6763 return true;
6764 break;
6765
6766 default:
6767 break;
6768 }
6769 }
6770 if (GET_CODE (disp) != CONST)
6771 return 0;
6772 disp = XEXP (disp, 0);
6773
6774 if (TARGET_64BIT)
6775 {
6776 /* It is not safe to allow PLUS expressions; this limits the allowed
6777 distance of GOT tables. We should not need these anyway. */
6778 if (GET_CODE (disp) != UNSPEC
6779 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6780 && XINT (disp, 1) != UNSPEC_GOTOFF
6781 && XINT (disp, 1) != UNSPEC_PLTOFF))
6782 return 0;
6783
6784 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6785 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6786 return 0;
6787 return 1;
6788 }
6789
6790 saw_plus = false;
6791 if (GET_CODE (disp) == PLUS)
6792 {
6793 if (!CONST_INT_P (XEXP (disp, 1)))
6794 return 0;
6795 disp = XEXP (disp, 0);
6796 saw_plus = true;
6797 }
6798
6799 if (TARGET_MACHO && darwin_local_data_pic (disp))
6800 return 1;
6801
6802 if (GET_CODE (disp) != UNSPEC)
6803 return 0;
6804
6805 switch (XINT (disp, 1))
6806 {
6807 case UNSPEC_GOT:
6808 if (saw_plus)
6809 return false;
6810 /* We need to check for both symbols and labels because VxWorks loads
6811 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6812 details. */
6813 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6814 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6815 case UNSPEC_GOTOFF:
6816 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6817 While the ABI also specifies a 32bit relocation, we don't produce it in
6818 the small PIC model at all. */
6819 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6820 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6821 && !TARGET_64BIT)
6822 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6823 return false;
6824 case UNSPEC_GOTTPOFF:
6825 case UNSPEC_GOTNTPOFF:
6826 case UNSPEC_INDNTPOFF:
6827 if (saw_plus)
6828 return false;
6829 disp = XVECEXP (disp, 0, 0);
6830 return (GET_CODE (disp) == SYMBOL_REF
6831 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6832 case UNSPEC_NTPOFF:
6833 disp = XVECEXP (disp, 0, 0);
6834 return (GET_CODE (disp) == SYMBOL_REF
6835 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6836 case UNSPEC_DTPOFF:
6837 disp = XVECEXP (disp, 0, 0);
6838 return (GET_CODE (disp) == SYMBOL_REF
6839 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6840 }
6841
6842 return 0;
6843 }
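/* Illustrative examples of displacements accepted above (symbol names
   are hypothetical):
     32bit PIC:  (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF))
     64bit PIC:  (const (unspec [(symbol_ref "x")] UNSPEC_GOTPCREL))
   A bare (symbol_ref "x") is accepted only in 64bit mode, when the
   symbol is known to be local and the code model is not large PIC.  */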
6844
6845 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6846 memory address for an instruction. The MODE argument is the machine mode
6847 for the MEM expression that wants to use this address.
6848
6849 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6850 convert common non-canonical forms to canonical form so that they will
6851 be recognized. */
6852
6853 int
6854 legitimate_address_p (enum machine_mode mode, rtx addr, int strict)
6855 {
6856 struct ix86_address parts;
6857 rtx base, index, disp;
6858 HOST_WIDE_INT scale;
6859 const char *reason = NULL;
6860 rtx reason_rtx = NULL_RTX;
6861
6862 if (TARGET_DEBUG_ADDR)
6863 {
6864 fprintf (stderr,
6865 "\n======\nGO_IF_LEGITIMATE_ADDRESS, mode = %s, strict = %d\n",
6866 GET_MODE_NAME (mode), strict);
6867 debug_rtx (addr);
6868 }
6869
6870 if (ix86_decompose_address (addr, &parts) <= 0)
6871 {
6872 reason = "decomposition failed";
6873 goto report_error;
6874 }
6875
6876 base = parts.base;
6877 index = parts.index;
6878 disp = parts.disp;
6879 scale = parts.scale;
6880
6881 /* Validate base register.
6882
6883 Don't allow SUBREGs that span more than a word here. They can lead to spill
6884 failures when the base is one word out of a two word structure, which is
6885 represented internally as a DImode int. */
6886
6887 if (base)
6888 {
6889 rtx reg;
6890 reason_rtx = base;
6891
6892 if (REG_P (base))
6893 reg = base;
6894 else if (GET_CODE (base) == SUBREG
6895 && REG_P (SUBREG_REG (base))
6896 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6897 <= UNITS_PER_WORD)
6898 reg = SUBREG_REG (base);
6899 else
6900 {
6901 reason = "base is not a register";
6902 goto report_error;
6903 }
6904
6905 if (GET_MODE (base) != Pmode)
6906 {
6907 reason = "base is not in Pmode";
6908 goto report_error;
6909 }
6910
6911 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6912 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6913 {
6914 reason = "base is not valid";
6915 goto report_error;
6916 }
6917 }
6918
6919 /* Validate index register.
6920
6921 Don't allow SUBREGs that span more than a word here -- same as above. */
6922
6923 if (index)
6924 {
6925 rtx reg;
6926 reason_rtx = index;
6927
6928 if (REG_P (index))
6929 reg = index;
6930 else if (GET_CODE (index) == SUBREG
6931 && REG_P (SUBREG_REG (index))
6932 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6933 <= UNITS_PER_WORD)
6934 reg = SUBREG_REG (index);
6935 else
6936 {
6937 reason = "index is not a register";
6938 goto report_error;
6939 }
6940
6941 if (GET_MODE (index) != Pmode)
6942 {
6943 reason = "index is not in Pmode";
6944 goto report_error;
6945 }
6946
6947 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6948 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6949 {
6950 reason = "index is not valid";
6951 goto report_error;
6952 }
6953 }
6954
6955 /* Validate scale factor. */
6956 if (scale != 1)
6957 {
6958 reason_rtx = GEN_INT (scale);
6959 if (!index)
6960 {
6961 reason = "scale without index";
6962 goto report_error;
6963 }
6964
6965 if (scale != 2 && scale != 4 && scale != 8)
6966 {
6967 reason = "scale is not a valid multiplier";
6968 goto report_error;
6969 }
6970 }
6971
6972 /* Validate displacement. */
6973 if (disp)
6974 {
6975 reason_rtx = disp;
6976
6977 if (GET_CODE (disp) == CONST
6978 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6979 switch (XINT (XEXP (disp, 0), 1))
6980 {
6981 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6982 used. While the ABI also specifies 32bit relocations, we don't produce
6983 them at all and use IP-relative addressing instead. */
6984 case UNSPEC_GOT:
6985 case UNSPEC_GOTOFF:
6986 gcc_assert (flag_pic);
6987 if (!TARGET_64BIT)
6988 goto is_legitimate_pic;
6989 reason = "64bit address unspec";
6990 goto report_error;
6991
6992 case UNSPEC_GOTPCREL:
6993 gcc_assert (flag_pic);
6994 goto is_legitimate_pic;
6995
6996 case UNSPEC_GOTTPOFF:
6997 case UNSPEC_GOTNTPOFF:
6998 case UNSPEC_INDNTPOFF:
6999 case UNSPEC_NTPOFF:
7000 case UNSPEC_DTPOFF:
7001 break;
7002
7003 default:
7004 reason = "invalid address unspec";
7005 goto report_error;
7006 }
7007
7008 else if (SYMBOLIC_CONST (disp)
7009 && (flag_pic
7010 || (TARGET_MACHO
7011 #if TARGET_MACHO
7012 && MACHOPIC_INDIRECT
7013 && !machopic_operand_p (disp)
7014 #endif
7015 )))
7016 {
7017
7018 is_legitimate_pic:
7019 if (TARGET_64BIT && (index || base))
7020 {
7021 /* foo@dtpoff(%rX) is ok. */
7022 if (GET_CODE (disp) != CONST
7023 || GET_CODE (XEXP (disp, 0)) != PLUS
7024 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7025 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7026 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7027 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7028 {
7029 reason = "non-constant pic memory reference";
7030 goto report_error;
7031 }
7032 }
7033 else if (! legitimate_pic_address_disp_p (disp))
7034 {
7035 reason = "displacement is an invalid pic construct";
7036 goto report_error;
7037 }
7038
7039 /* This code used to verify that a symbolic pic displacement
7040 includes the pic_offset_table_rtx register.
7041
7042 While this is a good idea, unfortunately these constructs may
7043 be created by the "adds using lea" optimization for incorrect
7044 code like:
7045
7046 int a;
7047 int foo(int i)
7048 {
7049 return *(&a+i);
7050 }
7051
7052 This code is nonsensical, but it results in addressing the
7053 GOT table with a pic_offset_table_rtx base. We can't
7054 just refuse it easily, since it gets matched by the
7055 "addsi3" pattern, which later gets split into an lea when
7056 the output register differs from the input. While this
7057 could be handled by a separate addsi pattern for this case
7058 that never results in an lea, disabling this test seems to be
7059 the easier and correct fix for the crash. */
7060 }
7061 else if (GET_CODE (disp) != LABEL_REF
7062 && !CONST_INT_P (disp)
7063 && (GET_CODE (disp) != CONST
7064 || !legitimate_constant_p (disp))
7065 && (GET_CODE (disp) != SYMBOL_REF
7066 || !legitimate_constant_p (disp)))
7067 {
7068 reason = "displacement is not constant";
7069 goto report_error;
7070 }
7071 else if (TARGET_64BIT
7072 && !x86_64_immediate_operand (disp, VOIDmode))
7073 {
7074 reason = "displacement is out of range";
7075 goto report_error;
7076 }
7077 }
7078
7079 /* Everything looks valid. */
7080 if (TARGET_DEBUG_ADDR)
7081 fprintf (stderr, "Success.\n");
7082 return TRUE;
7083
7084 report_error:
7085 if (TARGET_DEBUG_ADDR)
7086 {
7087 fprintf (stderr, "Error: %s\n", reason);
7088 debug_rtx (reason_rtx);
7089 }
7090 return FALSE;
7091 }
7092 \f
7093 /* Return a unique alias set for the GOT. */
7094
7095 static HOST_WIDE_INT
7096 ix86_GOT_alias_set (void)
7097 {
7098 static HOST_WIDE_INT set = -1;
7099 if (set == -1)
7100 set = new_alias_set ();
7101 return set;
7102 }
7103
7104 /* Return a legitimate reference for ORIG (an address) using the
7105 register REG. If REG is 0, a new pseudo is generated.
7106
7107 There are two types of references that must be handled:
7108
7109 1. Global data references must load the address from the GOT, via
7110 the PIC reg. An insn is emitted to do this load, and the reg is
7111 returned.
7112
7113 2. Static data references, constant pool addresses, and code labels
7114 compute the address as an offset from the GOT, whose base is in
7115 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7116 differentiate them from global data objects. The returned
7117 address is the PIC reg + an unspec constant.
7118
7119 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7120 reg also appears in the address. */
7121
7122 static rtx
7123 legitimize_pic_address (rtx orig, rtx reg)
7124 {
7125 rtx addr = orig;
7126 rtx new = orig;
7127 rtx base;
7128
7129 #if TARGET_MACHO
7130 if (TARGET_MACHO && !TARGET_64BIT)
7131 {
7132 if (reg == 0)
7133 reg = gen_reg_rtx (Pmode);
7134 /* Use the generic Mach-O PIC machinery. */
7135 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7136 }
7137 #endif
7138
7139 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7140 new = addr;
7141 else if (TARGET_64BIT
7142 && ix86_cmodel != CM_SMALL_PIC
7143 && gotoff_operand (addr, Pmode))
7144 {
7145 rtx tmpreg;
7146 /* This symbol may be referenced via a displacement from the PIC
7147 base address (@GOTOFF). */
7148
7149 if (reload_in_progress)
7150 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7151 if (GET_CODE (addr) == CONST)
7152 addr = XEXP (addr, 0);
7153 if (GET_CODE (addr) == PLUS)
7154 {
7155 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7156 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7157 }
7158 else
7159 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7160 new = gen_rtx_CONST (Pmode, new);
7161 if (!reg)
7162 tmpreg = gen_reg_rtx (Pmode);
7163 else
7164 tmpreg = reg;
7165 emit_move_insn (tmpreg, new);
7166
7167 if (reg != 0)
7168 {
7169 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7170 tmpreg, 1, OPTAB_DIRECT);
7171 new = reg;
7172 }
7173 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7174 }
7175 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7176 {
7177 /* This symbol may be referenced via a displacement from the PIC
7178 base address (@GOTOFF). */
7179
7180 if (reload_in_progress)
7181 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7182 if (GET_CODE (addr) == CONST)
7183 addr = XEXP (addr, 0);
7184 if (GET_CODE (addr) == PLUS)
7185 {
7186 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
7187 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
7188 }
7189 else
7190 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7191 new = gen_rtx_CONST (Pmode, new);
7192 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7193
7194 if (reg != 0)
7195 {
7196 emit_move_insn (reg, new);
7197 new = reg;
7198 }
7199 }
7200 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7201 /* We can't use @GOTOFF for text labels on VxWorks;
7202 see gotoff_operand. */
7203 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7204 {
7205 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7206 {
7207 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7208 new = gen_rtx_CONST (Pmode, new);
7209 new = gen_const_mem (Pmode, new);
7210 set_mem_alias_set (new, ix86_GOT_alias_set ());
7211
7212 if (reg == 0)
7213 reg = gen_reg_rtx (Pmode);
7214 /* Use gen_movsi directly, otherwise the address is loaded
7215 into a register for CSE. We don't want to CSE these addresses;
7216 instead we CSE addresses from the GOT table, so skip this. */
7217 emit_insn (gen_movsi (reg, new));
7218 new = reg;
7219 }
7220 else
7221 {
7222 /* This symbol must be referenced via a load from the
7223 Global Offset Table (@GOT). */
7224
7225 if (reload_in_progress)
7226 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7227 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7228 new = gen_rtx_CONST (Pmode, new);
7229 if (TARGET_64BIT)
7230 new = force_reg (Pmode, new);
7231 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7232 new = gen_const_mem (Pmode, new);
7233 set_mem_alias_set (new, ix86_GOT_alias_set ());
7234
7235 if (reg == 0)
7236 reg = gen_reg_rtx (Pmode);
7237 emit_move_insn (reg, new);
7238 new = reg;
7239 }
7240 }
7241 else
7242 {
7243 if (CONST_INT_P (addr)
7244 && !x86_64_immediate_operand (addr, VOIDmode))
7245 {
7246 if (reg)
7247 {
7248 emit_move_insn (reg, addr);
7249 new = reg;
7250 }
7251 else
7252 new = force_reg (Pmode, addr);
7253 }
7254 else if (GET_CODE (addr) == CONST)
7255 {
7256 addr = XEXP (addr, 0);
7257
7258 /* We must match stuff we generated before. Assume the only
7259 unspecs that can get here are ours. Not that we could do
7260 anything with them anyway.... */
7261 if (GET_CODE (addr) == UNSPEC
7262 || (GET_CODE (addr) == PLUS
7263 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7264 return orig;
7265 gcc_assert (GET_CODE (addr) == PLUS);
7266 }
7267 if (GET_CODE (addr) == PLUS)
7268 {
7269 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7270
7271 /* Check first to see if this is a constant offset from a @GOTOFF
7272 symbol reference. */
7273 if (gotoff_operand (op0, Pmode)
7274 && CONST_INT_P (op1))
7275 {
7276 if (!TARGET_64BIT)
7277 {
7278 if (reload_in_progress)
7279 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7280 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7281 UNSPEC_GOTOFF);
7282 new = gen_rtx_PLUS (Pmode, new, op1);
7283 new = gen_rtx_CONST (Pmode, new);
7284 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
7285
7286 if (reg != 0)
7287 {
7288 emit_move_insn (reg, new);
7289 new = reg;
7290 }
7291 }
7292 else
7293 {
7294 if (INTVAL (op1) < -16*1024*1024
7295 || INTVAL (op1) >= 16*1024*1024)
7296 {
7297 if (!x86_64_immediate_operand (op1, Pmode))
7298 op1 = force_reg (Pmode, op1);
7299 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7300 }
7301 }
7302 }
7303 else
7304 {
7305 base = legitimize_pic_address (XEXP (addr, 0), reg);
7306 new = legitimize_pic_address (XEXP (addr, 1),
7307 base == reg ? NULL_RTX : reg);
7308
7309 if (CONST_INT_P (new))
7310 new = plus_constant (base, INTVAL (new));
7311 else
7312 {
7313 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7314 {
7315 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7316 new = XEXP (new, 1);
7317 }
7318 new = gen_rtx_PLUS (Pmode, base, new);
7319 }
7320 }
7321 }
7322 }
7323 return new;
7324 }
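/* Illustrative sketch of the two 32bit PIC forms produced above
   (symbol names are hypothetical, ebx stands for pic_offset_table_rtx):
     global data:  (mem:SI (plus:SI (reg:SI ebx)
                     (const:SI (unspec [(symbol_ref "glob")] UNSPEC_GOT))))
     local data:   (plus:SI (reg:SI ebx)
                     (const:SI (unspec [(symbol_ref "loc")] UNSPEC_GOTOFF)))
   The first loads the address from the GOT, the second forms it as an
   offset from the GOT base.  */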
7325 \f
7326 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7327
7328 static rtx
7329 get_thread_pointer (int to_reg)
7330 {
7331 rtx tp, reg, insn;
7332
7333 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7334 if (!to_reg)
7335 return tp;
7336
7337 reg = gen_reg_rtx (Pmode);
7338 insn = gen_rtx_SET (VOIDmode, reg, tp);
7339 insn = emit_insn (insn);
7340
7341 return reg;
7342 }
7343
7344 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7345 false if we expect this to be used for a memory address and true if
7346 we expect to load the address into a register. */
7347
7348 static rtx
7349 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7350 {
7351 rtx dest, base, off, pic, tp;
7352 int type;
7353
7354 switch (model)
7355 {
7356 case TLS_MODEL_GLOBAL_DYNAMIC:
7357 dest = gen_reg_rtx (Pmode);
7358 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7359
7360 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7361 {
7362 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7363
7364 start_sequence ();
7365 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7366 insns = get_insns ();
7367 end_sequence ();
7368
7369 emit_libcall_block (insns, dest, rax, x);
7370 }
7371 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7372 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7373 else
7374 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7375
7376 if (TARGET_GNU2_TLS)
7377 {
7378 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7379
7380 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7381 }
7382 break;
7383
7384 case TLS_MODEL_LOCAL_DYNAMIC:
7385 base = gen_reg_rtx (Pmode);
7386 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7387
7388 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7389 {
7390 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7391
7392 start_sequence ();
7393 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7394 insns = get_insns ();
7395 end_sequence ();
7396
7397 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7398 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7399 emit_libcall_block (insns, base, rax, note);
7400 }
7401 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7402 emit_insn (gen_tls_local_dynamic_base_64 (base));
7403 else
7404 emit_insn (gen_tls_local_dynamic_base_32 (base));
7405
7406 if (TARGET_GNU2_TLS)
7407 {
7408 rtx x = ix86_tls_module_base ();
7409
7410 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7411 gen_rtx_MINUS (Pmode, x, tp));
7412 }
7413
7414 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7415 off = gen_rtx_CONST (Pmode, off);
7416
7417 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7418
7419 if (TARGET_GNU2_TLS)
7420 {
7421 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7422
7423 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7424 }
7425
7426 break;
7427
7428 case TLS_MODEL_INITIAL_EXEC:
7429 if (TARGET_64BIT)
7430 {
7431 pic = NULL;
7432 type = UNSPEC_GOTNTPOFF;
7433 }
7434 else if (flag_pic)
7435 {
7436 if (reload_in_progress)
7437 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7438 pic = pic_offset_table_rtx;
7439 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7440 }
7441 else if (!TARGET_ANY_GNU_TLS)
7442 {
7443 pic = gen_reg_rtx (Pmode);
7444 emit_insn (gen_set_got (pic));
7445 type = UNSPEC_GOTTPOFF;
7446 }
7447 else
7448 {
7449 pic = NULL;
7450 type = UNSPEC_INDNTPOFF;
7451 }
7452
7453 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7454 off = gen_rtx_CONST (Pmode, off);
7455 if (pic)
7456 off = gen_rtx_PLUS (Pmode, pic, off);
7457 off = gen_const_mem (Pmode, off);
7458 set_mem_alias_set (off, ix86_GOT_alias_set ());
7459
7460 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7461 {
7462 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7463 off = force_reg (Pmode, off);
7464 return gen_rtx_PLUS (Pmode, base, off);
7465 }
7466 else
7467 {
7468 base = get_thread_pointer (true);
7469 dest = gen_reg_rtx (Pmode);
7470 emit_insn (gen_subsi3 (dest, base, off));
7471 }
7472 break;
7473
7474 case TLS_MODEL_LOCAL_EXEC:
7475 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7476 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7477 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7478 off = gen_rtx_CONST (Pmode, off);
7479
7480 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7481 {
7482 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7483 return gen_rtx_PLUS (Pmode, base, off);
7484 }
7485 else
7486 {
7487 base = get_thread_pointer (true);
7488 dest = gen_reg_rtx (Pmode);
7489 emit_insn (gen_subsi3 (dest, base, off));
7490 }
7491 break;
7492
7493 default:
7494 gcc_unreachable ();
7495 }
7496
7497 return dest;
7498 }
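/* Illustrative sketch for the local-exec model with GNU TLS (the symbol
   name "tvar" is hypothetical): the returned address has the shape
     (plus (unspec [(const_int 0)] UNSPEC_TP)
           (const (unspec [(symbol_ref "tvar")] UNSPEC_NTPOFF)))
   i.e. thread pointer plus offset, which the addressing code can turn
   into a %gs-relative (or %fs-relative on 64bit) access of
   tvar@NTPOFF/@TPOFF.  Without GNU TLS the offset is instead subtracted
   from the thread pointer with a subsi3.  */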
7499
7500 /* Try machine-dependent ways of modifying an illegitimate address
7501 to be legitimate. If we find one, return the new, valid address.
7502 This macro is used in only one place: `memory_address' in explow.c.
7503
7504 OLDX is the address as it was before break_out_memory_refs was called.
7505 In some cases it is useful to look at this to decide what needs to be done.
7506
7507 MODE and WIN are passed so that this macro can use
7508 GO_IF_LEGITIMATE_ADDRESS.
7509
7510 It is always safe for this macro to do nothing. It exists to recognize
7511 opportunities to optimize the output.
7512
7513 For the 80386, we handle X+REG by loading X into a register R and
7514 using R+REG. R will go in a general reg and indexing will be used.
7515 However, if REG is a broken-out memory address or multiplication,
7516 nothing needs to be done because REG can certainly go in a general reg.
7517
7518 When -fpic is used, special handling is needed for symbolic references.
7519 See comments by legitimize_pic_address in i386.c for details. */
7520
7521 rtx
7522 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7523 {
7524 int changed = 0;
7525 unsigned log;
7526
7527 if (TARGET_DEBUG_ADDR)
7528 {
7529 fprintf (stderr, "\n==========\nLEGITIMIZE_ADDRESS, mode = %s\n",
7530 GET_MODE_NAME (mode));
7531 debug_rtx (x);
7532 }
7533
7534 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7535 if (log)
7536 return legitimize_tls_address (x, log, false);
7537 if (GET_CODE (x) == CONST
7538 && GET_CODE (XEXP (x, 0)) == PLUS
7539 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7540 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7541 {
7542 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7543 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7544 }
7545
7546 if (flag_pic && SYMBOLIC_CONST (x))
7547 return legitimize_pic_address (x, 0);
7548
7549 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7550 if (GET_CODE (x) == ASHIFT
7551 && CONST_INT_P (XEXP (x, 1))
7552 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7553 {
7554 changed = 1;
7555 log = INTVAL (XEXP (x, 1));
7556 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7557 GEN_INT (1 << log));
7558 }
7559
7560 if (GET_CODE (x) == PLUS)
7561 {
7562 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7563
7564 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7565 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7566 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7567 {
7568 changed = 1;
7569 log = INTVAL (XEXP (XEXP (x, 0), 1));
7570 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7571 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7572 GEN_INT (1 << log));
7573 }
7574
7575 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7576 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7577 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7578 {
7579 changed = 1;
7580 log = INTVAL (XEXP (XEXP (x, 1), 1));
7581 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7582 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7583 GEN_INT (1 << log));
7584 }
7585
7586 /* Put multiply first if it isn't already. */
7587 if (GET_CODE (XEXP (x, 1)) == MULT)
7588 {
7589 rtx tmp = XEXP (x, 0);
7590 XEXP (x, 0) = XEXP (x, 1);
7591 XEXP (x, 1) = tmp;
7592 changed = 1;
7593 }
7594
7595 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7596 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7597 created by virtual register instantiation, register elimination, and
7598 similar optimizations. */
7599 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7600 {
7601 changed = 1;
7602 x = gen_rtx_PLUS (Pmode,
7603 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7604 XEXP (XEXP (x, 1), 0)),
7605 XEXP (XEXP (x, 1), 1));
7606 }
7607
7608 /* Canonicalize
7609 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7610 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7611 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7612 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7613 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7614 && CONSTANT_P (XEXP (x, 1)))
7615 {
7616 rtx constant;
7617 rtx other = NULL_RTX;
7618
7619 if (CONST_INT_P (XEXP (x, 1)))
7620 {
7621 constant = XEXP (x, 1);
7622 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7623 }
7624 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7625 {
7626 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7627 other = XEXP (x, 1);
7628 }
7629 else
7630 constant = 0;
7631
7632 if (constant)
7633 {
7634 changed = 1;
7635 x = gen_rtx_PLUS (Pmode,
7636 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7637 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7638 plus_constant (other, INTVAL (constant)));
7639 }
7640 }
7641
7642 if (changed && legitimate_address_p (mode, x, FALSE))
7643 return x;
7644
7645 if (GET_CODE (XEXP (x, 0)) == MULT)
7646 {
7647 changed = 1;
7648 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7649 }
7650
7651 if (GET_CODE (XEXP (x, 1)) == MULT)
7652 {
7653 changed = 1;
7654 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7655 }
7656
7657 if (changed
7658 && REG_P (XEXP (x, 1))
7659 && REG_P (XEXP (x, 0)))
7660 return x;
7661
7662 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7663 {
7664 changed = 1;
7665 x = legitimize_pic_address (x, 0);
7666 }
7667
7668 if (changed && legitimate_address_p (mode, x, FALSE))
7669 return x;
7670
7671 if (REG_P (XEXP (x, 0)))
7672 {
7673 rtx temp = gen_reg_rtx (Pmode);
7674 rtx val = force_operand (XEXP (x, 1), temp);
7675 if (val != temp)
7676 emit_move_insn (temp, val);
7677
7678 XEXP (x, 1) = temp;
7679 return x;
7680 }
7681
7682 else if (REG_P (XEXP (x, 1)))
7683 {
7684 rtx temp = gen_reg_rtx (Pmode);
7685 rtx val = force_operand (XEXP (x, 0), temp);
7686 if (val != temp)
7687 emit_move_insn (temp, val);
7688
7689 XEXP (x, 0) = temp;
7690 return x;
7691 }
7692 }
7693
7694 return x;
7695 }
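/* Illustrative example of the canonicalizations above (registers are
   placeholders): an address computed as
     (plus (ashift (reg) (const_int 2)) (reg))
   is rewritten into
     (plus (mult (reg) (const_int 4)) (reg))
   so that it matches the base + index*scale forms recognized by
   legitimate_address_p.  */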
7696 \f
7697 /* Print an integer constant expression in assembler syntax. Addition
7698 and subtraction are the only arithmetic that may appear in these
7699 expressions. FILE is the stdio stream to write to, X is the rtx, and
7700 CODE is the operand print code from the output string. */
7701
7702 static void
7703 output_pic_addr_const (FILE *file, rtx x, int code)
7704 {
7705 char buf[256];
7706
7707 switch (GET_CODE (x))
7708 {
7709 case PC:
7710 gcc_assert (flag_pic);
7711 putc ('.', file);
7712 break;
7713
7714 case SYMBOL_REF:
7715 if (! TARGET_MACHO || TARGET_64BIT)
7716 output_addr_const (file, x);
7717 else
7718 {
7719 const char *name = XSTR (x, 0);
7720
7721 /* Mark the decl as referenced so that cgraph will output the function. */
7722 if (SYMBOL_REF_DECL (x))
7723 mark_decl_referenced (SYMBOL_REF_DECL (x));
7724
7725 #if TARGET_MACHO
7726 if (MACHOPIC_INDIRECT
7727 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7728 name = machopic_indirection_name (x, /*stub_p=*/true);
7729 #endif
7730 assemble_name (file, name);
7731 }
7732 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7733 fputs ("@PLT", file);
7734 break;
7735
7736 case LABEL_REF:
7737 x = XEXP (x, 0);
7738 /* FALLTHRU */
7739 case CODE_LABEL:
7740 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7741 assemble_name (asm_out_file, buf);
7742 break;
7743
7744 case CONST_INT:
7745 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7746 break;
7747
7748 case CONST:
7749 /* This used to output parentheses around the expression,
7750 but that does not work on the 386 (either ATT or BSD assembler). */
7751 output_pic_addr_const (file, XEXP (x, 0), code);
7752 break;
7753
7754 case CONST_DOUBLE:
7755 if (GET_MODE (x) == VOIDmode)
7756 {
7757 /* We can use %d if the number is <32 bits and positive. */
7758 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7759 fprintf (file, "0x%lx%08lx",
7760 (unsigned long) CONST_DOUBLE_HIGH (x),
7761 (unsigned long) CONST_DOUBLE_LOW (x));
7762 else
7763 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7764 }
7765 else
7766 /* We can't handle floating point constants;
7767 PRINT_OPERAND must handle them. */
7768 output_operand_lossage ("floating constant misused");
7769 break;
7770
7771 case PLUS:
7772 /* Some assemblers need integer constants to appear first. */
7773 if (CONST_INT_P (XEXP (x, 0)))
7774 {
7775 output_pic_addr_const (file, XEXP (x, 0), code);
7776 putc ('+', file);
7777 output_pic_addr_const (file, XEXP (x, 1), code);
7778 }
7779 else
7780 {
7781 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7782 output_pic_addr_const (file, XEXP (x, 1), code);
7783 putc ('+', file);
7784 output_pic_addr_const (file, XEXP (x, 0), code);
7785 }
7786 break;
7787
7788 case MINUS:
7789 if (!TARGET_MACHO)
7790 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7791 output_pic_addr_const (file, XEXP (x, 0), code);
7792 putc ('-', file);
7793 output_pic_addr_const (file, XEXP (x, 1), code);
7794 if (!TARGET_MACHO)
7795 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7796 break;
7797
7798 case UNSPEC:
7799 gcc_assert (XVECLEN (x, 0) == 1);
7800 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7801 switch (XINT (x, 1))
7802 {
7803 case UNSPEC_GOT:
7804 fputs ("@GOT", file);
7805 break;
7806 case UNSPEC_GOTOFF:
7807 fputs ("@GOTOFF", file);
7808 break;
7809 case UNSPEC_PLTOFF:
7810 fputs ("@PLTOFF", file);
7811 break;
7812 case UNSPEC_GOTPCREL:
7813 fputs ("@GOTPCREL(%rip)", file);
7814 break;
7815 case UNSPEC_GOTTPOFF:
7816 /* FIXME: This might be @TPOFF in Sun ld too. */
7817 fputs ("@GOTTPOFF", file);
7818 break;
7819 case UNSPEC_TPOFF:
7820 fputs ("@TPOFF", file);
7821 break;
7822 case UNSPEC_NTPOFF:
7823 if (TARGET_64BIT)
7824 fputs ("@TPOFF", file);
7825 else
7826 fputs ("@NTPOFF", file);
7827 break;
7828 case UNSPEC_DTPOFF:
7829 fputs ("@DTPOFF", file);
7830 break;
7831 case UNSPEC_GOTNTPOFF:
7832 if (TARGET_64BIT)
7833 fputs ("@GOTTPOFF(%rip)", file);
7834 else
7835 fputs ("@GOTNTPOFF", file);
7836 break;
7837 case UNSPEC_INDNTPOFF:
7838 fputs ("@INDNTPOFF", file);
7839 break;
7840 default:
7841 output_operand_lossage ("invalid UNSPEC as operand");
7842 break;
7843 }
7844 break;
7845
7846 default:
7847 output_operand_lossage ("invalid expression as operand");
7848 }
7849 }
7850
7851 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7852 We need to emit DTP-relative relocations. */
7853
7854 static void
7855 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7856 {
7857 fputs (ASM_LONG, file);
7858 output_addr_const (file, x);
7859 fputs ("@DTPOFF", file);
7860 switch (size)
7861 {
7862 case 4:
7863 break;
7864 case 8:
7865 fputs (", 0", file);
7866 break;
7867 default:
7868 gcc_unreachable ();
7869 }
7870 }
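/* For example, for SIZE == 4 this emits roughly
     .long  sym@DTPOFF
   where "sym" is a placeholder and the exact directive comes from
   ASM_LONG; for SIZE == 8 a ", 0" upper half is appended after the
   relocation.  */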
7871
7872 /* In the name of slightly smaller debug output, and to cater to
7873 general assembler lossage, recognize PIC+GOTOFF and turn it back
7874 into a direct symbol reference.
7875
7876 On Darwin, this is necessary to avoid a crash, because Darwin
7877 has a different PIC label for each routine but the DWARF debugging
7878 information is not associated with any particular routine, so it's
7879 necessary to remove references to the PIC label from RTL stored by
7880 the DWARF output code. */
7881
7882 static rtx
7883 ix86_delegitimize_address (rtx orig_x)
7884 {
7885 rtx x = orig_x;
7886 /* reg_addend is NULL or a multiple of some register. */
7887 rtx reg_addend = NULL_RTX;
7888 /* const_addend is NULL or a const_int. */
7889 rtx const_addend = NULL_RTX;
7890 /* This is the result, or NULL. */
7891 rtx result = NULL_RTX;
7892
7893 if (MEM_P (x))
7894 x = XEXP (x, 0);
7895
7896 if (TARGET_64BIT)
7897 {
7898 if (GET_CODE (x) != CONST
7899 || GET_CODE (XEXP (x, 0)) != UNSPEC
7900 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7901 || !MEM_P (orig_x))
7902 return orig_x;
7903 return XVECEXP (XEXP (x, 0), 0, 0);
7904 }
7905
7906 if (GET_CODE (x) != PLUS
7907 || GET_CODE (XEXP (x, 1)) != CONST)
7908 return orig_x;
7909
7910 if (REG_P (XEXP (x, 0))
7911 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7912 /* %ebx + GOT/GOTOFF */
7913 ;
7914 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7915 {
7916 /* %ebx + %reg * scale + GOT/GOTOFF */
7917 reg_addend = XEXP (x, 0);
7918 if (REG_P (XEXP (reg_addend, 0))
7919 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7920 reg_addend = XEXP (reg_addend, 1);
7921 else if (REG_P (XEXP (reg_addend, 1))
7922 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7923 reg_addend = XEXP (reg_addend, 0);
7924 else
7925 return orig_x;
7926 if (!REG_P (reg_addend)
7927 && GET_CODE (reg_addend) != MULT
7928 && GET_CODE (reg_addend) != ASHIFT)
7929 return orig_x;
7930 }
7931 else
7932 return orig_x;
7933
7934 x = XEXP (XEXP (x, 1), 0);
7935 if (GET_CODE (x) == PLUS
7936 && CONST_INT_P (XEXP (x, 1)))
7937 {
7938 const_addend = XEXP (x, 1);
7939 x = XEXP (x, 0);
7940 }
7941
7942 if (GET_CODE (x) == UNSPEC
7943 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7944 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7945 result = XVECEXP (x, 0, 0);
7946
7947 if (TARGET_MACHO && darwin_local_data_pic (x)
7948 && !MEM_P (orig_x))
7949 result = XEXP (x, 0);
7950
7951 if (! result)
7952 return orig_x;
7953
7954 if (const_addend)
7955 result = gen_rtx_PLUS (Pmode, result, const_addend);
7956 if (reg_addend)
7957 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7958 return result;
7959 }
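/* Illustrative example for 32bit PIC (hypothetical symbol): the address
     (plus (reg:SI ebx) (const (unspec [(symbol_ref "x")] UNSPEC_GOTOFF)))
   is turned back into the plain (symbol_ref "x"), keeping the DWARF
   output independent of the per-function PIC register setup.  */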
7960 \f
7961 static void
7962 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7963 int fp, FILE *file)
7964 {
7965 const char *suffix;
7966
7967 if (mode == CCFPmode || mode == CCFPUmode)
7968 {
7969 enum rtx_code second_code, bypass_code;
7970 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7971 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7972 code = ix86_fp_compare_code_to_integer (code);
7973 mode = CCmode;
7974 }
7975 if (reverse)
7976 code = reverse_condition (code);
7977
7978 switch (code)
7979 {
7980 case EQ:
7981 suffix = "e";
7982 break;
7983 case NE:
7984 suffix = "ne";
7985 break;
7986 case GT:
7987 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7988 suffix = "g";
7989 break;
7990 case GTU:
7991 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7992 Those same assemblers have the same but opposite lossage on cmov. */
7993 gcc_assert (mode == CCmode);
7994 suffix = fp ? "nbe" : "a";
7995 break;
7996 case LT:
7997 switch (mode)
7998 {
7999 case CCNOmode:
8000 case CCGOCmode:
8001 suffix = "s";
8002 break;
8003
8004 case CCmode:
8005 case CCGCmode:
8006 suffix = "l";
8007 break;
8008
8009 default:
8010 gcc_unreachable ();
8011 }
8012 break;
8013 case LTU:
8014 gcc_assert (mode == CCmode);
8015 suffix = "b";
8016 break;
8017 case GE:
8018 switch (mode)
8019 {
8020 case CCNOmode:
8021 case CCGOCmode:
8022 suffix = "ns";
8023 break;
8024
8025 case CCmode:
8026 case CCGCmode:
8027 suffix = "ge";
8028 break;
8029
8030 default:
8031 gcc_unreachable ();
8032 }
8033 break;
8034 case GEU:
8035 /* ??? As above. */
8036 gcc_assert (mode == CCmode);
8037 suffix = fp ? "nb" : "ae";
8038 break;
8039 case LE:
8040 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8041 suffix = "le";
8042 break;
8043 case LEU:
8044 gcc_assert (mode == CCmode);
8045 suffix = "be";
8046 break;
8047 case UNORDERED:
8048 suffix = fp ? "u" : "p";
8049 break;
8050 case ORDERED:
8051 suffix = fp ? "nu" : "np";
8052 break;
8053 default:
8054 gcc_unreachable ();
8055 }
8056 fputs (suffix, file);
8057 }
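/* For example, EQ yields the suffix "e" and, with REVERSE set, "ne";
   an insn template such as "set%C0 %0" (illustrative only) would thus
   expand to "sete" or "setne" depending on the comparison.  */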
8058
8059 /* Print the name of register X to FILE based on its machine mode and number.
8060 If CODE is 'w', pretend the mode is HImode.
8061 If CODE is 'b', pretend the mode is QImode.
8062 If CODE is 'k', pretend the mode is SImode.
8063 If CODE is 'q', pretend the mode is DImode.
8064 If CODE is 'h', pretend the reg is the 'high' byte register.
8065 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
8066
8067 void
8068 print_reg (rtx x, int code, FILE *file)
8069 {
8070 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8071 && REGNO (x) != FRAME_POINTER_REGNUM
8072 && REGNO (x) != FLAGS_REG
8073 && REGNO (x) != FPSR_REG
8074 && REGNO (x) != FPCR_REG);
8075
8076 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8077 putc ('%', file);
8078
8079 if (code == 'w' || MMX_REG_P (x))
8080 code = 2;
8081 else if (code == 'b')
8082 code = 1;
8083 else if (code == 'k')
8084 code = 4;
8085 else if (code == 'q')
8086 code = 8;
8087 else if (code == 'y')
8088 code = 3;
8089 else if (code == 'h')
8090 code = 0;
8091 else
8092 code = GET_MODE_SIZE (GET_MODE (x));
8093
8094 /* Irritatingly, AMD extended registers use a different naming convention
8095 from the normal registers. */
8096 if (REX_INT_REG_P (x))
8097 {
8098 gcc_assert (TARGET_64BIT);
8099 switch (code)
8100 {
8101 case 0:
8102 error ("extended registers have no high halves");
8103 break;
8104 case 1:
8105 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8106 break;
8107 case 2:
8108 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8109 break;
8110 case 4:
8111 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8112 break;
8113 case 8:
8114 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8115 break;
8116 default:
8117 error ("unsupported operand size for extended register");
8118 break;
8119 }
8120 return;
8121 }
8122 switch (code)
8123 {
8124 case 3:
8125 if (STACK_TOP_P (x))
8126 {
8127 fputs ("st(0)", file);
8128 break;
8129 }
8130 /* FALLTHRU */
8131 case 8:
8132 case 4:
8133 case 12:
8134 if (! ANY_FP_REG_P (x))
8135 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8136 /* FALLTHRU */
8137 case 16:
8138 case 2:
8139 normal:
8140 fputs (hi_reg_name[REGNO (x)], file);
8141 break;
8142 case 1:
8143 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8144 goto normal;
8145 fputs (qi_reg_name[REGNO (x)], file);
8146 break;
8147 case 0:
8148 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8149 goto normal;
8150 fputs (qi_high_reg_name[REGNO (x)], file);
8151 break;
8152 default:
8153 gcc_unreachable ();
8154 }
8155 }
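/* For example, given the AX hard register, CODE 'b' prints "%al",
   'w' prints "%ax", 'k' prints "%eax" and 'q' prints "%rax" (64bit
   only); 'h' prints the high byte "%ah".  The REX registers follow
   the r8..r15 naming, e.g. "%r8d" for the SImode part of r8.  */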
8156
8157 /* Locate some local-dynamic symbol still in use by this function
8158 so that we can print its name in some tls_local_dynamic_base
8159 pattern. */
8160
8161 static const char *
8162 get_some_local_dynamic_name (void)
8163 {
8164 rtx insn;
8165
8166 if (cfun->machine->some_ld_name)
8167 return cfun->machine->some_ld_name;
8168
8169 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8170 if (INSN_P (insn)
8171 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8172 return cfun->machine->some_ld_name;
8173
8174 gcc_unreachable ();
8175 }
8176
8177 static int
8178 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8179 {
8180 rtx x = *px;
8181
8182 if (GET_CODE (x) == SYMBOL_REF
8183 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8184 {
8185 cfun->machine->some_ld_name = XSTR (x, 0);
8186 return 1;
8187 }
8188
8189 return 0;
8190 }
8191
8192 /* Meaning of CODE:
8193 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8194 C -- print opcode suffix for set/cmov insn.
8195 c -- like C, but print reversed condition
8196 F,f -- likewise, but for floating-point.
8197 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8198 otherwise nothing
8199 R -- print the prefix for register names.
8200 z -- print the opcode suffix for the size of the current operand.
8201 * -- print a star (in certain assembler syntax)
8202 A -- print an absolute memory reference.
8203 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8204 s -- print a shift double count, followed by the assembler's argument
8205 delimiter.
8206 b -- print the QImode name of the register for the indicated operand.
8207 %b0 would print %al if operands[0] is reg 0.
8208 w -- likewise, print the HImode name of the register.
8209 k -- likewise, print the SImode name of the register.
8210 q -- likewise, print the DImode name of the register.
8211 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8212 y -- print "st(0)" instead of "st" as a register.
8213 D -- print condition for SSE cmp instruction.
8214 P -- if PIC, print an @PLT suffix.
8215 X -- don't print any sort of PIC '@' suffix for a symbol.
8216 & -- print some in-use local-dynamic symbol name.
8217 H -- print a memory address offset by 8; used for sse high-parts
8218 */
8219
8220 void
8221 print_operand (FILE *file, rtx x, int code)
8222 {
8223 if (code)
8224 {
8225 switch (code)
8226 {
8227 case '*':
8228 if (ASSEMBLER_DIALECT == ASM_ATT)
8229 putc ('*', file);
8230 return;
8231
8232 case '&':
8233 assemble_name (file, get_some_local_dynamic_name ());
8234 return;
8235
8236 case 'A':
8237 switch (ASSEMBLER_DIALECT)
8238 {
8239 case ASM_ATT:
8240 putc ('*', file);
8241 break;
8242
8243 case ASM_INTEL:
8244 /* Intel syntax. For absolute addresses, registers should not
8245 be surrounded by brackets. */
8246 if (!REG_P (x))
8247 {
8248 putc ('[', file);
8249 PRINT_OPERAND (file, x, 0);
8250 putc (']', file);
8251 return;
8252 }
8253 break;
8254
8255 default:
8256 gcc_unreachable ();
8257 }
8258
8259 PRINT_OPERAND (file, x, 0);
8260 return;
8261
8262
8263 case 'L':
8264 if (ASSEMBLER_DIALECT == ASM_ATT)
8265 putc ('l', file);
8266 return;
8267
8268 case 'W':
8269 if (ASSEMBLER_DIALECT == ASM_ATT)
8270 putc ('w', file);
8271 return;
8272
8273 case 'B':
8274 if (ASSEMBLER_DIALECT == ASM_ATT)
8275 putc ('b', file);
8276 return;
8277
8278 case 'Q':
8279 if (ASSEMBLER_DIALECT == ASM_ATT)
8280 putc ('l', file);
8281 return;
8282
8283 case 'S':
8284 if (ASSEMBLER_DIALECT == ASM_ATT)
8285 putc ('s', file);
8286 return;
8287
8288 case 'T':
8289 if (ASSEMBLER_DIALECT == ASM_ATT)
8290 putc ('t', file);
8291 return;
8292
8293 case 'z':
8294 /* 387 opcodes don't get size suffixes if the operands are
8295 registers. */
8296 if (STACK_REG_P (x))
8297 return;
8298
8299 /* Likewise if using Intel opcodes. */
8300 if (ASSEMBLER_DIALECT == ASM_INTEL)
8301 return;
8302
8303 /* Derive the size suffix of the op from the size of the operand. */
8304 switch (GET_MODE_SIZE (GET_MODE (x)))
8305 {
8306 case 1:
8307 putc ('b', file);
8308 return;
8309
8310 case 2:
8311 #ifdef HAVE_GAS_FILDS_FISTS
8312 putc ('s', file);
8313 #endif
8314 return;
8315
8316 case 4:
8317 if (GET_MODE (x) == SFmode)
8318 {
8319 putc ('s', file);
8320 return;
8321 }
8322 else
8323 putc ('l', file);
8324 return;
8325
8326 case 12:
8327 case 16:
8328 putc ('t', file);
8329 return;
8330
8331 case 8:
8332 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8333 {
8334 #ifdef GAS_MNEMONICS
8335 putc ('q', file);
8336 #else
8337 putc ('l', file);
8338 putc ('l', file);
8339 #endif
8340 }
8341 else
8342 putc ('l', file);
8343 return;
8344
8345 default:
8346 gcc_unreachable ();
8347 }
8348
8349 case 'b':
8350 case 'w':
8351 case 'k':
8352 case 'q':
8353 case 'h':
8354 case 'y':
8355 case 'X':
8356 case 'P':
8357 break;
8358
8359 case 's':
8360 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8361 {
8362 PRINT_OPERAND (file, x, 0);
8363 putc (',', file);
8364 }
8365 return;
8366
8367 case 'D':
8368 /* A little bit of braindamage here. The SSE compare instructions
8369 use completely different names for the comparisons than the
8370 fp conditional moves do. */
8371 switch (GET_CODE (x))
8372 {
8373 case EQ:
8374 case UNEQ:
8375 fputs ("eq", file);
8376 break;
8377 case LT:
8378 case UNLT:
8379 fputs ("lt", file);
8380 break;
8381 case LE:
8382 case UNLE:
8383 fputs ("le", file);
8384 break;
8385 case UNORDERED:
8386 fputs ("unord", file);
8387 break;
8388 case NE:
8389 case LTGT:
8390 fputs ("neq", file);
8391 break;
8392 case UNGE:
8393 case GE:
8394 fputs ("nlt", file);
8395 break;
8396 case UNGT:
8397 case GT:
8398 fputs ("nle", file);
8399 break;
8400 case ORDERED:
8401 fputs ("ord", file);
8402 break;
8403 default:
8404 gcc_unreachable ();
8405 }
8406 return;
8407 case 'O':
8408 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8409 if (ASSEMBLER_DIALECT == ASM_ATT)
8410 {
8411 switch (GET_MODE (x))
8412 {
8413 case HImode: putc ('w', file); break;
8414 case SImode:
8415 case SFmode: putc ('l', file); break;
8416 case DImode:
8417 case DFmode: putc ('q', file); break;
8418 default: gcc_unreachable ();
8419 }
8420 putc ('.', file);
8421 }
8422 #endif
8423 return;
8424 case 'C':
8425 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8426 return;
8427 case 'F':
8428 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8429 if (ASSEMBLER_DIALECT == ASM_ATT)
8430 putc ('.', file);
8431 #endif
8432 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8433 return;
8434
8435 /* Like above, but reverse condition */
8436 case 'c':
8437 /* Check to see if argument to %c is really a constant
8438 and not a condition code which needs to be reversed. */
8439 if (!COMPARISON_P (x))
8440 {
8441 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8442 return;
8443 }
8444 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8445 return;
8446 case 'f':
8447 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8448 if (ASSEMBLER_DIALECT == ASM_ATT)
8449 putc ('.', file);
8450 #endif
8451 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8452 return;
8453
8454 case 'H':
8455 /* It doesn't actually matter what mode we use here, as we're
8456 only going to use this for printing. */
8457 x = adjust_address_nv (x, DImode, 8);
8458 break;
8459
8460 case '+':
8461 {
8462 rtx x;
8463
8464 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8465 return;
8466
8467 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8468 if (x)
8469 {
8470 int pred_val = INTVAL (XEXP (x, 0));
8471
8472 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8473 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8474 {
8475 int taken = pred_val > REG_BR_PROB_BASE / 2;
8476 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8477
8478 /* Emit hints only in the case where the default branch prediction
8479 heuristics would fail. */
8480 if (taken != cputaken)
8481 {
8482 /* We use 3e (DS) prefix for taken branches and
8483 2e (CS) prefix for not taken branches. */
8484 if (taken)
8485 fputs ("ds ; ", file);
8486 else
8487 fputs ("cs ; ", file);
8488 }
8489 }
8490 }
8491 return;
8492 }
8493 default:
8494 output_operand_lossage ("invalid operand code '%c'", code);
8495 }
8496 }
8497
8498 if (REG_P (x))
8499 print_reg (x, code, file);
8500
8501 else if (MEM_P (x))
8502 {
8503 /* No `byte ptr' prefix for call instructions. */
8504 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8505 {
8506 const char * size;
8507 switch (GET_MODE_SIZE (GET_MODE (x)))
8508 {
8509 case 1: size = "BYTE"; break;
8510 case 2: size = "WORD"; break;
8511 case 4: size = "DWORD"; break;
8512 case 8: size = "QWORD"; break;
8513 case 12: size = "XWORD"; break;
8514 case 16: size = "XMMWORD"; break;
8515 default:
8516 gcc_unreachable ();
8517 }
8518
8519 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8520 if (code == 'b')
8521 size = "BYTE";
8522 else if (code == 'w')
8523 size = "WORD";
8524 else if (code == 'k')
8525 size = "DWORD";
8526
8527 fputs (size, file);
8528 fputs (" PTR ", file);
8529 }
8530
8531 x = XEXP (x, 0);
8532 /* Avoid (%rip) for call operands. */
8533 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8534 && !CONST_INT_P (x))
8535 output_addr_const (file, x);
8536 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8537 output_operand_lossage ("invalid constraints for operand");
8538 else
8539 output_address (x);
8540 }
8541
8542 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8543 {
8544 REAL_VALUE_TYPE r;
8545 long l;
8546
8547 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8548 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8549
8550 if (ASSEMBLER_DIALECT == ASM_ATT)
8551 putc ('$', file);
8552 fprintf (file, "0x%08lx", l);
8553 }
8554
8555 /* These float cases don't actually occur as immediate operands. */
8556 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8557 {
8558 char dstr[30];
8559
8560 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8561 fprintf (file, "%s", dstr);
8562 }
8563
8564 else if (GET_CODE (x) == CONST_DOUBLE
8565 && GET_MODE (x) == XFmode)
8566 {
8567 char dstr[30];
8568
8569 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8570 fprintf (file, "%s", dstr);
8571 }
8572
8573 else
8574 {
8575 /* We have patterns that allow zero sets of memory, for instance.
8576 In 64-bit mode, we should probably support all 8-byte vectors,
8577 since we can in fact encode that into an immediate. */
8578 if (GET_CODE (x) == CONST_VECTOR)
8579 {
8580 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8581 x = const0_rtx;
8582 }
8583
8584 if (code != 'P')
8585 {
8586 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8587 {
8588 if (ASSEMBLER_DIALECT == ASM_ATT)
8589 putc ('$', file);
8590 }
8591 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8592 || GET_CODE (x) == LABEL_REF)
8593 {
8594 if (ASSEMBLER_DIALECT == ASM_ATT)
8595 putc ('$', file);
8596 else
8597 fputs ("OFFSET FLAT:", file);
8598 }
8599 }
8600 if (CONST_INT_P (x))
8601 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8602 else if (flag_pic)
8603 output_pic_addr_const (file, x, code);
8604 else
8605 output_addr_const (file, x);
8606 }
8607 }
8608 \f
8609 /* Print a memory operand whose address is ADDR. */
8610
8611 void
8612 print_operand_address (FILE *file, rtx addr)
8613 {
8614 struct ix86_address parts;
8615 rtx base, index, disp;
8616 int scale;
8617 int ok = ix86_decompose_address (addr, &parts);
8618
8619 gcc_assert (ok);
8620
8621 base = parts.base;
8622 index = parts.index;
8623 disp = parts.disp;
8624 scale = parts.scale;
8625
8626 switch (parts.seg)
8627 {
8628 case SEG_DEFAULT:
8629 break;
8630 case SEG_FS:
8631 case SEG_GS:
8632 if (USER_LABEL_PREFIX[0] == 0)
8633 putc ('%', file);
8634 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8635 break;
8636 default:
8637 gcc_unreachable ();
8638 }
8639
8640 if (!base && !index)
8641 {
8642 /* A displacement-only address requires special attention. */
8643
8644 if (CONST_INT_P (disp))
8645 {
8646 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8647 {
8648 if (USER_LABEL_PREFIX[0] == 0)
8649 putc ('%', file);
8650 fputs ("ds:", file);
8651 }
8652 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8653 }
8654 else if (flag_pic)
8655 output_pic_addr_const (file, disp, 0);
8656 else
8657 output_addr_const (file, disp);
8658
8659 /* Use the one byte shorter RIP-relative addressing for 64bit mode. */
8660 if (TARGET_64BIT)
8661 {
8662 if (GET_CODE (disp) == CONST
8663 && GET_CODE (XEXP (disp, 0)) == PLUS
8664 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8665 disp = XEXP (XEXP (disp, 0), 0);
8666 if (GET_CODE (disp) == LABEL_REF
8667 || (GET_CODE (disp) == SYMBOL_REF
8668 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8669 fputs ("(%rip)", file);
8670 }
8671 }
8672 else
8673 {
8674 if (ASSEMBLER_DIALECT == ASM_ATT)
8675 {
8676 if (disp)
8677 {
8678 if (flag_pic)
8679 output_pic_addr_const (file, disp, 0);
8680 else if (GET_CODE (disp) == LABEL_REF)
8681 output_asm_label (disp);
8682 else
8683 output_addr_const (file, disp);
8684 }
8685
8686 putc ('(', file);
8687 if (base)
8688 print_reg (base, 0, file);
8689 if (index)
8690 {
8691 putc (',', file);
8692 print_reg (index, 0, file);
8693 if (scale != 1)
8694 fprintf (file, ",%d", scale);
8695 }
8696 putc (')', file);
8697 }
8698 else
8699 {
8700 rtx offset = NULL_RTX;
8701
8702 if (disp)
8703 {
8704 /* Pull out the offset of a symbol; print any symbol itself. */
8705 if (GET_CODE (disp) == CONST
8706 && GET_CODE (XEXP (disp, 0)) == PLUS
8707 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8708 {
8709 offset = XEXP (XEXP (disp, 0), 1);
8710 disp = gen_rtx_CONST (VOIDmode,
8711 XEXP (XEXP (disp, 0), 0));
8712 }
8713
8714 if (flag_pic)
8715 output_pic_addr_const (file, disp, 0);
8716 else if (GET_CODE (disp) == LABEL_REF)
8717 output_asm_label (disp);
8718 else if (CONST_INT_P (disp))
8719 offset = disp;
8720 else
8721 output_addr_const (file, disp);
8722 }
8723
8724 putc ('[', file);
8725 if (base)
8726 {
8727 print_reg (base, 0, file);
8728 if (offset)
8729 {
8730 if (INTVAL (offset) >= 0)
8731 putc ('+', file);
8732 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8733 }
8734 }
8735 else if (offset)
8736 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8737 else
8738 putc ('0', file);
8739
8740 if (index)
8741 {
8742 putc ('+', file);
8743 print_reg (index, 0, file);
8744 if (scale != 1)
8745 fprintf (file, "*%d", scale);
8746 }
8747 putc (']', file);
8748 }
8749 }
8750 }
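/* Illustrative output for a base + index*scale + disp address, with
   placeholder registers and constants (the "%" prefix depends on the
   dialect and USER_LABEL_PREFIX):
     AT&T dialect:   16(%ebx,%ecx,4)
     Intel dialect:  [ebx+16+ecx*4]  */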
8751
8752 bool
8753 output_addr_const_extra (FILE *file, rtx x)
8754 {
8755 rtx op;
8756
8757 if (GET_CODE (x) != UNSPEC)
8758 return false;
8759
8760 op = XVECEXP (x, 0, 0);
8761 switch (XINT (x, 1))
8762 {
8763 case UNSPEC_GOTTPOFF:
8764 output_addr_const (file, op);
8765 /* FIXME: This might be @TPOFF in Sun ld. */
8766 fputs ("@GOTTPOFF", file);
8767 break;
8768 case UNSPEC_TPOFF:
8769 output_addr_const (file, op);
8770 fputs ("@TPOFF", file);
8771 break;
8772 case UNSPEC_NTPOFF:
8773 output_addr_const (file, op);
8774 if (TARGET_64BIT)
8775 fputs ("@TPOFF", file);
8776 else
8777 fputs ("@NTPOFF", file);
8778 break;
8779 case UNSPEC_DTPOFF:
8780 output_addr_const (file, op);
8781 fputs ("@DTPOFF", file);
8782 break;
8783 case UNSPEC_GOTNTPOFF:
8784 output_addr_const (file, op);
8785 if (TARGET_64BIT)
8786 fputs ("@GOTTPOFF(%rip)", file);
8787 else
8788 fputs ("@GOTNTPOFF", file);
8789 break;
8790 case UNSPEC_INDNTPOFF:
8791 output_addr_const (file, op);
8792 fputs ("@INDNTPOFF", file);
8793 break;
8794
8795 default:
8796 return false;
8797 }
8798
8799 return true;
8800 }
8801 \f
8802 /* Split one or more DImode RTL references into pairs of SImode
8803 references. The RTL can be REG, offsettable MEM, integer constant, or
8804 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8805 split and "num" is its length. lo_half and hi_half are output arrays
8806 that parallel "operands". */
8807
8808 void
8809 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8810 {
8811 while (num--)
8812 {
8813 rtx op = operands[num];
8814
8815 /* simplify_subreg refuses to split volatile memory addresses,
8816 but we still have to handle them. */
8817 if (MEM_P (op))
8818 {
8819 lo_half[num] = adjust_address (op, SImode, 0);
8820 hi_half[num] = adjust_address (op, SImode, 4);
8821 }
8822 else
8823 {
8824 lo_half[num] = simplify_gen_subreg (SImode, op,
8825 GET_MODE (op) == VOIDmode
8826 ? DImode : GET_MODE (op), 0);
8827 hi_half[num] = simplify_gen_subreg (SImode, op,
8828 GET_MODE (op) == VOIDmode
8829 ? DImode : GET_MODE (op), 4);
8830 }
8831 }
8832 }
8833 /* Split one or more TImode RTL references into pairs of DImode
8834 references. The RTL can be REG, offsettable MEM, integer constant, or
8835 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8836 split and "num" is its length. lo_half and hi_half are output arrays
8837 that parallel "operands". */
8838
8839 void
8840 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8841 {
8842 while (num--)
8843 {
8844 rtx op = operands[num];
8845
8846 /* simplify_subreg refuses to split volatile memory addresses, but we
8847 still have to handle them. */
8848 if (MEM_P (op))
8849 {
8850 lo_half[num] = adjust_address (op, DImode, 0);
8851 hi_half[num] = adjust_address (op, DImode, 8);
8852 }
8853 else
8854 {
8855 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8856 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8857 }
8858 }
8859 }
8860 \f
8861 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8862 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8863 is the expression of the binary operation. The output may either be
8864 emitted here, or returned to the caller, like all output_* functions.
8865
8866 There is no guarantee that the operands are the same mode, as they
8867 might be within FLOAT or FLOAT_EXTEND expressions. */
8868
8869 #ifndef SYSV386_COMPAT
8870 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8871 wants to fix the assemblers because that causes incompatibility
8872 with gcc. No-one wants to fix gcc because that causes
8873 incompatibility with assemblers... You can use the option of
8874 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8875 #define SYSV386_COMPAT 1
8876 #endif
8877
8878 const char *
8879 output_387_binary_op (rtx insn, rtx *operands)
8880 {
8881 static char buf[30];
8882 const char *p;
8883 const char *ssep;
8884 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8885
8886 #ifdef ENABLE_CHECKING
8887 /* Even if we do not want to check the inputs, this documents the input
8888 constraints, which helps in understanding the following code. */
8889 if (STACK_REG_P (operands[0])
8890 && ((REG_P (operands[1])
8891 && REGNO (operands[0]) == REGNO (operands[1])
8892 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8893 || (REG_P (operands[2])
8894 && REGNO (operands[0]) == REGNO (operands[2])
8895 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8896 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8897 ; /* ok */
8898 else
8899 gcc_assert (is_sse);
8900 #endif
8901
8902 switch (GET_CODE (operands[3]))
8903 {
8904 case PLUS:
8905 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8906 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8907 p = "fiadd";
8908 else
8909 p = "fadd";
8910 ssep = "add";
8911 break;
8912
8913 case MINUS:
8914 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8915 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8916 p = "fisub";
8917 else
8918 p = "fsub";
8919 ssep = "sub";
8920 break;
8921
8922 case MULT:
8923 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8924 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8925 p = "fimul";
8926 else
8927 p = "fmul";
8928 ssep = "mul";
8929 break;
8930
8931 case DIV:
8932 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8933 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8934 p = "fidiv";
8935 else
8936 p = "fdiv";
8937 ssep = "div";
8938 break;
8939
8940 default:
8941 gcc_unreachable ();
8942 }
8943
8944 if (is_sse)
8945 {
8946 strcpy (buf, ssep);
8947 if (GET_MODE (operands[0]) == SFmode)
8948 strcat (buf, "ss\t{%2, %0|%0, %2}");
8949 else
8950 strcat (buf, "sd\t{%2, %0|%0, %2}");
8951 return buf;
8952 }
8953 strcpy (buf, p);
8954
8955 switch (GET_CODE (operands[3]))
8956 {
8957 case MULT:
8958 case PLUS:
8959 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8960 {
8961 rtx temp = operands[2];
8962 operands[2] = operands[1];
8963 operands[1] = temp;
8964 }
8965
8966 /* We know operands[0] == operands[1]. */
8967
8968 if (MEM_P (operands[2]))
8969 {
8970 p = "%z2\t%2";
8971 break;
8972 }
8973
8974 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8975 {
8976 if (STACK_TOP_P (operands[0]))
8977 /* How is it that we are storing to a dead operand[2]?
8978 Well, presumably operands[1] is dead too. We can't
8979 store the result to st(0) as st(0) gets popped on this
8980 instruction. Instead store to operands[2] (which I
8981 think has to be st(1)). st(1) will be popped later.
8982 gcc <= 2.8.1 didn't have this check and generated
8983 assembly code that the Unixware assembler rejected. */
8984 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8985 else
8986 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8987 break;
8988 }
8989
8990 if (STACK_TOP_P (operands[0]))
8991 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8992 else
8993 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8994 break;
8995
8996 case MINUS:
8997 case DIV:
8998 if (MEM_P (operands[1]))
8999 {
9000 p = "r%z1\t%1";
9001 break;
9002 }
9003
9004 if (MEM_P (operands[2]))
9005 {
9006 p = "%z2\t%2";
9007 break;
9008 }
9009
9010 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9011 {
9012 #if SYSV386_COMPAT
9013 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9014 derived assemblers, confusingly reverse the direction of
9015 the operation for fsub{r} and fdiv{r} when the
9016 destination register is not st(0). The Intel assembler
9017 doesn't have this brain damage. Read !SYSV386_COMPAT to
9018 figure out what the hardware really does. */
9019 if (STACK_TOP_P (operands[0]))
9020 p = "{p\t%0, %2|rp\t%2, %0}";
9021 else
9022 p = "{rp\t%2, %0|p\t%0, %2}";
9023 #else
9024 if (STACK_TOP_P (operands[0]))
9025 /* As above for fmul/fadd, we can't store to st(0). */
9026 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9027 else
9028 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9029 #endif
9030 break;
9031 }
9032
9033 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9034 {
9035 #if SYSV386_COMPAT
9036 if (STACK_TOP_P (operands[0]))
9037 p = "{rp\t%0, %1|p\t%1, %0}";
9038 else
9039 p = "{p\t%1, %0|rp\t%0, %1}";
9040 #else
9041 if (STACK_TOP_P (operands[0]))
9042 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9043 else
9044 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9045 #endif
9046 break;
9047 }
9048
9049 if (STACK_TOP_P (operands[0]))
9050 {
9051 if (STACK_TOP_P (operands[1]))
9052 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9053 else
9054 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9055 break;
9056 }
9057 else if (STACK_TOP_P (operands[1]))
9058 {
9059 #if SYSV386_COMPAT
9060 p = "{\t%1, %0|r\t%0, %1}";
9061 #else
9062 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9063 #endif
9064 }
9065 else
9066 {
9067 #if SYSV386_COMPAT
9068 p = "{r\t%2, %0|\t%0, %2}";
9069 #else
9070 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9071 #endif
9072 }
9073 break;
9074
9075 default:
9076 gcc_unreachable ();
9077 }
9078
9079 strcat (buf, p);
9080 return buf;
9081 }
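
/* Illustrative examples of the strings built above (a sketch, not an
   exhaustive list): for an SSE SFmode addition the function returns
   "addss\t{%2, %0|%0, %2}", and for a 387 addition whose result stays in
   %st(0) with a live register operand it returns "fadd\t{%y2, %0|%0, %y2}";
   the {att|intel} braces are resolved later according to
   ASSEMBLER_DIALECT.  */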
9082
9083 /* Return needed mode for entity in optimize_mode_switching pass. */
9084
9085 int
9086 ix86_mode_needed (int entity, rtx insn)
9087 {
9088 enum attr_i387_cw mode;
9089
9090 /* The mode UNINITIALIZED is used to store the control word after a
9091 function call or ASM pattern. The mode ANY specifies that the function
9092 has no requirements on the control word and makes no changes to the
9093 bits we are interested in. */
9094
9095 if (CALL_P (insn)
9096 || (NONJUMP_INSN_P (insn)
9097 && (asm_noperands (PATTERN (insn)) >= 0
9098 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9099 return I387_CW_UNINITIALIZED;
9100
9101 if (recog_memoized (insn) < 0)
9102 return I387_CW_ANY;
9103
9104 mode = get_attr_i387_cw (insn);
9105
9106 switch (entity)
9107 {
9108 case I387_TRUNC:
9109 if (mode == I387_CW_TRUNC)
9110 return mode;
9111 break;
9112
9113 case I387_FLOOR:
9114 if (mode == I387_CW_FLOOR)
9115 return mode;
9116 break;
9117
9118 case I387_CEIL:
9119 if (mode == I387_CW_CEIL)
9120 return mode;
9121 break;
9122
9123 case I387_MASK_PM:
9124 if (mode == I387_CW_MASK_PM)
9125 return mode;
9126 break;
9127
9128 default:
9129 gcc_unreachable ();
9130 }
9131
9132 return I387_CW_ANY;
9133 }
9134
9135 /* Output code to initialize the control word copies used by the trunc?f?i
9136 and rounding patterns. MODE selects the rounding behavior; the current
9137 control word is saved and a modified copy is stored in MODE's stack slot. */
9138
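/* For reference, a sketch of the encoding being manipulated (the standard
   x87 control word layout): bits 10-11 form the rounding-control field and
   bit 5 is the precision-exception mask, which is exactly what the masks
   below select:

	0x0c00	RC = 11  round toward zero (truncate)
	0x0400	RC = 01  round down, toward -inf
	0x0800	RC = 10  round up, toward +inf
	0x0020	PM       mask the precision exception (for nearbyint)

   so "or 0x0c00" forces truncation, while "and ~0x0c00; or 0x0400" switches
   the field to round-down without touching the other bits.  */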
9139 void
9140 emit_i387_cw_initialization (int mode)
9141 {
9142 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9143 rtx new_mode;
9144
9145 int slot;
9146
9147 rtx reg = gen_reg_rtx (HImode);
9148
9149 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9150 emit_move_insn (reg, copy_rtx (stored_mode));
9151
9152 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9153 {
9154 switch (mode)
9155 {
9156 case I387_CW_TRUNC:
9157 /* round toward zero (truncate) */
9158 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9159 slot = SLOT_CW_TRUNC;
9160 break;
9161
9162 case I387_CW_FLOOR:
9163 /* round down toward -oo */
9164 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9165 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9166 slot = SLOT_CW_FLOOR;
9167 break;
9168
9169 case I387_CW_CEIL:
9170 /* round up toward +oo */
9171 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9172 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9173 slot = SLOT_CW_CEIL;
9174 break;
9175
9176 case I387_CW_MASK_PM:
9177 /* mask precision exception for nearbyint() */
9178 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9179 slot = SLOT_CW_MASK_PM;
9180 break;
9181
9182 default:
9183 gcc_unreachable ();
9184 }
9185 }
9186 else
9187 {
9188 switch (mode)
9189 {
9190 case I387_CW_TRUNC:
9191 /* round toward zero (truncate) */
9192 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9193 slot = SLOT_CW_TRUNC;
9194 break;
9195
9196 case I387_CW_FLOOR:
9197 /* round down toward -oo */
9198 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9199 slot = SLOT_CW_FLOOR;
9200 break;
9201
9202 case I387_CW_CEIL:
9203 /* round up toward +oo */
9204 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9205 slot = SLOT_CW_CEIL;
9206 break;
9207
9208 case I387_CW_MASK_PM:
9209 /* mask precision exception for nearbyint() */
9210 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9211 slot = SLOT_CW_MASK_PM;
9212 break;
9213
9214 default:
9215 gcc_unreachable ();
9216 }
9217 }
9218
9219 gcc_assert (slot < MAX_386_STACK_LOCALS);
9220
9221 new_mode = assign_386_stack_local (HImode, slot);
9222 emit_move_insn (new_mode, reg);
9223 }
9224
9225 /* Output code for INSN to convert a float to a signed int. OPERANDS
9226 are the insn operands. The output may be [HSD]Imode and the input
9227 operand may be [SDX]Fmode. */
9228
9229 const char *
9230 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9231 {
9232 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9233 int dimode_p = GET_MODE (operands[0]) == DImode;
9234 int round_mode = get_attr_i387_cw (insn);
9235
9236 /* Jump through a hoop or two for DImode, since the hardware has no
9237 non-popping instruction. We used to do this a different way, but
9238 that was somewhat fragile and broke with post-reload splitters. */
9239 if ((dimode_p || fisttp) && !stack_top_dies)
9240 output_asm_insn ("fld\t%y1", operands);
9241
9242 gcc_assert (STACK_TOP_P (operands[1]));
9243 gcc_assert (MEM_P (operands[0]));
9244
9245 if (fisttp)
9246 output_asm_insn ("fisttp%z0\t%0", operands);
9247 else
9248 {
9249 if (round_mode != I387_CW_ANY)
9250 output_asm_insn ("fldcw\t%3", operands);
9251 if (stack_top_dies || dimode_p)
9252 output_asm_insn ("fistp%z0\t%0", operands);
9253 else
9254 output_asm_insn ("fist%z0\t%0", operands);
9255 if (round_mode != I387_CW_ANY)
9256 output_asm_insn ("fldcw\t%2", operands);
9257 }
9258
9259 return "";
9260 }
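
/* As an illustration (a sketch in AT&T syntax, using the %-operands from
   the templates above): a DImode truncation without fisttp, where the stack
   top stays live, comes out roughly as

	fld	%y1		; duplicate, since fistp pops
	fldcw	%3		; switch to the truncating control word
	fistp%z0	%0	; popping integer store
	fldcw	%2		; restore the original control word

   whereas with a fisttp-capable CPU and assembler the whole sequence
   collapses to a single "fisttp%z0\t%0".  */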
9261
9262 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9263 have the values zero or one, indicates the ffreep insn's operand
9264 from the OPERANDS array. */
9265
9266 static const char *
9267 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9268 {
9269 if (TARGET_USE_FFREEP)
9270 #if HAVE_AS_IX86_FFREEP
9271 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9272 #else
9273 {
9274 static char retval[] = ".word\t0xc_df";
9275 int regno = REGNO (operands[opno]);
9276
9277 gcc_assert (FP_REGNO_P (regno));
9278
9279 retval[9] = '0' + (regno - FIRST_STACK_REG);
9280 return retval;
9281 }
9282 #endif
9283
9284 return opno ? "fstp\t%y1" : "fstp\t%y0";
9285 }
9286
9287
9288 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9289 should be used. UNORDERED_P is true when fucom should be used. */
9290
9291 const char *
9292 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9293 {
9294 int stack_top_dies;
9295 rtx cmp_op0, cmp_op1;
9296 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9297
9298 if (eflags_p)
9299 {
9300 cmp_op0 = operands[0];
9301 cmp_op1 = operands[1];
9302 }
9303 else
9304 {
9305 cmp_op0 = operands[1];
9306 cmp_op1 = operands[2];
9307 }
9308
9309 if (is_sse)
9310 {
9311 if (GET_MODE (operands[0]) == SFmode)
9312 if (unordered_p)
9313 return "ucomiss\t{%1, %0|%0, %1}";
9314 else
9315 return "comiss\t{%1, %0|%0, %1}";
9316 else
9317 if (unordered_p)
9318 return "ucomisd\t{%1, %0|%0, %1}";
9319 else
9320 return "comisd\t{%1, %0|%0, %1}";
9321 }
9322
9323 gcc_assert (STACK_TOP_P (cmp_op0));
9324
9325 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9326
9327 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9328 {
9329 if (stack_top_dies)
9330 {
9331 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9332 return output_387_ffreep (operands, 1);
9333 }
9334 else
9335 return "ftst\n\tfnstsw\t%0";
9336 }
9337
9338 if (STACK_REG_P (cmp_op1)
9339 && stack_top_dies
9340 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9341 && REGNO (cmp_op1) != FIRST_STACK_REG)
9342 {
9343 /* If the top of the 387 stack dies, and the other operand is also a
9344 stack register that dies, then this must be a `fcompp' float
9345 compare. */
9346
9347 if (eflags_p)
9348 {
9349 /* There is no double popping fcomi variant. Fortunately,
9350 eflags is immune from the fstp's cc clobbering. */
9351 if (unordered_p)
9352 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9353 else
9354 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9355 return output_387_ffreep (operands, 0);
9356 }
9357 else
9358 {
9359 if (unordered_p)
9360 return "fucompp\n\tfnstsw\t%0";
9361 else
9362 return "fcompp\n\tfnstsw\t%0";
9363 }
9364 }
9365 else
9366 {
9367 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9368
9369 static const char * const alt[16] =
9370 {
9371 "fcom%z2\t%y2\n\tfnstsw\t%0",
9372 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9373 "fucom%z2\t%y2\n\tfnstsw\t%0",
9374 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9375
9376 "ficom%z2\t%y2\n\tfnstsw\t%0",
9377 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9378 NULL,
9379 NULL,
9380
9381 "fcomi\t{%y1, %0|%0, %y1}",
9382 "fcomip\t{%y1, %0|%0, %y1}",
9383 "fucomi\t{%y1, %0|%0, %y1}",
9384 "fucomip\t{%y1, %0|%0, %y1}",
9385
9386 NULL,
9387 NULL,
9388 NULL,
9389 NULL
9390 };
9391
9392 int mask;
9393 const char *ret;
9394
9395 mask = eflags_p << 3;
9396 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9397 mask |= unordered_p << 1;
9398 mask |= stack_top_dies;
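/* For instance (illustrative): a dying stack top, an fcomi-style compare
   and an unordered test give mask = 8 | 0 | 2 | 1 = 11, selecting the
   "fucomip" alternative above. */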
9399
9400 gcc_assert (mask < 16);
9401 ret = alt[mask];
9402 gcc_assert (ret);
9403
9404 return ret;
9405 }
9406 }
9407
9408 void
9409 ix86_output_addr_vec_elt (FILE *file, int value)
9410 {
9411 const char *directive = ASM_LONG;
9412
9413 #ifdef ASM_QUAD
9414 if (TARGET_64BIT)
9415 directive = ASM_QUAD;
9416 #else
9417 gcc_assert (!TARGET_64BIT);
9418 #endif
9419
9420 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9421 }
9422
9423 void
9424 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9425 {
9426 const char *directive = ASM_LONG;
9427
9428 #ifdef ASM_QUAD
9429 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9430 directive = ASM_QUAD;
9431 #else
9432 gcc_assert (!TARGET_64BIT);
9433 #endif
9434 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9435 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9436 fprintf (file, "%s%s%d-%s%d\n",
9437 directive, LPREFIX, value, LPREFIX, rel);
9438 else if (HAVE_AS_GOTOFF_IN_DATA)
9439 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9440 #if TARGET_MACHO
9441 else if (TARGET_MACHO)
9442 {
9443 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9444 machopic_output_function_base_name (file);
9445 fprintf(file, "\n");
9446 }
9447 #endif
9448 else
9449 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9450 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9451 }
9452 \f
9453 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9454 for the target. */
9455
9456 void
9457 ix86_expand_clear (rtx dest)
9458 {
9459 rtx tmp;
9460
9461 /* We play register width games, which are only valid after reload. */
9462 gcc_assert (reload_completed);
9463
9464 /* Avoid HImode and its attendant prefix byte. */
9465 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9466 dest = gen_rtx_REG (SImode, REGNO (dest));
9467
9468 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9469
9470 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9471 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9472 {
9473 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9474 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9475 }
9476
9477 emit_insn (tmp);
9478 }
9479
9480 /* X is an unchanging MEM. If it is a constant pool reference, return
9481 the constant pool rtx, else NULL. */
9482
9483 rtx
9484 maybe_get_pool_constant (rtx x)
9485 {
9486 x = ix86_delegitimize_address (XEXP (x, 0));
9487
9488 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9489 return get_pool_constant (x);
9490
9491 return NULL_RTX;
9492 }
9493
9494 void
9495 ix86_expand_move (enum machine_mode mode, rtx operands[])
9496 {
9497 int strict = (reload_in_progress || reload_completed);
9498 rtx op0, op1;
9499 enum tls_model model;
9500
9501 op0 = operands[0];
9502 op1 = operands[1];
9503
9504 if (GET_CODE (op1) == SYMBOL_REF)
9505 {
9506 model = SYMBOL_REF_TLS_MODEL (op1);
9507 if (model)
9508 {
9509 op1 = legitimize_tls_address (op1, model, true);
9510 op1 = force_operand (op1, op0);
9511 if (op1 == op0)
9512 return;
9513 }
9514 }
9515 else if (GET_CODE (op1) == CONST
9516 && GET_CODE (XEXP (op1, 0)) == PLUS
9517 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9518 {
9519 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9520 if (model)
9521 {
9522 rtx addend = XEXP (XEXP (op1, 0), 1);
9523 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9524 op1 = force_operand (op1, NULL);
9525 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9526 op0, 1, OPTAB_DIRECT);
9527 if (op1 == op0)
9528 return;
9529 }
9530 }
9531
9532 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9533 {
9534 if (TARGET_MACHO && !TARGET_64BIT)
9535 {
9536 #if TARGET_MACHO
9537 if (MACHOPIC_PURE)
9538 {
9539 rtx temp = ((reload_in_progress
9540 || ((op0 && REG_P (op0))
9541 && mode == Pmode))
9542 ? op0 : gen_reg_rtx (Pmode));
9543 op1 = machopic_indirect_data_reference (op1, temp);
9544 op1 = machopic_legitimize_pic_address (op1, mode,
9545 temp == op1 ? 0 : temp);
9546 }
9547 else if (MACHOPIC_INDIRECT)
9548 op1 = machopic_indirect_data_reference (op1, 0);
9549 if (op0 == op1)
9550 return;
9551 #endif
9552 }
9553 else
9554 {
9555 if (MEM_P (op0))
9556 op1 = force_reg (Pmode, op1);
9557 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9558 {
9559 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9560 op1 = legitimize_pic_address (op1, reg);
9561 if (op0 == op1)
9562 return;
9563 }
9564 }
9565 }
9566 else
9567 {
9568 if (MEM_P (op0)
9569 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9570 || !push_operand (op0, mode))
9571 && MEM_P (op1))
9572 op1 = force_reg (mode, op1);
9573
9574 if (push_operand (op0, mode)
9575 && ! general_no_elim_operand (op1, mode))
9576 op1 = copy_to_mode_reg (mode, op1);
9577
9578 /* Force large constants in 64-bit compilation into a register
9579 to get them CSEed. */
9580 if (TARGET_64BIT && mode == DImode
9581 && immediate_operand (op1, mode)
9582 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9583 && !register_operand (op0, mode)
9584 && optimize && !reload_completed && !reload_in_progress)
9585 op1 = copy_to_mode_reg (mode, op1);
9586
9587 if (FLOAT_MODE_P (mode))
9588 {
9589 /* If we are loading a floating point constant into a register,
9590 force the value to memory now, since we'll get better code
9591 out of the back end. */
9592
9593 if (strict)
9594 ;
9595 else if (GET_CODE (op1) == CONST_DOUBLE)
9596 {
9597 op1 = validize_mem (force_const_mem (mode, op1));
9598 if (!register_operand (op0, mode))
9599 {
9600 rtx temp = gen_reg_rtx (mode);
9601 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9602 emit_move_insn (op0, temp);
9603 return;
9604 }
9605 }
9606 }
9607 }
9608
9609 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9610 }
9611
9612 void
9613 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9614 {
9615 rtx op0 = operands[0], op1 = operands[1];
9616
9617 /* Force constants other than zero into memory. We do not know how
9618 the instructions used to build constants modify the upper 64 bits
9619 of the register; once we have that information, we may be able
9620 to handle some of them more efficiently. */
9621 if ((reload_in_progress | reload_completed) == 0
9622 && register_operand (op0, mode)
9623 && CONSTANT_P (op1)
9624 && standard_sse_constant_p (op1) <= 0)
9625 op1 = validize_mem (force_const_mem (mode, op1));
9626
9627 /* Make operand1 a register if it isn't already. */
9628 if (!no_new_pseudos
9629 && !register_operand (op0, mode)
9630 && !register_operand (op1, mode))
9631 {
9632 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9633 return;
9634 }
9635
9636 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9637 }
9638
9639 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9640 straight to ix86_expand_vector_move. */
9641 /* Code generation for scalar reg-reg moves of single and double precision data:
9642 if (x86_sse_partial_reg_dependency == true | x86_sse_split_regs == true)
9643 movaps reg, reg
9644 else
9645 movss reg, reg
9646 if (x86_sse_partial_reg_dependency == true)
9647 movapd reg, reg
9648 else
9649 movsd reg, reg
9650
9651 Code generation for scalar loads of double precision data:
9652 if (x86_sse_split_regs == true)
9653 movlpd mem, reg (gas syntax)
9654 else
9655 movsd mem, reg
9656
9657 Code generation for unaligned packed loads of single precision data
9658 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9659 if (x86_sse_unaligned_move_optimal)
9660 movups mem, reg
9661
9662 if (x86_sse_partial_reg_dependency == true)
9663 {
9664 xorps reg, reg
9665 movlps mem, reg
9666 movhps mem+8, reg
9667 }
9668 else
9669 {
9670 movlps mem, reg
9671 movhps mem+8, reg
9672 }
9673
9674 Code generation for unaligned packed loads of double precision data
9675 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9676 if (x86_sse_unaligned_move_optimal)
9677 movupd mem, reg
9678
9679 if (x86_sse_split_regs == true)
9680 {
9681 movlpd mem, reg
9682 movhpd mem+8, reg
9683 }
9684 else
9685 {
9686 movsd mem, reg
9687 movhpd mem+8, reg
9688 }
9689 */
9690
9691 void
9692 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9693 {
9694 rtx op0, op1, m;
9695
9696 op0 = operands[0];
9697 op1 = operands[1];
9698
9699 if (MEM_P (op1))
9700 {
9701 /* If we're optimizing for size, movups is the smallest. */
9702 if (optimize_size)
9703 {
9704 op0 = gen_lowpart (V4SFmode, op0);
9705 op1 = gen_lowpart (V4SFmode, op1);
9706 emit_insn (gen_sse_movups (op0, op1));
9707 return;
9708 }
9709
9710 /* ??? If we have typed data, then it would appear that using
9711 movdqu is the only way to get unaligned data loaded with
9712 integer type. */
9713 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9714 {
9715 op0 = gen_lowpart (V16QImode, op0);
9716 op1 = gen_lowpart (V16QImode, op1);
9717 emit_insn (gen_sse2_movdqu (op0, op1));
9718 return;
9719 }
9720
9721 if (TARGET_SSE2 && mode == V2DFmode)
9722 {
9723 rtx zero;
9724
9725 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9726 {
9727 op0 = gen_lowpart (V2DFmode, op0);
9728 op1 = gen_lowpart (V2DFmode, op1);
9729 emit_insn (gen_sse2_movupd (op0, op1));
9730 return;
9731 }
9732
9733 /* When SSE registers are split into halves, we can avoid
9734 writing to the top half twice. */
9735 if (TARGET_SSE_SPLIT_REGS)
9736 {
9737 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9738 zero = op0;
9739 }
9740 else
9741 {
9742 /* ??? Not sure about the best option for the Intel chips.
9743 The following would seem to satisfy; the register is
9744 entirely cleared, breaking the dependency chain. We
9745 then store to the upper half, with a dependency depth
9746 of one. A rumor has it that Intel recommends two movsd
9747 followed by an unpacklpd, but this is unconfirmed. And
9748 given that the dependency depth of the unpacklpd would
9749 still be one, I'm not sure why this would be better. */
9750 zero = CONST0_RTX (V2DFmode);
9751 }
9752
9753 m = adjust_address (op1, DFmode, 0);
9754 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9755 m = adjust_address (op1, DFmode, 8);
9756 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9757 }
9758 else
9759 {
9760 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9761 {
9762 op0 = gen_lowpart (V4SFmode, op0);
9763 op1 = gen_lowpart (V4SFmode, op1);
9764 emit_insn (gen_sse_movups (op0, op1));
9765 return;
9766 }
9767
9768 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9769 emit_move_insn (op0, CONST0_RTX (mode));
9770 else
9771 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9772
9773 if (mode != V4SFmode)
9774 op0 = gen_lowpart (V4SFmode, op0);
9775 m = adjust_address (op1, V2SFmode, 0);
9776 emit_insn (gen_sse_loadlps (op0, op0, m));
9777 m = adjust_address (op1, V2SFmode, 8);
9778 emit_insn (gen_sse_loadhps (op0, op0, m));
9779 }
9780 }
9781 else if (MEM_P (op0))
9782 {
9783 /* If we're optimizing for size, movups is the smallest. */
9784 if (optimize_size)
9785 {
9786 op0 = gen_lowpart (V4SFmode, op0);
9787 op1 = gen_lowpart (V4SFmode, op1);
9788 emit_insn (gen_sse_movups (op0, op1));
9789 return;
9790 }
9791
9792 /* ??? Similar to above, only less clear because of quote
9793 typeless stores unquote. */
9794 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9795 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9796 {
9797 op0 = gen_lowpart (V16QImode, op0);
9798 op1 = gen_lowpart (V16QImode, op1);
9799 emit_insn (gen_sse2_movdqu (op0, op1));
9800 return;
9801 }
9802
9803 if (TARGET_SSE2 && mode == V2DFmode)
9804 {
9805 m = adjust_address (op0, DFmode, 0);
9806 emit_insn (gen_sse2_storelpd (m, op1));
9807 m = adjust_address (op0, DFmode, 8);
9808 emit_insn (gen_sse2_storehpd (m, op1));
9809 }
9810 else
9811 {
9812 if (mode != V4SFmode)
9813 op1 = gen_lowpart (V4SFmode, op1);
9814 m = adjust_address (op0, V2SFmode, 0);
9815 emit_insn (gen_sse_storelps (m, op1));
9816 m = adjust_address (op0, V2SFmode, 8);
9817 emit_insn (gen_sse_storehps (m, op1));
9818 }
9819 }
9820 else
9821 gcc_unreachable ();
9822 }
9823
9824 /* Expand a push in MODE. This is some mode for which we do not support
9825 proper push instructions, at least from the registers that we expect
9826 the value to live in. */
9827
9828 void
9829 ix86_expand_push (enum machine_mode mode, rtx x)
9830 {
9831 rtx tmp;
9832
9833 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9834 GEN_INT (-GET_MODE_SIZE (mode)),
9835 stack_pointer_rtx, 1, OPTAB_DIRECT);
9836 if (tmp != stack_pointer_rtx)
9837 emit_move_insn (stack_pointer_rtx, tmp);
9838
9839 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9840 emit_move_insn (tmp, x);
9841 }
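
/* For example (an illustrative sketch; the exact instructions depend on the
   move expander and on -mfpmath): pushing a DFmode value from an SSE
   register on x86_32 comes out as roughly

	subl	$8, %esp
	movsd	%xmm0, (%esp)

   i.e. the stack pointer is adjusted by GET_MODE_SIZE first and the value
   is then stored through it with an ordinary move.  */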
9842
9843 /* Helper function of ix86_fixup_binary_operands to canonicalize
9844 operand order. Returns true if the operands should be swapped. */
9845
9846 static bool
9847 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9848 rtx operands[])
9849 {
9850 rtx dst = operands[0];
9851 rtx src1 = operands[1];
9852 rtx src2 = operands[2];
9853
9854 /* If the operation is not commutative, we can't do anything. */
9855 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9856 return false;
9857
9858 /* Highest priority is that src1 should match dst. */
9859 if (rtx_equal_p (dst, src1))
9860 return false;
9861 if (rtx_equal_p (dst, src2))
9862 return true;
9863
9864 /* Next highest priority is that immediate constants come second. */
9865 if (immediate_operand (src2, mode))
9866 return false;
9867 if (immediate_operand (src1, mode))
9868 return true;
9869
9870 /* Lowest priority is that memory references should come second. */
9871 if (MEM_P (src2))
9872 return false;
9873 if (MEM_P (src1))
9874 return true;
9875
9876 return false;
9877 }
9878
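/* For example (illustrative): for a commutative PLUS with
   (dst = %eax, src1 = $42, src2 = %eax) the first rule fires and the
   sources are swapped so that src1 matches dst, enabling the two-address
   "addl $42, %eax" form; with (dst = %eax, src1 = %ecx, src2 = $42)
   nothing is swapped, because the immediate is already second.  */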
9879
9880 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9881 destination to use for the operation. If different from the true
9882 destination in operands[0], a copy operation will be required. */
9883
9884 rtx
9885 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9886 rtx operands[])
9887 {
9888 rtx dst = operands[0];
9889 rtx src1 = operands[1];
9890 rtx src2 = operands[2];
9891
9892 /* Canonicalize operand order. */
9893 if (ix86_swap_binary_operands_p (code, mode, operands))
9894 {
9895 rtx temp = src1;
9896 src1 = src2;
9897 src2 = temp;
9898 }
9899
9900 /* Both source operands cannot be in memory. */
9901 if (MEM_P (src1) && MEM_P (src2))
9902 {
9903 /* Optimization: Only read from memory once. */
9904 if (rtx_equal_p (src1, src2))
9905 {
9906 src2 = force_reg (mode, src2);
9907 src1 = src2;
9908 }
9909 else
9910 src2 = force_reg (mode, src2);
9911 }
9912
9913 /* If the destination is memory, and we do not have matching source
9914 operands, do things in registers. */
9915 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9916 dst = gen_reg_rtx (mode);
9917
9918 /* Source 1 cannot be a constant. */
9919 if (CONSTANT_P (src1))
9920 src1 = force_reg (mode, src1);
9921
9922 /* Source 1 cannot be a non-matching memory. */
9923 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9924 src1 = force_reg (mode, src1);
9925
9926 operands[1] = src1;
9927 operands[2] = src2;
9928 return dst;
9929 }
9930
9931 /* Similarly, but assume that the destination has already been
9932 set up properly. */
9933
9934 void
9935 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9936 enum machine_mode mode, rtx operands[])
9937 {
9938 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9939 gcc_assert (dst == operands[0]);
9940 }
9941
9942 /* Attempt to expand a binary operator. Make the expansion closer to the
9943 actual machine than just general_operand, which would allow 3 separate
9944 memory references (one output, two input) in a single insn. */
9945
9946 void
9947 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9948 rtx operands[])
9949 {
9950 rtx src1, src2, dst, op, clob;
9951
9952 dst = ix86_fixup_binary_operands (code, mode, operands);
9953 src1 = operands[1];
9954 src2 = operands[2];
9955
9956 /* Emit the instruction. */
9957
9958 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9959 if (reload_in_progress)
9960 {
9961 /* Reload doesn't know about the flags register, and doesn't know that
9962 it doesn't want to clobber it. We can only do this with PLUS. */
9963 gcc_assert (code == PLUS);
9964 emit_insn (op);
9965 }
9966 else
9967 {
9968 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9969 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9970 }
9971
9972 /* Fix up the destination if needed. */
9973 if (dst != operands[0])
9974 emit_move_insn (operands[0], dst);
9975 }
9976
9977 /* Return TRUE or FALSE depending on whether the binary operator meets the
9978 appropriate constraints. */
9979
9980 int
9981 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9982 rtx operands[3])
9983 {
9984 rtx dst = operands[0];
9985 rtx src1 = operands[1];
9986 rtx src2 = operands[2];
9987
9988 /* Both source operands cannot be in memory. */
9989 if (MEM_P (src1) && MEM_P (src2))
9990 return 0;
9991
9992 /* Canonicalize operand order for commutative operators. */
9993 if (ix86_swap_binary_operands_p (code, mode, operands))
9994 {
9995 rtx temp = src1;
9996 src1 = src2;
9997 src2 = temp;
9998 }
9999
10000 /* If the destination is memory, we must have a matching source operand. */
10001 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10002 return 0;
10003
10004 /* Source 1 cannot be a constant. */
10005 if (CONSTANT_P (src1))
10006 return 0;
10007
10008 /* Source 1 cannot be a non-matching memory. */
10009 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10010 return 0;
10011
10012 return 1;
10013 }
10014
10015 /* Attempt to expand a unary operator. Make the expansion closer to the
10016 actual machine than just general_operand, which would allow 2 separate
10017 memory references (one output, one input) in a single insn. */
10018
10019 void
10020 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10021 rtx operands[])
10022 {
10023 int matching_memory;
10024 rtx src, dst, op, clob;
10025
10026 dst = operands[0];
10027 src = operands[1];
10028
10029 /* If the destination is memory, and we do not have matching source
10030 operands, do things in registers. */
10031 matching_memory = 0;
10032 if (MEM_P (dst))
10033 {
10034 if (rtx_equal_p (dst, src))
10035 matching_memory = 1;
10036 else
10037 dst = gen_reg_rtx (mode);
10038 }
10039
10040 /* When source operand is memory, destination must match. */
10041 if (MEM_P (src) && !matching_memory)
10042 src = force_reg (mode, src);
10043
10044 /* Emit the instruction. */
10045
10046 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10047 if (reload_in_progress || code == NOT)
10048 {
10049 /* Reload doesn't know about the flags register, and doesn't know that
10050 it doesn't want to clobber it. */
10051 gcc_assert (code == NOT);
10052 emit_insn (op);
10053 }
10054 else
10055 {
10056 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10057 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10058 }
10059
10060 /* Fix up the destination if needed. */
10061 if (dst != operands[0])
10062 emit_move_insn (operands[0], dst);
10063 }
10064
10065 /* Return TRUE or FALSE depending on whether the unary operator meets the
10066 appropriate constraints. */
10067
10068 int
10069 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10070 enum machine_mode mode ATTRIBUTE_UNUSED,
10071 rtx operands[2] ATTRIBUTE_UNUSED)
10072 {
10073 /* If one of the operands is memory, source and destination must match. */
10074 if ((MEM_P (operands[0])
10075 || MEM_P (operands[1]))
10076 && ! rtx_equal_p (operands[0], operands[1]))
10077 return FALSE;
10078 return TRUE;
10079 }
10080
10081 /* Post-reload splitter for converting an SF or DFmode value in an
10082 SSE register into an unsigned SImode. */
10083
10084 void
10085 ix86_split_convert_uns_si_sse (rtx operands[])
10086 {
10087 enum machine_mode vecmode;
10088 rtx value, large, zero_or_two31, input, two31, x;
10089
10090 large = operands[1];
10091 zero_or_two31 = operands[2];
10092 input = operands[3];
10093 two31 = operands[4];
10094 vecmode = GET_MODE (large);
10095 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10096
10097 /* Load up the value into the low element. We must ensure that the other
10098 elements are valid floats -- zero is the easiest such value. */
10099 if (MEM_P (input))
10100 {
10101 if (vecmode == V4SFmode)
10102 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10103 else
10104 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10105 }
10106 else
10107 {
10108 input = gen_rtx_REG (vecmode, REGNO (input));
10109 emit_move_insn (value, CONST0_RTX (vecmode));
10110 if (vecmode == V4SFmode)
10111 emit_insn (gen_sse_movss (value, value, input));
10112 else
10113 emit_insn (gen_sse2_movsd (value, value, input));
10114 }
10115
10116 emit_move_insn (large, two31);
10117 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10118
10119 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10120 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10121
10122 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10123 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10124
10125 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10126 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10127
10128 large = gen_rtx_REG (V4SImode, REGNO (large));
10129 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10130
10131 x = gen_rtx_REG (V4SImode, REGNO (value));
10132 if (vecmode == V4SFmode)
10133 emit_insn (gen_sse2_cvttps2dq (x, value));
10134 else
10135 emit_insn (gen_sse2_cvttpd2dq (x, value));
10136 value = x;
10137
10138 emit_insn (gen_xorv4si3 (value, value, large));
10139 }
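
/* Worked example (illustrative): for an input of 3e9, which is >= 2**31,
   the comparison produces an all-ones mask, so 2**31 is subtracted before
   the signed cvttps2dq/cvttpd2dq, giving 3000000000 - 2147483648 =
   852516352; xoring with the mask shifted into bit 31 then adds 2**31 back
   in the integer domain, yielding the unsigned value 3000000000. For
   inputs below 2**31 the mask is zero and the plain signed conversion is
   used unchanged.  */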
10140
10141 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10142 Expects the 64-bit DImode to be supplied in a pair of integral
10143 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10144 -mfpmath=sse, !optimize_size only. */
10145
10146 void
10147 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10148 {
10149 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10150 rtx int_xmm, fp_xmm;
10151 rtx biases, exponents;
10152 rtx x;
10153
10154 int_xmm = gen_reg_rtx (V4SImode);
10155 if (TARGET_INTER_UNIT_MOVES)
10156 emit_insn (gen_movdi_to_sse (int_xmm, input));
10157 else if (TARGET_SSE_SPLIT_REGS)
10158 {
10159 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10160 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10161 }
10162 else
10163 {
10164 x = gen_reg_rtx (V2DImode);
10165 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10166 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10167 }
10168
10169 x = gen_rtx_CONST_VECTOR (V4SImode,
10170 gen_rtvec (4, GEN_INT (0x43300000UL),
10171 GEN_INT (0x45300000UL),
10172 const0_rtx, const0_rtx));
10173 exponents = validize_mem (force_const_mem (V4SImode, x));
10174
10175 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10176 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10177
10178 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_low_xmm)
10179 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10180 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10181 (0x1.0p84 + double(fp_value_hi_xmm)).
10182 Note these exponents differ by 32. */
10183
10184 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10185
10186 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10187 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10188 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10189 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10190 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10191 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10192 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10193 biases = validize_mem (force_const_mem (V2DFmode, biases));
10194 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10195
10196 /* Add the upper and lower DFmode values together. */
10197 if (TARGET_SSE3)
10198 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10199 else
10200 {
10201 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10202 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10203 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10204 }
10205
10206 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10207 }
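
/* A host-side reference sketch of the bias trick used above (purely
   illustrative; this helper is hypothetical and is not used anywhere).
   It assumes the host has IEEE doubles, a 64-bit unsigned long long, and
   matching endianness for the union pun.  */
static double ATTRIBUTE_UNUSED
ix86_uns_didf_reference_sketch (unsigned long long u)
{
  union { unsigned long long i; double d; } lo_u, hi_u;
  double two52 = 4503599627370496.0;		/* 2**52 */
  double two84 = two52 * 4294967296.0;		/* 2**84 */

  /* Exactly what the punpckldq with the exponent words builds: the low 32
     bits under 0x43300000 give 2**52 + lo, the high 32 bits under
     0x45300000 give 2**84 + hi * 2**32.  */
  lo_u.i = (0x43300000ULL << 32) | (u & 0xffffffffULL);
  hi_u.i = (0x45300000ULL << 32) | (u >> 32);

  /* Subtract the biases and add the halves, as the subv2df3 and the
     haddv2df3 (or unpckhpd + addv2df3) sequence does.  */
  return (lo_u.d - two52) + (hi_u.d - two84);
}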
10208
10209 /* Convert an unsigned SImode value into a DFmode. Only currently used
10210 for SSE, but applicable anywhere. */
10211
10212 void
10213 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10214 {
10215 REAL_VALUE_TYPE TWO31r;
10216 rtx x, fp;
10217
10218 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10219 NULL, 1, OPTAB_DIRECT);
10220
10221 fp = gen_reg_rtx (DFmode);
10222 emit_insn (gen_floatsidf2 (fp, x));
10223
10224 real_ldexp (&TWO31r, &dconst1, 31);
10225 x = const_double_from_real_value (TWO31r, DFmode);
10226
10227 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10228 if (x != target)
10229 emit_move_insn (target, x);
10230 }
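
/* Worked example (illustrative): the PLUS with -2147483648 just flips the
   sign bit, so an unsigned input of 3000000000 becomes the signed SImode
   value 852516352; converting that with floatsidf and adding the 2**31
   constant back yields 3000000000.0 exactly. For an input of 5 the
   intermediate is -2147483643, and adding 2**31 again restores 5.0.  */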
10231
10232 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10233 32-bit mode; otherwise we have a direct convert instruction. */
10234
10235 void
10236 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10237 {
10238 REAL_VALUE_TYPE TWO32r;
10239 rtx fp_lo, fp_hi, x;
10240
10241 fp_lo = gen_reg_rtx (DFmode);
10242 fp_hi = gen_reg_rtx (DFmode);
10243
10244 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10245
10246 real_ldexp (&TWO32r, &dconst1, 32);
10247 x = const_double_from_real_value (TWO32r, DFmode);
10248 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10249
10250 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10251
10252 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10253 0, OPTAB_DIRECT);
10254 if (x != target)
10255 emit_move_insn (target, x);
10256 }
10257
10258 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10259 For x86_32, -mfpmath=sse, !optimize_size only. */
10260 void
10261 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10262 {
10263 REAL_VALUE_TYPE ONE16r;
10264 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10265
10266 real_ldexp (&ONE16r, &dconst1, 16);
10267 x = const_double_from_real_value (ONE16r, SFmode);
10268 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10269 NULL, 0, OPTAB_DIRECT);
10270 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10271 NULL, 0, OPTAB_DIRECT);
10272 fp_hi = gen_reg_rtx (SFmode);
10273 fp_lo = gen_reg_rtx (SFmode);
10274 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10275 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10276 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10277 0, OPTAB_DIRECT);
10278 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10279 0, OPTAB_DIRECT);
10280 if (!rtx_equal_p (target, fp_hi))
10281 emit_move_insn (target, fp_hi);
10282 }
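
/* Worked example (illustrative): for the input 0xdeadbeef the halves are
   0xdead = 57005 and 0xbeef = 48879; both convert to SFmode exactly, the
   scaling of the high half by 2**16 is exact as well, so only the final
   addition 3735879680.0 + 48879.0 rounds, giving the correctly rounded
   single-precision result for 3735928559.  */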
10283
10284 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10285 then replicate the value for all elements of the vector
10286 register. */
10287
10288 rtx
10289 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10290 {
10291 rtvec v;
10292 switch (mode)
10293 {
10294 case SFmode:
10295 if (vect)
10296 v = gen_rtvec (4, value, value, value, value);
10297 else
10298 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10299 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10300 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10301
10302 case DFmode:
10303 if (vect)
10304 v = gen_rtvec (2, value, value);
10305 else
10306 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10307 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10308
10309 default:
10310 gcc_unreachable ();
10311 }
10312 }
10313
10314 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10315 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10316 true, then replicate the mask for all elements of the vector register.
10317 If INVERT is true, then create a mask excluding the sign bit. */
10318
10319 rtx
10320 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10321 {
10322 enum machine_mode vec_mode;
10323 HOST_WIDE_INT hi, lo;
10324 int shift = 63;
10325 rtx v;
10326 rtx mask;
10327
10328 /* Find the sign bit, sign extended to 2*HWI. */
10329 if (mode == SFmode)
10330 lo = 0x80000000, hi = lo < 0;
10331 else if (HOST_BITS_PER_WIDE_INT >= 64)
10332 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10333 else
10334 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10335
10336 if (invert)
10337 lo = ~lo, hi = ~hi;
10338
10339 /* Force this value into the low part of a fp vector constant. */
10340 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10341 mask = gen_lowpart (mode, mask);
10342
10343 v = ix86_build_const_vector (mode, vect, mask);
10344 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10345 return force_reg (vec_mode, v);
10346 }
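
/* For example (illustrative): for SFmode with VECT the constant built here
   is { 0x80000000, 0x80000000, 0x80000000, 0x80000000 }, and with INVERT
   it is the complement { 0x7fffffff, ... }; NEG below XORs with the former
   to flip the sign bit, while ABS ANDs with the latter to clear it.  */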
10347
10348 /* Generate code for floating point ABS or NEG. */
10349
10350 void
10351 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10352 rtx operands[])
10353 {
10354 rtx mask, set, use, clob, dst, src;
10355 bool matching_memory;
10356 bool use_sse = false;
10357 bool vector_mode = VECTOR_MODE_P (mode);
10358 enum machine_mode elt_mode = mode;
10359
10360 if (vector_mode)
10361 {
10362 elt_mode = GET_MODE_INNER (mode);
10363 use_sse = true;
10364 }
10365 else if (TARGET_SSE_MATH)
10366 use_sse = SSE_FLOAT_MODE_P (mode);
10367
10368 /* NEG and ABS performed with SSE use bitwise mask operations.
10369 Create the appropriate mask now. */
10370 if (use_sse)
10371 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10372 else
10373 mask = NULL_RTX;
10374
10375 dst = operands[0];
10376 src = operands[1];
10377
10378 /* If the destination is memory, and we don't have matching source
10379 operands or we're using the x87, do things in registers. */
10380 matching_memory = false;
10381 if (MEM_P (dst))
10382 {
10383 if (use_sse && rtx_equal_p (dst, src))
10384 matching_memory = true;
10385 else
10386 dst = gen_reg_rtx (mode);
10387 }
10388 if (MEM_P (src) && !matching_memory)
10389 src = force_reg (mode, src);
10390
10391 if (vector_mode)
10392 {
10393 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10394 set = gen_rtx_SET (VOIDmode, dst, set);
10395 emit_insn (set);
10396 }
10397 else
10398 {
10399 set = gen_rtx_fmt_e (code, mode, src);
10400 set = gen_rtx_SET (VOIDmode, dst, set);
10401 if (mask)
10402 {
10403 use = gen_rtx_USE (VOIDmode, mask);
10404 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10405 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10406 gen_rtvec (3, set, use, clob)));
10407 }
10408 else
10409 emit_insn (set);
10410 }
10411
10412 if (dst != operands[0])
10413 emit_move_insn (operands[0], dst);
10414 }
10415
10416 /* Expand a copysign operation. Special case operand 0 being a constant. */
10417
10418 void
10419 ix86_expand_copysign (rtx operands[])
10420 {
10421 enum machine_mode mode, vmode;
10422 rtx dest, op0, op1, mask, nmask;
10423
10424 dest = operands[0];
10425 op0 = operands[1];
10426 op1 = operands[2];
10427
10428 mode = GET_MODE (dest);
10429 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10430
10431 if (GET_CODE (op0) == CONST_DOUBLE)
10432 {
10433 rtvec v;
10434
10435 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10436 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10437
10438 if (op0 == CONST0_RTX (mode))
10439 op0 = CONST0_RTX (vmode);
10440 else
10441 {
10442 if (mode == SFmode)
10443 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10444 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10445 else
10446 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10447 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10448 }
10449
10450 mask = ix86_build_signbit_mask (mode, 0, 0);
10451
10452 if (mode == SFmode)
10453 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10454 else
10455 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10456 }
10457 else
10458 {
10459 nmask = ix86_build_signbit_mask (mode, 0, 1);
10460 mask = ix86_build_signbit_mask (mode, 0, 0);
10461
10462 if (mode == SFmode)
10463 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10464 else
10465 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10466 }
10467 }
10468
10469 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10470 be a constant, and so has already been expanded into a vector constant. */
10471
10472 void
10473 ix86_split_copysign_const (rtx operands[])
10474 {
10475 enum machine_mode mode, vmode;
10476 rtx dest, op0, op1, mask, x;
10477
10478 dest = operands[0];
10479 op0 = operands[1];
10480 op1 = operands[2];
10481 mask = operands[3];
10482
10483 mode = GET_MODE (dest);
10484 vmode = GET_MODE (mask);
10485
10486 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10487 x = gen_rtx_AND (vmode, dest, mask);
10488 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10489
10490 if (op0 != CONST0_RTX (vmode))
10491 {
10492 x = gen_rtx_IOR (vmode, dest, op0);
10493 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10494 }
10495 }
10496
10497 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10498 so we have to do two masks. */
10499
10500 void
10501 ix86_split_copysign_var (rtx operands[])
10502 {
10503 enum machine_mode mode, vmode;
10504 rtx dest, scratch, op0, op1, mask, nmask, x;
10505
10506 dest = operands[0];
10507 scratch = operands[1];
10508 op0 = operands[2];
10509 op1 = operands[3];
10510 nmask = operands[4];
10511 mask = operands[5];
10512
10513 mode = GET_MODE (dest);
10514 vmode = GET_MODE (mask);
10515
10516 if (rtx_equal_p (op0, op1))
10517 {
10518 /* Shouldn't happen often (it's useless, obviously), but when it does
10519 we'd generate incorrect code if we continue below. */
10520 emit_move_insn (dest, op0);
10521 return;
10522 }
10523
10524 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10525 {
10526 gcc_assert (REGNO (op1) == REGNO (scratch));
10527
10528 x = gen_rtx_AND (vmode, scratch, mask);
10529 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10530
10531 dest = mask;
10532 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10533 x = gen_rtx_NOT (vmode, dest);
10534 x = gen_rtx_AND (vmode, x, op0);
10535 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10536 }
10537 else
10538 {
10539 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10540 {
10541 x = gen_rtx_AND (vmode, scratch, mask);
10542 }
10543 else /* alternative 2,4 */
10544 {
10545 gcc_assert (REGNO (mask) == REGNO (scratch));
10546 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10547 x = gen_rtx_AND (vmode, scratch, op1);
10548 }
10549 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10550
10551 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10552 {
10553 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10554 x = gen_rtx_AND (vmode, dest, nmask);
10555 }
10556 else /* alternative 3,4 */
10557 {
10558 gcc_assert (REGNO (nmask) == REGNO (dest));
10559 dest = nmask;
10560 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10561 x = gen_rtx_AND (vmode, dest, op0);
10562 }
10563 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10564 }
10565
10566 x = gen_rtx_IOR (vmode, dest, scratch);
10567 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10568 }
10569
10570 /* Return TRUE or FALSE depending on whether the first SET in INSN
10571 has source and destination with matching CC modes and whether the
10572 CC mode is at least as constrained as REQ_MODE. */
10573
10574 int
10575 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10576 {
10577 rtx set;
10578 enum machine_mode set_mode;
10579
10580 set = PATTERN (insn);
10581 if (GET_CODE (set) == PARALLEL)
10582 set = XVECEXP (set, 0, 0);
10583 gcc_assert (GET_CODE (set) == SET);
10584 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10585
10586 set_mode = GET_MODE (SET_DEST (set));
10587 switch (set_mode)
10588 {
10589 case CCNOmode:
10590 if (req_mode != CCNOmode
10591 && (req_mode != CCmode
10592 || XEXP (SET_SRC (set), 1) != const0_rtx))
10593 return 0;
10594 break;
10595 case CCmode:
10596 if (req_mode == CCGCmode)
10597 return 0;
10598 /* FALLTHRU */
10599 case CCGCmode:
10600 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10601 return 0;
10602 /* FALLTHRU */
10603 case CCGOCmode:
10604 if (req_mode == CCZmode)
10605 return 0;
10606 /* FALLTHRU */
10607 case CCZmode:
10608 break;
10609
10610 default:
10611 gcc_unreachable ();
10612 }
10613
10614 return (GET_MODE (SET_SRC (set)) == set_mode);
10615 }
10616
10617 /* Generate insn patterns to do an integer compare of OPERANDS. */
10618
10619 static rtx
10620 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10621 {
10622 enum machine_mode cmpmode;
10623 rtx tmp, flags;
10624
10625 cmpmode = SELECT_CC_MODE (code, op0, op1);
10626 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10627
10628 /* This is very simple, but making the interface the same as in the
10629 FP case makes the rest of the code easier. */
10630 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10631 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10632
10633 /* Return the test that should be put into the flags user, i.e.
10634 the bcc, scc, or cmov instruction. */
10635 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10636 }
10637
10638 /* Figure out whether to use ordered or unordered fp comparisons.
10639 Return the appropriate mode to use. */
10640
10641 enum machine_mode
10642 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10643 {
10644 /* ??? In order to make all comparisons reversible, we do all comparisons
10645 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10646 all forms of trapping and nontrapping comparisons, we can make inequality
10647 comparisons trapping again, since it results in better code when using
10648 FCOM based compares. */
10649 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10650 }
10651
10652 enum machine_mode
10653 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10654 {
10655 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10656 return ix86_fp_compare_mode (code);
10657 switch (code)
10658 {
10659 /* Only zero flag is needed. */
10660 case EQ: /* ZF=0 */
10661 case NE: /* ZF!=0 */
10662 return CCZmode;
10663 /* Codes needing carry flag. */
10664 case GEU: /* CF=0 */
10665 case GTU: /* CF=0 & ZF=0 */
10666 case LTU: /* CF=1 */
10667 case LEU: /* CF=1 | ZF=1 */
10668 return CCmode;
10669 /* Codes possibly doable only with sign flag when
10670 comparing against zero. */
10671 case GE: /* SF=OF or SF=0 */
10672 case LT: /* SF<>OF or SF=1 */
10673 if (op1 == const0_rtx)
10674 return CCGOCmode;
10675 else
10676 /* For other cases Carry flag is not required. */
10677 return CCGCmode;
10678 /* Codes doable only with the sign flag when comparing
10679 against zero, but we miss the jump instruction for it,
10680 so we need to use relational tests against overflow,
10681 which thus needs to be zero. */
10682 case GT: /* ZF=0 & SF=OF */
10683 case LE: /* ZF=1 | SF<>OF */
10684 if (op1 == const0_rtx)
10685 return CCNOmode;
10686 else
10687 return CCGCmode;
10688 /* The strcmp pattern does (use flags), and combine may ask us for the
10689 proper mode. */
10690 case USE:
10691 return CCmode;
10692 default:
10693 gcc_unreachable ();
10694 }
10695 }
10696
10697 /* Return the fixed registers used for condition codes. */
10698
10699 static bool
10700 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10701 {
10702 *p1 = FLAGS_REG;
10703 *p2 = FPSR_REG;
10704 return true;
10705 }
10706
10707 /* If two condition code modes are compatible, return a condition code
10708 mode which is compatible with both. Otherwise, return
10709 VOIDmode. */
10710
10711 static enum machine_mode
10712 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10713 {
10714 if (m1 == m2)
10715 return m1;
10716
10717 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10718 return VOIDmode;
10719
10720 if ((m1 == CCGCmode && m2 == CCGOCmode)
10721 || (m1 == CCGOCmode && m2 == CCGCmode))
10722 return CCGCmode;
10723
10724 switch (m1)
10725 {
10726 default:
10727 gcc_unreachable ();
10728
10729 case CCmode:
10730 case CCGCmode:
10731 case CCGOCmode:
10732 case CCNOmode:
10733 case CCZmode:
10734 switch (m2)
10735 {
10736 default:
10737 return VOIDmode;
10738
10739 case CCmode:
10740 case CCGCmode:
10741 case CCGOCmode:
10742 case CCNOmode:
10743 case CCZmode:
10744 return CCmode;
10745 }
10746
10747 case CCFPmode:
10748 case CCFPUmode:
10749 /* These are only compatible with themselves, which we already
10750 checked above. */
10751 return VOIDmode;
10752 }
10753 }
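
/* For example (following the switch above), a CCGCmode use and a CCGOCmode
   use of the same flags can share CCGCmode, a CCZmode use and a CCmode use
   fall back to the fully general CCmode, and the CCFPmode/CCFPUmode flags
   are only compatible with themselves.  */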
10754
10755 /* Return true if we should use an FCOMI instruction for this fp comparison. */
10756
10757 int
10758 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10759 {
10760 enum rtx_code swapped_code = swap_condition (code);
10761 return ((ix86_fp_comparison_cost (code) == ix86_fp_comparison_fcomi_cost (code))
10762 || (ix86_fp_comparison_cost (swapped_code)
10763 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10764 }
10765
10766 /* Swap, force into registers, or otherwise massage the two operands
10767 to an fp comparison. The operands are updated in place; the new
10768 comparison code is returned. */
10769
10770 static enum rtx_code
10771 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10772 {
10773 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10774 rtx op0 = *pop0, op1 = *pop1;
10775 enum machine_mode op_mode = GET_MODE (op0);
10776 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10777
10778 /* All of the unordered compare instructions only work on registers.
10779 The same is true of the fcomi compare instructions. The XFmode
10780 compare instructions require registers except when comparing
10781 against zero or when converting operand 1 from fixed point to
10782 floating point. */
10783
10784 if (!is_sse
10785 && (fpcmp_mode == CCFPUmode
10786 || (op_mode == XFmode
10787 && ! (standard_80387_constant_p (op0) == 1
10788 || standard_80387_constant_p (op1) == 1)
10789 && GET_CODE (op1) != FLOAT)
10790 || ix86_use_fcomi_compare (code)))
10791 {
10792 op0 = force_reg (op_mode, op0);
10793 op1 = force_reg (op_mode, op1);
10794 }
10795 else
10796 {
10797 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10798 things around if they appear profitable, otherwise force op0
10799 into a register. */
10800
10801 if (standard_80387_constant_p (op0) == 0
10802 || (MEM_P (op0)
10803 && ! (standard_80387_constant_p (op1) == 0
10804 || MEM_P (op1))))
10805 {
10806 rtx tmp;
10807 tmp = op0, op0 = op1, op1 = tmp;
10808 code = swap_condition (code);
10809 }
10810
10811 if (!REG_P (op0))
10812 op0 = force_reg (op_mode, op0);
10813
10814 if (CONSTANT_P (op1))
10815 {
10816 int tmp = standard_80387_constant_p (op1);
10817 if (tmp == 0)
10818 op1 = validize_mem (force_const_mem (op_mode, op1));
10819 else if (tmp == 1)
10820 {
10821 if (TARGET_CMOVE)
10822 op1 = force_reg (op_mode, op1);
10823 }
10824 else
10825 op1 = force_reg (op_mode, op1);
10826 }
10827 }
10828
10829 /* Try to rearrange the comparison to make it cheaper. */
10830 if (ix86_fp_comparison_cost (code)
10831 > ix86_fp_comparison_cost (swap_condition (code))
10832 && (REG_P (op1) || !no_new_pseudos))
10833 {
10834 rtx tmp;
10835 tmp = op0, op0 = op1, op1 = tmp;
10836 code = swap_condition (code);
10837 if (!REG_P (op0))
10838 op0 = force_reg (op_mode, op0);
10839 }
10840
10841 *pop0 = op0;
10842 *pop1 = op1;
10843 return code;
10844 }
10845
10846 /* Convert a comparison code we use to represent an FP comparison into the
10847 integer code that will result in a proper branch. Return UNKNOWN if no
10848 such code is available. */
10849
10850 enum rtx_code
10851 ix86_fp_compare_code_to_integer (enum rtx_code code)
10852 {
10853 switch (code)
10854 {
10855 case GT:
10856 return GTU;
10857 case GE:
10858 return GEU;
10859 case ORDERED:
10860 case UNORDERED:
10861 return code;
10862 break;
10863 case UNEQ:
10864 return EQ;
10865 break;
10866 case UNLT:
10867 return LTU;
10868 break;
10869 case UNLE:
10870 return LEU;
10871 break;
10872 case LTGT:
10873 return NE;
10874 break;
10875 default:
10876 return UNKNOWN;
10877 }
10878 }
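
/* In other words, after an fcomi or fnstsw/sahf sequence the FP flags look
   like the flags of an unsigned comparison, so e.g. GT is tested with "ja",
   GE with "jae", UNLT with "jb", UNLE with "jbe", UNEQ with "je", LTGT with
   "jne", and ORDERED/UNORDERED with "jnp"/"jp" (illustrative mapping; see
   the flag table below).  */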
10879
10880 /* Split comparison code CODE into comparisons we can do using branch
10881 instructions. BYPASS_CODE is the comparison code for a branch that
10882 branches around FIRST_CODE and SECOND_CODE. If one of the branches
10883 is not required, its code is set to UNKNOWN.
10884 We never require more than two branches. */
10885
10886 void
10887 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10888 enum rtx_code *first_code,
10889 enum rtx_code *second_code)
10890 {
10891 *first_code = code;
10892 *bypass_code = UNKNOWN;
10893 *second_code = UNKNOWN;
10894
10895 /* The fcomi comparison sets flags as follows:
10896
10897 cmp ZF PF CF
10898 > 0 0 0
10899 < 0 0 1
10900 = 1 0 0
10901 un 1 1 1 */
10902
10903 switch (code)
10904 {
10905 case GT: /* GTU - CF=0 & ZF=0 */
10906 case GE: /* GEU - CF=0 */
10907 case ORDERED: /* PF=0 */
10908 case UNORDERED: /* PF=1 */
10909 case UNEQ: /* EQ - ZF=1 */
10910 case UNLT: /* LTU - CF=1 */
10911 case UNLE: /* LEU - CF=1 | ZF=1 */
10912 case LTGT: /* EQ - ZF=0 */
10913 break;
10914 case LT: /* LTU - CF=1 - fails on unordered */
10915 *first_code = UNLT;
10916 *bypass_code = UNORDERED;
10917 break;
10918 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10919 *first_code = UNLE;
10920 *bypass_code = UNORDERED;
10921 break;
10922 case EQ: /* EQ - ZF=1 - fails on unordered */
10923 *first_code = UNEQ;
10924 *bypass_code = UNORDERED;
10925 break;
10926 case NE: /* NE - ZF=0 - fails on unordered */
10927 *first_code = LTGT;
10928 *second_code = UNORDERED;
10929 break;
10930 case UNGE: /* GEU - CF=0 - fails on unordered */
10931 *first_code = GE;
10932 *second_code = UNORDERED;
10933 break;
10934 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10935 *first_code = GT;
10936 *second_code = UNORDERED;
10937 break;
10938 default:
10939 gcc_unreachable ();
10940 }
10941 if (!TARGET_IEEE_FP)
10942 {
10943 *second_code = UNKNOWN;
10944 *bypass_code = UNKNOWN;
10945 }
10946 }
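
/* As an illustrative example, an IEEE (lt a b) branch gets FIRST_CODE = UNLT
   and BYPASS_CODE = UNORDERED, so ix86_split_fp_branch will emit roughly

	fucomip			; or fnstsw/sahf when fcomi is unavailable
	jp	.Lskip		; bypass: unordered must not take the branch
	jb	.Ltarget	; UNLT acts like LTU (CF set)
   .Lskip:

   while e.g. (ne a b) uses a SECOND_CODE of UNORDERED, giving two branches
   to the same target.  (Sketch only; labels and exact mnemonics vary.)  */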
10947
10948 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
10949 All of the following functions use the number of instructions as the cost metric.
10950 In the future this should be tweaked to compute bytes for optimize_size and
10951 take into account the performance of various instructions on various CPUs. */
10952 static int
10953 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10954 {
10955 if (!TARGET_IEEE_FP)
10956 return 4;
10957 /* The cost of code output by ix86_expand_fp_compare. */
10958 switch (code)
10959 {
10960 case UNLE:
10961 case UNLT:
10962 case LTGT:
10963 case GT:
10964 case GE:
10965 case UNORDERED:
10966 case ORDERED:
10967 case UNEQ:
10968 return 4;
10969 break;
10970 case LT:
10971 case NE:
10972 case EQ:
10973 case UNGE:
10974 return 5;
10975 break;
10976 case LE:
10977 case UNGT:
10978 return 6;
10979 break;
10980 default:
10981 gcc_unreachable ();
10982 }
10983 }
10984
10985 /* Return cost of comparison done using fcomi operation.
10986 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10987 static int
10988 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10989 {
10990 enum rtx_code bypass_code, first_code, second_code;
10991 /* Return an arbitrarily high cost when the instruction is not supported -
10992 this prevents gcc from using it. */
10993 if (!TARGET_CMOVE)
10994 return 1024;
10995 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10996 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10997 }
10998
10999 /* Return cost of comparison done using sahf operation.
11000 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11001 static int
11002 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11003 {
11004 enum rtx_code bypass_code, first_code, second_code;
11005 /* Return an arbitrarily high cost when the instruction is not preferred -
11006 this prevents gcc from using it. */
11007 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11008 return 1024;
11009 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11010 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11011 }
11012
11013 /* Compute cost of the comparison done using any method.
11014 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11015 static int
11016 ix86_fp_comparison_cost (enum rtx_code code)
11017 {
11018 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11019 int min;
11020
11021 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11022 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11023
11024 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11025 if (min > sahf_cost)
11026 min = sahf_cost;
11027 if (min > fcomi_cost)
11028 min = fcomi_cost;
11029 return min;
11030 }
11031
11032 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11033
11034 static rtx
11035 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11036 rtx *second_test, rtx *bypass_test)
11037 {
11038 enum machine_mode fpcmp_mode, intcmp_mode;
11039 rtx tmp, tmp2;
11040 int cost = ix86_fp_comparison_cost (code);
11041 enum rtx_code bypass_code, first_code, second_code;
11042
11043 fpcmp_mode = ix86_fp_compare_mode (code);
11044 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11045
11046 if (second_test)
11047 *second_test = NULL_RTX;
11048 if (bypass_test)
11049 *bypass_test = NULL_RTX;
11050
11051 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11052
11053 /* Do fcomi/sahf based test when profitable. */
11054 if ((TARGET_CMOVE || TARGET_SAHF)
11055 && (bypass_code == UNKNOWN || bypass_test)
11056 && (second_code == UNKNOWN || second_test)
11057 && ix86_fp_comparison_arithmetics_cost (code) > cost)
11058 {
11059 if (TARGET_CMOVE)
11060 {
11061 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11062 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11063 tmp);
11064 emit_insn (tmp);
11065 }
11066 else
11067 {
11068 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11069 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11070 if (!scratch)
11071 scratch = gen_reg_rtx (HImode);
11072 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11073 emit_insn (gen_x86_sahf_1 (scratch));
11074 }
11075
11076 /* The FP codes work out to act like unsigned. */
11077 intcmp_mode = fpcmp_mode;
11078 code = first_code;
11079 if (bypass_code != UNKNOWN)
11080 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11081 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11082 const0_rtx);
11083 if (second_code != UNKNOWN)
11084 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11085 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11086 const0_rtx);
11087 }
11088 else
11089 {
11090 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11091 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11092 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11093 if (!scratch)
11094 scratch = gen_reg_rtx (HImode);
11095 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11096
11097 /* In the unordered case, we have to check C2 for NaNs, which
11098 doesn't happen to work out to anything nice combination-wise.
11099 So do some bit twiddling on the value we've got in AH to come
11100 up with an appropriate set of condition codes. */
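
/* After fnstsw, AH holds bits 8..15 of the FPU status word, so in the masks
   below C0 is 0x01, C2 is 0x04 and C3 is 0x40; e.g. 0x45 tests C3|C2|C0 and
   0x44 tests C3|C2.  After fcom, C3/C2/C0 are 0/0/0 for ">", 0/0/1 for "<",
   1/0/0 for "=" and 1/1/1 for unordered.  */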
11101
11102 intcmp_mode = CCNOmode;
11103 switch (code)
11104 {
11105 case GT:
11106 case UNGT:
11107 if (code == GT || !TARGET_IEEE_FP)
11108 {
11109 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11110 code = EQ;
11111 }
11112 else
11113 {
11114 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11115 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11116 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11117 intcmp_mode = CCmode;
11118 code = GEU;
11119 }
11120 break;
11121 case LT:
11122 case UNLT:
11123 if (code == LT && TARGET_IEEE_FP)
11124 {
11125 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11126 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11127 intcmp_mode = CCmode;
11128 code = EQ;
11129 }
11130 else
11131 {
11132 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11133 code = NE;
11134 }
11135 break;
11136 case GE:
11137 case UNGE:
11138 if (code == GE || !TARGET_IEEE_FP)
11139 {
11140 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11141 code = EQ;
11142 }
11143 else
11144 {
11145 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11146 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11147 GEN_INT (0x01)));
11148 code = NE;
11149 }
11150 break;
11151 case LE:
11152 case UNLE:
11153 if (code == LE && TARGET_IEEE_FP)
11154 {
11155 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11156 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11157 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11158 intcmp_mode = CCmode;
11159 code = LTU;
11160 }
11161 else
11162 {
11163 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11164 code = NE;
11165 }
11166 break;
11167 case EQ:
11168 case UNEQ:
11169 if (code == EQ && TARGET_IEEE_FP)
11170 {
11171 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11172 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11173 intcmp_mode = CCmode;
11174 code = EQ;
11175 }
11176 else
11177 {
11178 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11179 code = NE;
11180 break;
11181 }
11182 break;
11183 case NE:
11184 case LTGT:
11185 if (code == NE && TARGET_IEEE_FP)
11186 {
11187 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11188 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11189 GEN_INT (0x40)));
11190 code = NE;
11191 }
11192 else
11193 {
11194 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11195 code = EQ;
11196 }
11197 break;
11198
11199 case UNORDERED:
11200 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11201 code = NE;
11202 break;
11203 case ORDERED:
11204 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11205 code = EQ;
11206 break;
11207
11208 default:
11209 gcc_unreachable ();
11210 }
11211 }
11212
11213 /* Return the test that should be put into the flags user, i.e.
11214 the bcc, scc, or cmov instruction. */
11215 return gen_rtx_fmt_ee (code, VOIDmode,
11216 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11217 const0_rtx);
11218 }
11219
11220 rtx
11221 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11222 {
11223 rtx op0, op1, ret;
11224 op0 = ix86_compare_op0;
11225 op1 = ix86_compare_op1;
11226
11227 if (second_test)
11228 *second_test = NULL_RTX;
11229 if (bypass_test)
11230 *bypass_test = NULL_RTX;
11231
11232 if (ix86_compare_emitted)
11233 {
11234 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11235 ix86_compare_emitted = NULL_RTX;
11236 }
11237 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11238 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11239 second_test, bypass_test);
11240 else
11241 ret = ix86_expand_int_compare (code, op0, op1);
11242
11243 return ret;
11244 }
11245
11246 /* Return true if CODE will result in a nontrivial jump sequence. */
11247 bool
11248 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11249 {
11250 enum rtx_code bypass_code, first_code, second_code;
11251 if (!TARGET_CMOVE)
11252 return true;
11253 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11254 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11255 }
11256
11257 void
11258 ix86_expand_branch (enum rtx_code code, rtx label)
11259 {
11260 rtx tmp;
11261
11262 /* If we have emitted a compare insn, go straight to simple.
11263 ix86_expand_compare won't emit anything if ix86_compare_emitted
11264 is non-NULL. */
11265 if (ix86_compare_emitted)
11266 goto simple;
11267
11268 switch (GET_MODE (ix86_compare_op0))
11269 {
11270 case QImode:
11271 case HImode:
11272 case SImode:
11273 simple:
11274 tmp = ix86_expand_compare (code, NULL, NULL);
11275 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11276 gen_rtx_LABEL_REF (VOIDmode, label),
11277 pc_rtx);
11278 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11279 return;
11280
11281 case SFmode:
11282 case DFmode:
11283 case XFmode:
11284 {
11285 rtvec vec;
11286 int use_fcomi;
11287 enum rtx_code bypass_code, first_code, second_code;
11288
11289 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11290 &ix86_compare_op1);
11291
11292 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11293
11294 /* Check whether we will use the natural sequence with one jump. If
11295 so, we can expand the jump early. Otherwise delay expansion by
11296 creating a compound insn so as not to confuse the optimizers. */
11297 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11298 && TARGET_CMOVE)
11299 {
11300 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11301 gen_rtx_LABEL_REF (VOIDmode, label),
11302 pc_rtx, NULL_RTX, NULL_RTX);
11303 }
11304 else
11305 {
11306 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11307 ix86_compare_op0, ix86_compare_op1);
11308 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11309 gen_rtx_LABEL_REF (VOIDmode, label),
11310 pc_rtx);
11311 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11312
11313 use_fcomi = ix86_use_fcomi_compare (code);
11314 vec = rtvec_alloc (3 + !use_fcomi);
11315 RTVEC_ELT (vec, 0) = tmp;
11316 RTVEC_ELT (vec, 1)
11317 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11318 RTVEC_ELT (vec, 2)
11319 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11320 if (! use_fcomi)
11321 RTVEC_ELT (vec, 3)
11322 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11323
11324 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11325 }
11326 return;
11327 }
11328
11329 case DImode:
11330 if (TARGET_64BIT)
11331 goto simple;
11332 case TImode:
11333 /* Expand a DImode or TImode branch into multiple compare+branch. */
11334 {
11335 rtx lo[2], hi[2], label2;
11336 enum rtx_code code1, code2, code3;
11337 enum machine_mode submode;
11338
11339 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11340 {
11341 tmp = ix86_compare_op0;
11342 ix86_compare_op0 = ix86_compare_op1;
11343 ix86_compare_op1 = tmp;
11344 code = swap_condition (code);
11345 }
11346 if (GET_MODE (ix86_compare_op0) == DImode)
11347 {
11348 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11349 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11350 submode = SImode;
11351 }
11352 else
11353 {
11354 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11355 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11356 submode = DImode;
11357 }
11358
11359 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11360 avoid two branches. This costs one extra insn, so disable when
11361 optimizing for size. */
11362
11363 if ((code == EQ || code == NE)
11364 && (!optimize_size
11365 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11366 {
11367 rtx xor0, xor1;
11368
11369 xor1 = hi[0];
11370 if (hi[1] != const0_rtx)
11371 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11372 NULL_RTX, 0, OPTAB_WIDEN);
11373
11374 xor0 = lo[0];
11375 if (lo[1] != const0_rtx)
11376 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11377 NULL_RTX, 0, OPTAB_WIDEN);
11378
11379 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11380 NULL_RTX, 0, OPTAB_WIDEN);
11381
11382 ix86_compare_op0 = tmp;
11383 ix86_compare_op1 = const0_rtx;
11384 ix86_expand_branch (code, label);
11385 return;
11386 }
11387
11388 /* Otherwise, if we are doing a less-than or greater-than-or-equal
11389 comparison, op1 is a constant and the low word is zero, then we can
11390 just examine the high word. */
11391
11392 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11393 switch (code)
11394 {
11395 case LT: case LTU: case GE: case GEU:
11396 ix86_compare_op0 = hi[0];
11397 ix86_compare_op1 = hi[1];
11398 ix86_expand_branch (code, label);
11399 return;
11400 default:
11401 break;
11402 }
11403
11404 /* Otherwise, we need two or three jumps. */
11405
11406 label2 = gen_label_rtx ();
11407
11408 code1 = code;
11409 code2 = swap_condition (code);
11410 code3 = unsigned_condition (code);
11411
11412 switch (code)
11413 {
11414 case LT: case GT: case LTU: case GTU:
11415 break;
11416
11417 case LE: code1 = LT; code2 = GT; break;
11418 case GE: code1 = GT; code2 = LT; break;
11419 case LEU: code1 = LTU; code2 = GTU; break;
11420 case GEU: code1 = GTU; code2 = LTU; break;
11421
11422 case EQ: code1 = UNKNOWN; code2 = NE; break;
11423 case NE: code2 = UNKNOWN; break;
11424
11425 default:
11426 gcc_unreachable ();
11427 }
11428
11429 /*
11430 * a < b =>
11431 * if (hi(a) < hi(b)) goto true;
11432 * if (hi(a) > hi(b)) goto false;
11433 * if (lo(a) < lo(b)) goto true;
11434 * false:
11435 */
11436
11437 ix86_compare_op0 = hi[0];
11438 ix86_compare_op1 = hi[1];
11439
11440 if (code1 != UNKNOWN)
11441 ix86_expand_branch (code1, label);
11442 if (code2 != UNKNOWN)
11443 ix86_expand_branch (code2, label2);
11444
11445 ix86_compare_op0 = lo[0];
11446 ix86_compare_op1 = lo[1];
11447 ix86_expand_branch (code3, label);
11448
11449 if (code2 != UNKNOWN)
11450 emit_label (label2);
11451 return;
11452 }
11453
11454 default:
11455 gcc_unreachable ();
11456 }
11457 }
11458
11459 /* Split branch based on floating point condition. */
11460 void
11461 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11462 rtx target1, rtx target2, rtx tmp, rtx pushed)
11463 {
11464 rtx second, bypass;
11465 rtx label = NULL_RTX;
11466 rtx condition;
11467 int bypass_probability = -1, second_probability = -1, probability = -1;
11468 rtx i;
11469
11470 if (target2 != pc_rtx)
11471 {
11472 rtx tmp = target2;
11473 code = reverse_condition_maybe_unordered (code);
11474 target2 = target1;
11475 target1 = tmp;
11476 }
11477
11478 condition = ix86_expand_fp_compare (code, op1, op2,
11479 tmp, &second, &bypass);
11480
11481 /* Remove pushed operand from stack. */
11482 if (pushed)
11483 ix86_free_from_memory (GET_MODE (pushed));
11484
11485 if (split_branch_probability >= 0)
11486 {
11487 /* Distribute the probabilities across the jumps.
11488 Assume that BYPASS and SECOND always test
11489 for UNORDERED. */
11490 probability = split_branch_probability;
11491
11492 /* A value of 1 is low enough that there is no need for the probability
11493 to be updated. Later we may run some experiments and see
11494 if unordered values are more frequent in practice. */
11495 if (bypass)
11496 bypass_probability = 1;
11497 if (second)
11498 second_probability = 1;
11499 }
11500 if (bypass != NULL_RTX)
11501 {
11502 label = gen_label_rtx ();
11503 i = emit_jump_insn (gen_rtx_SET
11504 (VOIDmode, pc_rtx,
11505 gen_rtx_IF_THEN_ELSE (VOIDmode,
11506 bypass,
11507 gen_rtx_LABEL_REF (VOIDmode,
11508 label),
11509 pc_rtx)));
11510 if (bypass_probability >= 0)
11511 REG_NOTES (i)
11512 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11513 GEN_INT (bypass_probability),
11514 REG_NOTES (i));
11515 }
11516 i = emit_jump_insn (gen_rtx_SET
11517 (VOIDmode, pc_rtx,
11518 gen_rtx_IF_THEN_ELSE (VOIDmode,
11519 condition, target1, target2)));
11520 if (probability >= 0)
11521 REG_NOTES (i)
11522 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11523 GEN_INT (probability),
11524 REG_NOTES (i));
11525 if (second != NULL_RTX)
11526 {
11527 i = emit_jump_insn (gen_rtx_SET
11528 (VOIDmode, pc_rtx,
11529 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11530 target2)));
11531 if (second_probability >= 0)
11532 REG_NOTES (i)
11533 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11534 GEN_INT (second_probability),
11535 REG_NOTES (i));
11536 }
11537 if (label != NULL_RTX)
11538 emit_label (label);
11539 }
11540
11541 int
11542 ix86_expand_setcc (enum rtx_code code, rtx dest)
11543 {
11544 rtx ret, tmp, tmpreg, equiv;
11545 rtx second_test, bypass_test;
11546
11547 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11548 return 0; /* FAIL */
11549
11550 gcc_assert (GET_MODE (dest) == QImode);
11551
11552 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11553 PUT_MODE (ret, QImode);
11554
11555 tmp = dest;
11556 tmpreg = dest;
11557
11558 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11559 if (bypass_test || second_test)
11560 {
11561 rtx test = second_test;
11562 int bypass = 0;
11563 rtx tmp2 = gen_reg_rtx (QImode);
11564 if (bypass_test)
11565 {
11566 gcc_assert (!second_test);
11567 test = bypass_test;
11568 bypass = 1;
11569 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11570 }
11571 PUT_MODE (test, QImode);
11572 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11573
11574 if (bypass)
11575 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11576 else
11577 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11578 }
11579
11580 /* Attach a REG_EQUAL note describing the comparison result. */
11581 if (ix86_compare_op0 && ix86_compare_op1)
11582 {
11583 equiv = simplify_gen_relational (code, QImode,
11584 GET_MODE (ix86_compare_op0),
11585 ix86_compare_op0, ix86_compare_op1);
11586 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11587 }
11588
11589 return 1; /* DONE */
11590 }
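
/* As an illustrative sketch (register choice arbitrary), an IEEE "a != b"
   setcc with fcomi available comes out roughly as

	fucomip
	setne	%al		; LTGT part, ZF clear
	setp	%cl		; UNORDERED second test, PF set
	orb	%cl, %al	; unordered operands compare "not equal"

   while a bypass test would be combined with "and" instead of "or".  */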
11591
11592 /* Expand comparison setting or clearing carry flag. Return true when
11593 successful and set pop for the operation. */
11594 static bool
11595 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11596 {
11597 enum machine_mode mode =
11598 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11599
11600 /* Do not handle DImode compares that go through the special path. FP compares
11601 are handled below only when they reduce to a carry flag based compare. */
11602 if (mode == (TARGET_64BIT ? TImode : DImode))
11603 return false;
11604 if (FLOAT_MODE_P (mode))
11605 {
11606 rtx second_test = NULL, bypass_test = NULL;
11607 rtx compare_op, compare_seq;
11608
11609 /* Shortcut: the following common codes never translate into carry flag compares. */
11610 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11611 || code == ORDERED || code == UNORDERED)
11612 return false;
11613
11614 /* These comparisons require the zero flag; swap the operands so they won't. */
11615 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11616 && !TARGET_IEEE_FP)
11617 {
11618 rtx tmp = op0;
11619 op0 = op1;
11620 op1 = tmp;
11621 code = swap_condition (code);
11622 }
11623
11624 /* Try to expand the comparison and verify that we end up with a carry flag
11625 based comparison. This fails to be true only when we decide to expand the
11626 comparison using arithmetic, which is not a common scenario. */
11627 start_sequence ();
11628 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11629 &second_test, &bypass_test);
11630 compare_seq = get_insns ();
11631 end_sequence ();
11632
11633 if (second_test || bypass_test)
11634 return false;
11635 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11636 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11637 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11638 else
11639 code = GET_CODE (compare_op);
11640 if (code != LTU && code != GEU)
11641 return false;
11642 emit_insn (compare_seq);
11643 *pop = compare_op;
11644 return true;
11645 }
11646 if (!INTEGRAL_MODE_P (mode))
11647 return false;
11648 switch (code)
11649 {
11650 case LTU:
11651 case GEU:
11652 break;
11653
11654 /* Convert a==0 into (unsigned)a<1. */
11655 case EQ:
11656 case NE:
11657 if (op1 != const0_rtx)
11658 return false;
11659 op1 = const1_rtx;
11660 code = (code == EQ ? LTU : GEU);
11661 break;
11662
11663 /* Convert a>b into b<a or a>=b+1. */
11664 case GTU:
11665 case LEU:
11666 if (CONST_INT_P (op1))
11667 {
11668 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11669 /* Bail out on overflow. We could still swap the operands, but that
11670 would force loading the constant into a register. */
11671 if (op1 == const0_rtx
11672 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11673 return false;
11674 code = (code == GTU ? GEU : LTU);
11675 }
11676 else
11677 {
11678 rtx tmp = op1;
11679 op1 = op0;
11680 op0 = tmp;
11681 code = (code == GTU ? LTU : GEU);
11682 }
11683 break;
11684
11685 /* Convert a>=0 into (unsigned)a<0x80000000. */
11686 case LT:
11687 case GE:
11688 if (mode == DImode || op1 != const0_rtx)
11689 return false;
11690 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11691 code = (code == LT ? GEU : LTU);
11692 break;
11693 case LE:
11694 case GT:
11695 if (mode == DImode || op1 != constm1_rtx)
11696 return false;
11697 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11698 code = (code == LE ? GEU : LTU);
11699 break;
11700
11701 default:
11702 return false;
11703 }
11704 /* Swapping the operands may cause a constant to appear as the first operand. */
11705 if (!nonimmediate_operand (op0, VOIDmode))
11706 {
11707 if (no_new_pseudos)
11708 return false;
11709 op0 = force_reg (mode, op0);
11710 }
11711 ix86_compare_op0 = op0;
11712 ix86_compare_op1 = op1;
11713 *pop = ix86_expand_compare (code, NULL, NULL);
11714 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11715 return true;
11716 }
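
/* For instance, ix86_expand_carry_flag_compare turns "a == 0" into the
   unsigned test "a < 1", which is just

	cmpl	$1, a		; CF set exactly when a == 0

   so the sbb/adc based sequences in ix86_expand_int_movcc and
   ix86_expand_int_addcc below can consume the carry flag directly
   (illustrative sketch).  */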
11717
11718 int
11719 ix86_expand_int_movcc (rtx operands[])
11720 {
11721 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11722 rtx compare_seq, compare_op;
11723 rtx second_test, bypass_test;
11724 enum machine_mode mode = GET_MODE (operands[0]);
11725 bool sign_bit_compare_p = false;
11726
11727 start_sequence ();
11728 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11729 compare_seq = get_insns ();
11730 end_sequence ();
11731
11732 compare_code = GET_CODE (compare_op);
11733
11734 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11735 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11736 sign_bit_compare_p = true;
11737
11738 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11739 HImode insns, we'd be swallowed in word prefix ops. */
11740
11741 if ((mode != HImode || TARGET_FAST_PREFIX)
11742 && (mode != (TARGET_64BIT ? TImode : DImode))
11743 && CONST_INT_P (operands[2])
11744 && CONST_INT_P (operands[3]))
11745 {
11746 rtx out = operands[0];
11747 HOST_WIDE_INT ct = INTVAL (operands[2]);
11748 HOST_WIDE_INT cf = INTVAL (operands[3]);
11749 HOST_WIDE_INT diff;
11750
11751 diff = ct - cf;
11752 /* Sign bit compares are better done using shifts than by using
11753 sbb. */
11754 if (sign_bit_compare_p
11755 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11756 ix86_compare_op1, &compare_op))
11757 {
11758 /* Detect overlap between destination and compare sources. */
11759 rtx tmp = out;
11760
11761 if (!sign_bit_compare_p)
11762 {
11763 bool fpcmp = false;
11764
11765 compare_code = GET_CODE (compare_op);
11766
11767 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11768 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11769 {
11770 fpcmp = true;
11771 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11772 }
11773
11774 /* To simplify rest of code, restrict to the GEU case. */
11775 if (compare_code == LTU)
11776 {
11777 HOST_WIDE_INT tmp = ct;
11778 ct = cf;
11779 cf = tmp;
11780 compare_code = reverse_condition (compare_code);
11781 code = reverse_condition (code);
11782 }
11783 else
11784 {
11785 if (fpcmp)
11786 PUT_CODE (compare_op,
11787 reverse_condition_maybe_unordered
11788 (GET_CODE (compare_op)));
11789 else
11790 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11791 }
11792 diff = ct - cf;
11793
11794 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11795 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11796 tmp = gen_reg_rtx (mode);
11797
11798 if (mode == DImode)
11799 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11800 else
11801 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11802 }
11803 else
11804 {
11805 if (code == GT || code == GE)
11806 code = reverse_condition (code);
11807 else
11808 {
11809 HOST_WIDE_INT tmp = ct;
11810 ct = cf;
11811 cf = tmp;
11812 diff = ct - cf;
11813 }
11814 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11815 ix86_compare_op1, VOIDmode, 0, -1);
11816 }
11817
11818 if (diff == 1)
11819 {
11820 /*
11821 * cmpl op0,op1
11822 * sbbl dest,dest
11823 * [addl dest, ct]
11824 *
11825 * Size 5 - 8.
11826 */
11827 if (ct)
11828 tmp = expand_simple_binop (mode, PLUS,
11829 tmp, GEN_INT (ct),
11830 copy_rtx (tmp), 1, OPTAB_DIRECT);
11831 }
11832 else if (cf == -1)
11833 {
11834 /*
11835 * cmpl op0,op1
11836 * sbbl dest,dest
11837 * orl $ct, dest
11838 *
11839 * Size 8.
11840 */
11841 tmp = expand_simple_binop (mode, IOR,
11842 tmp, GEN_INT (ct),
11843 copy_rtx (tmp), 1, OPTAB_DIRECT);
11844 }
11845 else if (diff == -1 && ct)
11846 {
11847 /*
11848 * cmpl op0,op1
11849 * sbbl dest,dest
11850 * notl dest
11851 * [addl dest, cf]
11852 *
11853 * Size 8 - 11.
11854 */
11855 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11856 if (cf)
11857 tmp = expand_simple_binop (mode, PLUS,
11858 copy_rtx (tmp), GEN_INT (cf),
11859 copy_rtx (tmp), 1, OPTAB_DIRECT);
11860 }
11861 else
11862 {
11863 /*
11864 * cmpl op0,op1
11865 * sbbl dest,dest
11866 * [notl dest]
11867 * andl cf - ct, dest
11868 * [addl dest, ct]
11869 *
11870 * Size 8 - 11.
11871 */
11872
11873 if (cf == 0)
11874 {
11875 cf = ct;
11876 ct = 0;
11877 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11878 }
11879
11880 tmp = expand_simple_binop (mode, AND,
11881 copy_rtx (tmp),
11882 gen_int_mode (cf - ct, mode),
11883 copy_rtx (tmp), 1, OPTAB_DIRECT);
11884 if (ct)
11885 tmp = expand_simple_binop (mode, PLUS,
11886 copy_rtx (tmp), GEN_INT (ct),
11887 copy_rtx (tmp), 1, OPTAB_DIRECT);
11888 }
11889
11890 if (!rtx_equal_p (tmp, out))
11891 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11892
11893 return 1; /* DONE */
11894 }
11895
11896 if (diff < 0)
11897 {
11898 HOST_WIDE_INT tmp;
11899 tmp = ct, ct = cf, cf = tmp;
11900 diff = -diff;
11901 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11902 {
11903 /* We may be reversing an unordered compare to a normal compare, which
11904 is not valid in general (we may convert a non-trapping condition
11905 to a trapping one); however, on i386 we currently emit all
11906 comparisons unordered. */
11907 compare_code = reverse_condition_maybe_unordered (compare_code);
11908 code = reverse_condition_maybe_unordered (code);
11909 }
11910 else
11911 {
11912 compare_code = reverse_condition (compare_code);
11913 code = reverse_condition (code);
11914 }
11915 }
11916
11917 compare_code = UNKNOWN;
11918 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11919 && CONST_INT_P (ix86_compare_op1))
11920 {
11921 if (ix86_compare_op1 == const0_rtx
11922 && (code == LT || code == GE))
11923 compare_code = code;
11924 else if (ix86_compare_op1 == constm1_rtx)
11925 {
11926 if (code == LE)
11927 compare_code = LT;
11928 else if (code == GT)
11929 compare_code = GE;
11930 }
11931 }
11932
11933 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11934 if (compare_code != UNKNOWN
11935 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11936 && (cf == -1 || ct == -1))
11937 {
11938 /* If lea code below could be used, only optimize
11939 if it results in a 2 insn sequence. */
11940
11941 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11942 || diff == 3 || diff == 5 || diff == 9)
11943 || (compare_code == LT && ct == -1)
11944 || (compare_code == GE && cf == -1))
11945 {
11946 /*
11947 * notl op1 (if necessary)
11948 * sarl $31, op1
11949 * orl cf, op1
11950 */
11951 if (ct != -1)
11952 {
11953 cf = ct;
11954 ct = -1;
11955 code = reverse_condition (code);
11956 }
11957
11958 out = emit_store_flag (out, code, ix86_compare_op0,
11959 ix86_compare_op1, VOIDmode, 0, -1);
11960
11961 out = expand_simple_binop (mode, IOR,
11962 out, GEN_INT (cf),
11963 out, 1, OPTAB_DIRECT);
11964 if (out != operands[0])
11965 emit_move_insn (operands[0], out);
11966
11967 return 1; /* DONE */
11968 }
11969 }
11970
11971
11972 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11973 || diff == 3 || diff == 5 || diff == 9)
11974 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11975 && (mode != DImode
11976 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11977 {
11978 /*
11979 * xorl dest,dest
11980 * cmpl op1,op2
11981 * setcc dest
11982 * lea cf(dest*(ct-cf)),dest
11983 *
11984 * Size 14.
11985 *
11986 * This also catches the degenerate setcc-only case.
11987 */
11988
11989 rtx tmp;
11990 int nops;
11991
11992 out = emit_store_flag (out, code, ix86_compare_op0,
11993 ix86_compare_op1, VOIDmode, 0, 1);
11994
11995 nops = 0;
11996 /* On x86_64 the lea instruction operates on Pmode, so we need
11997 to get the arithmetic done in the proper mode to match. */
11998 if (diff == 1)
11999 tmp = copy_rtx (out);
12000 else
12001 {
12002 rtx out1;
12003 out1 = copy_rtx (out);
12004 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12005 nops++;
12006 if (diff & 1)
12007 {
12008 tmp = gen_rtx_PLUS (mode, tmp, out1);
12009 nops++;
12010 }
12011 }
12012 if (cf != 0)
12013 {
12014 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12015 nops++;
12016 }
12017 if (!rtx_equal_p (tmp, out))
12018 {
12019 if (nops == 1)
12020 out = force_operand (tmp, copy_rtx (out));
12021 else
12022 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12023 }
12024 if (!rtx_equal_p (out, operands[0]))
12025 emit_move_insn (operands[0], copy_rtx (out));
12026
12027 return 1; /* DONE */
12028 }
12029
12030 /*
12031 * General case: Jumpful:
12032 * xorl dest,dest cmpl op1, op2
12033 * cmpl op1, op2 movl ct, dest
12034 * setcc dest jcc 1f
12035 * decl dest movl cf, dest
12036 * andl (cf-ct),dest 1:
12037 * addl ct,dest
12038 *
12039 * Size 20. Size 14.
12040 *
12041 * This is reasonably steep, but branch mispredict costs are
12042 * high on modern cpus, so consider failing only if optimizing
12043 * for space.
12044 */
12045
12046 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12047 && BRANCH_COST >= 2)
12048 {
12049 if (cf == 0)
12050 {
12051 cf = ct;
12052 ct = 0;
12053 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
12054 /* We may be reversing an unordered compare to a normal compare,
12055 which is not valid in general (we may convert a non-trapping
12056 condition to a trapping one); however, on i386 we currently
12057 emit all comparisons unordered. */
12058 code = reverse_condition_maybe_unordered (code);
12059 else
12060 {
12061 code = reverse_condition (code);
12062 if (compare_code != UNKNOWN)
12063 compare_code = reverse_condition (compare_code);
12064 }
12065 }
12066
12067 if (compare_code != UNKNOWN)
12068 {
12069 /* notl op1 (if needed)
12070 sarl $31, op1
12071 andl (cf-ct), op1
12072 addl ct, op1
12073
12074 For x < 0 (resp. x <= -1) there will be no notl,
12075 so if possible swap the constants to get rid of the
12076 complement.
12077 True/false will be -1/0 while code below (store flag
12078 followed by decrement) is 0/-1, so the constants need
12079 to be exchanged once more. */
12080
12081 if (compare_code == GE || !cf)
12082 {
12083 code = reverse_condition (code);
12084 compare_code = LT;
12085 }
12086 else
12087 {
12088 HOST_WIDE_INT tmp = cf;
12089 cf = ct;
12090 ct = tmp;
12091 }
12092
12093 out = emit_store_flag (out, code, ix86_compare_op0,
12094 ix86_compare_op1, VOIDmode, 0, -1);
12095 }
12096 else
12097 {
12098 out = emit_store_flag (out, code, ix86_compare_op0,
12099 ix86_compare_op1, VOIDmode, 0, 1);
12100
12101 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12102 copy_rtx (out), 1, OPTAB_DIRECT);
12103 }
12104
12105 out = expand_simple_binop (mode, AND, copy_rtx (out),
12106 gen_int_mode (cf - ct, mode),
12107 copy_rtx (out), 1, OPTAB_DIRECT);
12108 if (ct)
12109 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12110 copy_rtx (out), 1, OPTAB_DIRECT);
12111 if (!rtx_equal_p (out, operands[0]))
12112 emit_move_insn (operands[0], copy_rtx (out));
12113
12114 return 1; /* DONE */
12115 }
12116 }
12117
12118 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12119 {
12120 /* Try a few things more with specific constants and a variable. */
12121
12122 optab op;
12123 rtx var, orig_out, out, tmp;
12124
12125 if (BRANCH_COST <= 2)
12126 return 0; /* FAIL */
12127
12128 /* If one of the two operands is an interesting constant, load a
12129 constant with the code above and mask in the variable with a logical operation. */
12130
12131 if (CONST_INT_P (operands[2]))
12132 {
12133 var = operands[3];
12134 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12135 operands[3] = constm1_rtx, op = and_optab;
12136 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12137 operands[3] = const0_rtx, op = ior_optab;
12138 else
12139 return 0; /* FAIL */
12140 }
12141 else if (CONST_INT_P (operands[3]))
12142 {
12143 var = operands[2];
12144 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12145 operands[2] = constm1_rtx, op = and_optab;
12146 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12147 operands[2] = const0_rtx, op = ior_optab;
12148 else
12149 return 0; /* FAIL */
12150 }
12151 else
12152 return 0; /* FAIL */
12153
12154 orig_out = operands[0];
12155 tmp = gen_reg_rtx (mode);
12156 operands[0] = tmp;
12157
12158 /* Recurse to get the constant loaded. */
12159 if (ix86_expand_int_movcc (operands) == 0)
12160 return 0; /* FAIL */
12161
12162 /* Mask in the interesting variable. */
12163 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12164 OPTAB_WIDEN);
12165 if (!rtx_equal_p (out, orig_out))
12166 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12167
12168 return 1; /* DONE */
12169 }
12170
12171 /*
12172 * For comparison with above,
12173 *
12174 * movl cf,dest
12175 * movl ct,tmp
12176 * cmpl op1,op2
12177 * cmovcc tmp,dest
12178 *
12179 * Size 15.
12180 */
12181
12182 if (! nonimmediate_operand (operands[2], mode))
12183 operands[2] = force_reg (mode, operands[2]);
12184 if (! nonimmediate_operand (operands[3], mode))
12185 operands[3] = force_reg (mode, operands[3]);
12186
12187 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12188 {
12189 rtx tmp = gen_reg_rtx (mode);
12190 emit_move_insn (tmp, operands[3]);
12191 operands[3] = tmp;
12192 }
12193 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12194 {
12195 rtx tmp = gen_reg_rtx (mode);
12196 emit_move_insn (tmp, operands[2]);
12197 operands[2] = tmp;
12198 }
12199
12200 if (! register_operand (operands[2], VOIDmode)
12201 && (mode == QImode
12202 || ! register_operand (operands[3], VOIDmode)))
12203 operands[2] = force_reg (mode, operands[2]);
12204
12205 if (mode == QImode
12206 && ! register_operand (operands[3], VOIDmode))
12207 operands[3] = force_reg (mode, operands[3]);
12208
12209 emit_insn (compare_seq);
12210 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12211 gen_rtx_IF_THEN_ELSE (mode,
12212 compare_op, operands[2],
12213 operands[3])));
12214 if (bypass_test)
12215 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12216 gen_rtx_IF_THEN_ELSE (mode,
12217 bypass_test,
12218 copy_rtx (operands[3]),
12219 copy_rtx (operands[0]))));
12220 if (second_test)
12221 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12222 gen_rtx_IF_THEN_ELSE (mode,
12223 second_test,
12224 copy_rtx (operands[2]),
12225 copy_rtx (operands[0]))));
12226
12227 return 1; /* DONE */
12228 }
12229
12230 /* Swap, force into registers, or otherwise massage the two operands
12231 to an sse comparison with a mask result. Thus we differ a bit from
12232 ix86_prepare_fp_compare_args which expects to produce a flags result.
12233
12234 The DEST operand exists to help determine whether to commute commutative
12235 operators. The POP0/POP1 operands are updated in place. The new
12236 comparison code is returned, or UNKNOWN if not implementable. */
12237
12238 static enum rtx_code
12239 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12240 rtx *pop0, rtx *pop1)
12241 {
12242 rtx tmp;
12243
12244 switch (code)
12245 {
12246 case LTGT:
12247 case UNEQ:
12248 /* We have no LTGT as an operator. We could implement it with
12249 NE & ORDERED, but this requires an extra temporary. It's
12250 not clear that it's worth it. */
12251 return UNKNOWN;
12252
12253 case LT:
12254 case LE:
12255 case UNGT:
12256 case UNGE:
12257 /* These are supported directly. */
12258 break;
12259
12260 case EQ:
12261 case NE:
12262 case UNORDERED:
12263 case ORDERED:
12264 /* For commutative operators, try to canonicalize the destination
12265 operand to be first in the comparison - this helps reload to
12266 avoid extra moves. */
12267 if (!dest || !rtx_equal_p (dest, *pop1))
12268 break;
12269 /* FALLTHRU */
12270
12271 case GE:
12272 case GT:
12273 case UNLE:
12274 case UNLT:
12275 /* These are not supported directly. Swap the comparison operands
12276 to transform into something that is supported. */
12277 tmp = *pop0;
12278 *pop0 = *pop1;
12279 *pop1 = tmp;
12280 code = swap_condition (code);
12281 break;
12282
12283 default:
12284 gcc_unreachable ();
12285 }
12286
12287 return code;
12288 }
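
/* The swapping above reflects the cmpss/cmpps predicates the hardware
   provides (eq, lt, le, unord, neq, nlt, nle, ord): e.g. "a > b" has no
   direct predicate and is emitted as "b < a" with the operands reversed,
   while UNGE/UNGT map to the nlt/nle predicates directly (illustrative
   note).  */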
12289
12290 /* Detect conditional moves that exactly match min/max operational
12291 semantics. Note that this is IEEE safe, as long as we don't
12292 interchange the operands.
12293
12294 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12295 and TRUE if the operation is successful and instructions are emitted. */
12296
12297 static bool
12298 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12299 rtx cmp_op1, rtx if_true, rtx if_false)
12300 {
12301 enum machine_mode mode;
12302 bool is_min;
12303 rtx tmp;
12304
12305 if (code == LT)
12306 ;
12307 else if (code == UNGE)
12308 {
12309 tmp = if_true;
12310 if_true = if_false;
12311 if_false = tmp;
12312 }
12313 else
12314 return false;
12315
12316 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12317 is_min = true;
12318 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12319 is_min = false;
12320 else
12321 return false;
12322
12323 mode = GET_MODE (dest);
12324
12325 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12326 but MODE may be a vector mode and thus not appropriate. */
12327 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12328 {
12329 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12330 rtvec v;
12331
12332 if_true = force_reg (mode, if_true);
12333 v = gen_rtvec (2, if_true, if_false);
12334 tmp = gen_rtx_UNSPEC (mode, v, u);
12335 }
12336 else
12337 {
12338 code = is_min ? SMIN : SMAX;
12339 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12340 }
12341
12342 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12343 return true;
12344 }
12345
12346 /* Expand an sse vector comparison. Return the register with the result. */
12347
12348 static rtx
12349 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12350 rtx op_true, rtx op_false)
12351 {
12352 enum machine_mode mode = GET_MODE (dest);
12353 rtx x;
12354
12355 cmp_op0 = force_reg (mode, cmp_op0);
12356 if (!nonimmediate_operand (cmp_op1, mode))
12357 cmp_op1 = force_reg (mode, cmp_op1);
12358
12359 if (optimize
12360 || reg_overlap_mentioned_p (dest, op_true)
12361 || reg_overlap_mentioned_p (dest, op_false))
12362 dest = gen_reg_rtx (mode);
12363
12364 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12365 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12366
12367 return dest;
12368 }
12369
12370 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12371 operations. This is used for both scalar and vector conditional moves. */
12372
12373 static void
12374 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12375 {
12376 enum machine_mode mode = GET_MODE (dest);
12377 rtx t2, t3, x;
12378
12379 if (op_false == CONST0_RTX (mode))
12380 {
12381 op_true = force_reg (mode, op_true);
12382 x = gen_rtx_AND (mode, cmp, op_true);
12383 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12384 }
12385 else if (op_true == CONST0_RTX (mode))
12386 {
12387 op_false = force_reg (mode, op_false);
12388 x = gen_rtx_NOT (mode, cmp);
12389 x = gen_rtx_AND (mode, x, op_false);
12390 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12391 }
12392 else
12393 {
12394 op_true = force_reg (mode, op_true);
12395 op_false = force_reg (mode, op_false);
12396
12397 t2 = gen_reg_rtx (mode);
12398 if (optimize)
12399 t3 = gen_reg_rtx (mode);
12400 else
12401 t3 = dest;
12402
12403 x = gen_rtx_AND (mode, op_true, cmp);
12404 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12405
12406 x = gen_rtx_NOT (mode, cmp);
12407 x = gen_rtx_AND (mode, x, op_false);
12408 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12409
12410 x = gen_rtx_IOR (mode, t3, t2);
12411 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12412 }
12413 }
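
/* The general case above computes dest = (cmp & op_true) | (~cmp & op_false),
   relying on the SSE comparison result being an all-ones or all-zeros mask
   per element; for single precision this typically assembles to an
   andps/andnps/orps triple (illustrative note).  */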
12414
12415 /* Expand a floating-point conditional move. Return true if successful. */
12416
12417 int
12418 ix86_expand_fp_movcc (rtx operands[])
12419 {
12420 enum machine_mode mode = GET_MODE (operands[0]);
12421 enum rtx_code code = GET_CODE (operands[1]);
12422 rtx tmp, compare_op, second_test, bypass_test;
12423
12424 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12425 {
12426 enum machine_mode cmode;
12427
12428 /* Since we have no cmove for SSE registers, don't force bad register
12429 allocation just to gain access to it. Deny movcc when the
12430 comparison mode doesn't match the move mode. */
12431 cmode = GET_MODE (ix86_compare_op0);
12432 if (cmode == VOIDmode)
12433 cmode = GET_MODE (ix86_compare_op1);
12434 if (cmode != mode)
12435 return 0;
12436
12437 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12438 &ix86_compare_op0,
12439 &ix86_compare_op1);
12440 if (code == UNKNOWN)
12441 return 0;
12442
12443 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12444 ix86_compare_op1, operands[2],
12445 operands[3]))
12446 return 1;
12447
12448 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12449 ix86_compare_op1, operands[2], operands[3]);
12450 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12451 return 1;
12452 }
12453
12454 /* The floating point conditional move instructions don't directly
12455 support conditions resulting from a signed integer comparison. */
12456
12457 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12458
12462 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12463 {
12464 gcc_assert (!second_test && !bypass_test);
12465 tmp = gen_reg_rtx (QImode);
12466 ix86_expand_setcc (code, tmp);
12467 code = NE;
12468 ix86_compare_op0 = tmp;
12469 ix86_compare_op1 = const0_rtx;
12470 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12471 }
12472 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12473 {
12474 tmp = gen_reg_rtx (mode);
12475 emit_move_insn (tmp, operands[3]);
12476 operands[3] = tmp;
12477 }
12478 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12479 {
12480 tmp = gen_reg_rtx (mode);
12481 emit_move_insn (tmp, operands[2]);
12482 operands[2] = tmp;
12483 }
12484
12485 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12486 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12487 operands[2], operands[3])));
12488 if (bypass_test)
12489 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12490 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12491 operands[3], operands[0])));
12492 if (second_test)
12493 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12494 gen_rtx_IF_THEN_ELSE (mode, second_test,
12495 operands[2], operands[0])));
12496
12497 return 1;
12498 }
12499
12500 /* Expand a floating-point vector conditional move; a vcond operation
12501 rather than a movcc operation. */
12502
12503 bool
12504 ix86_expand_fp_vcond (rtx operands[])
12505 {
12506 enum rtx_code code = GET_CODE (operands[3]);
12507 rtx cmp;
12508
12509 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12510 &operands[4], &operands[5]);
12511 if (code == UNKNOWN)
12512 return false;
12513
12514 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12515 operands[5], operands[1], operands[2]))
12516 return true;
12517
12518 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12519 operands[1], operands[2]);
12520 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12521 return true;
12522 }
12523
12524 /* Expand a signed integral vector conditional move. */
12525
12526 bool
12527 ix86_expand_int_vcond (rtx operands[])
12528 {
12529 enum machine_mode mode = GET_MODE (operands[0]);
12530 enum rtx_code code = GET_CODE (operands[3]);
12531 bool negate = false;
12532 rtx x, cop0, cop1;
12533
12534 cop0 = operands[4];
12535 cop1 = operands[5];
12536
12537 /* Canonicalize the comparison to EQ, GT, GTU. */
12538 switch (code)
12539 {
12540 case EQ:
12541 case GT:
12542 case GTU:
12543 break;
12544
12545 case NE:
12546 case LE:
12547 case LEU:
12548 code = reverse_condition (code);
12549 negate = true;
12550 break;
12551
12552 case GE:
12553 case GEU:
12554 code = reverse_condition (code);
12555 negate = true;
12556 /* FALLTHRU */
12557
12558 case LT:
12559 case LTU:
12560 code = swap_condition (code);
12561 x = cop0, cop0 = cop1, cop1 = x;
12562 break;
12563
12564 default:
12565 gcc_unreachable ();
12566 }
12567
12568 /* Unsigned parallel compare is not supported by the hardware. Play some
12569 tricks to turn this into a signed comparison against 0. */
12570 if (code == GTU)
12571 {
12572 cop0 = force_reg (mode, cop0);
12573
12574 switch (mode)
12575 {
12576 case V4SImode:
12577 {
12578 rtx t1, t2, mask;
12579
12580 /* Perform a parallel modulo subtraction. */
12581 t1 = gen_reg_rtx (mode);
12582 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12583
12584 /* Extract the original sign bit of op0. */
12585 mask = GEN_INT (-0x80000000);
12586 mask = gen_rtx_CONST_VECTOR (mode,
12587 gen_rtvec (4, mask, mask, mask, mask));
12588 mask = force_reg (mode, mask);
12589 t2 = gen_reg_rtx (mode);
12590 emit_insn (gen_andv4si3 (t2, cop0, mask));
12591
12592 /* XOR it back into the result of the subtraction. This results
12593 in the sign bit set iff we saw unsigned underflow. */
12594 x = gen_reg_rtx (mode);
12595 emit_insn (gen_xorv4si3 (x, t1, t2));
12596
12597 code = GT;
12598 }
12599 break;
12600
12601 case V16QImode:
12602 case V8HImode:
12603 /* Perform a parallel unsigned saturating subtraction. */
12604 x = gen_reg_rtx (mode);
12605 emit_insn (gen_rtx_SET (VOIDmode, x,
12606 gen_rtx_US_MINUS (mode, cop0, cop1)));
12607
12608 code = EQ;
12609 negate = !negate;
12610 break;
12611
12612 default:
12613 gcc_unreachable ();
12614 }
12615
12616 cop0 = x;
12617 cop1 = CONST0_RTX (mode);
12618 }
12619
12620 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12621 operands[1+negate], operands[2-negate]);
12622
12623 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12624 operands[2-negate]);
12625 return true;
12626 }
12627
12628 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12629 true if we should do zero extension, else sign extension. HIGH_P is
12630 true if we want the N/2 high elements, else the low elements. */
12631
12632 void
12633 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12634 {
12635 enum machine_mode imode = GET_MODE (operands[1]);
12636 rtx (*unpack)(rtx, rtx, rtx);
12637 rtx se, dest;
12638
12639 switch (imode)
12640 {
12641 case V16QImode:
12642 if (high_p)
12643 unpack = gen_vec_interleave_highv16qi;
12644 else
12645 unpack = gen_vec_interleave_lowv16qi;
12646 break;
12647 case V8HImode:
12648 if (high_p)
12649 unpack = gen_vec_interleave_highv8hi;
12650 else
12651 unpack = gen_vec_interleave_lowv8hi;
12652 break;
12653 case V4SImode:
12654 if (high_p)
12655 unpack = gen_vec_interleave_highv4si;
12656 else
12657 unpack = gen_vec_interleave_lowv4si;
12658 break;
12659 default:
12660 gcc_unreachable ();
12661 }
12662
12663 dest = gen_lowpart (imode, operands[0]);
12664
12665 if (unsigned_p)
12666 se = force_reg (imode, CONST0_RTX (imode));
12667 else
12668 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12669 operands[1], pc_rtx, pc_rtx);
12670
12671 emit_insn (unpack (dest, operands[1], se));
12672 }
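
/* The sign-extending case works because comparing 0 > x yields an all-ones
   element exactly where x is negative, so interleaving the source with that
   mask (or with zero for the unsigned case) produces sign- or zero-extended
   elements of twice the width, e.g. V8HImode unpacks to V4SImode
   (illustrative note).  */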
12673
12674 /* Expand conditional increment or decrement using adc/sbb instructions.
12675 The default case using setcc followed by the conditional move can be
12676 done by generic code. */
12677 int
12678 ix86_expand_int_addcc (rtx operands[])
12679 {
12680 enum rtx_code code = GET_CODE (operands[1]);
12681 rtx compare_op;
12682 rtx val = const0_rtx;
12683 bool fpcmp = false;
12684 enum machine_mode mode = GET_MODE (operands[0]);
12685
12686 if (operands[3] != const1_rtx
12687 && operands[3] != constm1_rtx)
12688 return 0;
12689 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12690 ix86_compare_op1, &compare_op))
12691 return 0;
12692 code = GET_CODE (compare_op);
12693
12694 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12695 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12696 {
12697 fpcmp = true;
12698 code = ix86_fp_compare_code_to_integer (code);
12699 }
12700
12701 if (code != LTU)
12702 {
12703 val = constm1_rtx;
12704 if (fpcmp)
12705 PUT_CODE (compare_op,
12706 reverse_condition_maybe_unordered
12707 (GET_CODE (compare_op)));
12708 else
12709 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12710 }
12711 PUT_MODE (compare_op, mode);
12712
12713 /* Construct either adc or sbb insn. */
12714 if ((code == LTU) == (operands[3] == constm1_rtx))
12715 {
12716 switch (GET_MODE (operands[0]))
12717 {
12718 case QImode:
12719 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12720 break;
12721 case HImode:
12722 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12723 break;
12724 case SImode:
12725 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12726 break;
12727 case DImode:
12728 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12729 break;
12730 default:
12731 gcc_unreachable ();
12732 }
12733 }
12734 else
12735 {
12736 switch (GET_MODE (operands[0]))
12737 {
12738 case QImode:
12739 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12740 break;
12741 case HImode:
12742 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12743 break;
12744 case SImode:
12745 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12746 break;
12747 case DImode:
12748 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12749 break;
12750 default:
12751 gcc_unreachable ();
12752 }
12753 }
12754 return 1; /* DONE */
12755 }
12756
12757
12758 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12759 works for floating point parameters and non-offsettable memories.
12760 For pushes, it returns just stack offsets; the values will be saved
12761 in the right order. At most three parts are generated. */
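/* For example, on a 32-bit target a DFmode operand is split into two
   SImode parts and an XFmode operand into three, while on a 64-bit
   target XFmode splits into a DImode part plus an SImode upper part.  */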
12762
12763 static int
12764 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12765 {
12766 int size;
12767
12768 if (!TARGET_64BIT)
12769 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12770 else
12771 size = (GET_MODE_SIZE (mode) + 4) / 8;
12772
12773 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12774 gcc_assert (size >= 2 && size <= 3);
12775
12776 /* Optimize constant pool reference to immediates. This is used by fp
12777 moves, that force all constants to memory to allow combining. */
12778 if (MEM_P (operand) && MEM_READONLY_P (operand))
12779 {
12780 rtx tmp = maybe_get_pool_constant (operand);
12781 if (tmp)
12782 operand = tmp;
12783 }
12784
12785 if (MEM_P (operand) && !offsettable_memref_p (operand))
12786 {
12787 /* The only non-offsettable memories we handle are pushes. */
12788 int ok = push_operand (operand, VOIDmode);
12789
12790 gcc_assert (ok);
12791
12792 operand = copy_rtx (operand);
12793 PUT_MODE (operand, Pmode);
12794 parts[0] = parts[1] = parts[2] = operand;
12795 return size;
12796 }
12797
12798 if (GET_CODE (operand) == CONST_VECTOR)
12799 {
12800 enum machine_mode imode = int_mode_for_mode (mode);
12801 /* Caution: if we looked through a constant pool memory above,
12802 the operand may actually have a different mode now. That's
12803 ok, since we want to pun this all the way back to an integer. */
12804 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12805 gcc_assert (operand != NULL);
12806 mode = imode;
12807 }
12808
12809 if (!TARGET_64BIT)
12810 {
12811 if (mode == DImode)
12812 split_di (&operand, 1, &parts[0], &parts[1]);
12813 else
12814 {
12815 if (REG_P (operand))
12816 {
12817 gcc_assert (reload_completed);
12818 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12819 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12820 if (size == 3)
12821 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12822 }
12823 else if (offsettable_memref_p (operand))
12824 {
12825 operand = adjust_address (operand, SImode, 0);
12826 parts[0] = operand;
12827 parts[1] = adjust_address (operand, SImode, 4);
12828 if (size == 3)
12829 parts[2] = adjust_address (operand, SImode, 8);
12830 }
12831 else if (GET_CODE (operand) == CONST_DOUBLE)
12832 {
12833 REAL_VALUE_TYPE r;
12834 long l[4];
12835
12836 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12837 switch (mode)
12838 {
12839 case XFmode:
12840 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12841 parts[2] = gen_int_mode (l[2], SImode);
12842 break;
12843 case DFmode:
12844 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12845 break;
12846 default:
12847 gcc_unreachable ();
12848 }
12849 parts[1] = gen_int_mode (l[1], SImode);
12850 parts[0] = gen_int_mode (l[0], SImode);
12851 }
12852 else
12853 gcc_unreachable ();
12854 }
12855 }
12856 else
12857 {
12858 if (mode == TImode)
12859 split_ti (&operand, 1, &parts[0], &parts[1]);
12860 if (mode == XFmode || mode == TFmode)
12861 {
12862 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12863 if (REG_P (operand))
12864 {
12865 gcc_assert (reload_completed);
12866 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12867 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12868 }
12869 else if (offsettable_memref_p (operand))
12870 {
12871 operand = adjust_address (operand, DImode, 0);
12872 parts[0] = operand;
12873 parts[1] = adjust_address (operand, upper_mode, 8);
12874 }
12875 else if (GET_CODE (operand) == CONST_DOUBLE)
12876 {
12877 REAL_VALUE_TYPE r;
12878 long l[4];
12879
12880 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12881 real_to_target (l, &r, mode);
12882
12883 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12884 if (HOST_BITS_PER_WIDE_INT >= 64)
12885 parts[0]
12886 = gen_int_mode
12887 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12888 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12889 DImode);
12890 else
12891 parts[0] = immed_double_const (l[0], l[1], DImode);
12892
12893 if (upper_mode == SImode)
12894 parts[1] = gen_int_mode (l[2], SImode);
12895 else if (HOST_BITS_PER_WIDE_INT >= 64)
12896 parts[1]
12897 = gen_int_mode
12898 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12899 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12900 DImode);
12901 else
12902 parts[1] = immed_double_const (l[2], l[3], DImode);
12903 }
12904 else
12905 gcc_unreachable ();
12906 }
12907 }
12908
12909 return size;
12910 }
12911
12912 /* Emit insns to perform a move or push of DI, DF, and XF values.
12913 Return false when normal moves are needed; true when all required
12914 insns have been emitted. Operands 2-4 contain the input values
12915 in the correct order; operands 5-7 contain the output values. */
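/* On a 32-bit target a DImode move therefore becomes two SImode moves
   (three for XFmode), emitted in whichever order avoids clobbering an
   address register that the source operand still needs.  */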
12916
12917 void
12918 ix86_split_long_move (rtx operands[])
12919 {
12920 rtx part[2][3];
12921 int nparts;
12922 int push = 0;
12923 int collisions = 0;
12924 enum machine_mode mode = GET_MODE (operands[0]);
12925
12926 /* The DFmode expanders may ask us to move double.
12927 For a 64bit target this is a single move. By hiding the fact
12928 here we simplify i386.md splitters. */
12929 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12930 {
12931 /* Optimize constant pool reference to immediates. This is used by
12932 fp moves, that force all constants to memory to allow combining. */
12933
12934 if (MEM_P (operands[1])
12935 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12936 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12937 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12938 if (push_operand (operands[0], VOIDmode))
12939 {
12940 operands[0] = copy_rtx (operands[0]);
12941 PUT_MODE (operands[0], Pmode);
12942 }
12943 else
12944 operands[0] = gen_lowpart (DImode, operands[0]);
12945 operands[1] = gen_lowpart (DImode, operands[1]);
12946 emit_move_insn (operands[0], operands[1]);
12947 return;
12948 }
12949
12950 /* The only non-offsettable memory we handle is push. */
12951 if (push_operand (operands[0], VOIDmode))
12952 push = 1;
12953 else
12954 gcc_assert (!MEM_P (operands[0])
12955 || offsettable_memref_p (operands[0]));
12956
12957 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12958 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12959
12960 /* When emitting a push, take care of source operands on the stack. */
12961 if (push && MEM_P (operands[1])
12962 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12963 {
12964 if (nparts == 3)
12965 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12966 XEXP (part[1][2], 0));
12967 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12968 XEXP (part[1][1], 0));
12969 }
12970
12971 /* We need to do the copy in the right order in case an address register
12972 of the source overlaps the destination. */
12973 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12974 {
12975 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12976 collisions++;
12977 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12978 collisions++;
12979 if (nparts == 3
12980 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12981 collisions++;
12982
12983 /* Collision in the middle part can be handled by reordering. */
12984 if (collisions == 1 && nparts == 3
12985 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12986 {
12987 rtx tmp;
12988 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12989 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12990 }
12991
12992 /* If there are more collisions, we can't handle it by reordering.
12993 Do an lea to the last part and use only one colliding move. */
12994 else if (collisions > 1)
12995 {
12996 rtx base;
12997
12998 collisions = 1;
12999
13000 base = part[0][nparts - 1];
13001
13002 /* Handle the case when the last part isn't valid for lea.
13003 Happens in 64-bit mode storing the 12-byte XFmode. */
13004 if (GET_MODE (base) != Pmode)
13005 base = gen_rtx_REG (Pmode, REGNO (base));
13006
13007 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13008 part[1][0] = replace_equiv_address (part[1][0], base);
13009 part[1][1] = replace_equiv_address (part[1][1],
13010 plus_constant (base, UNITS_PER_WORD));
13011 if (nparts == 3)
13012 part[1][2] = replace_equiv_address (part[1][2],
13013 plus_constant (base, 8));
13014 }
13015 }
13016
13017 if (push)
13018 {
13019 if (!TARGET_64BIT)
13020 {
13021 if (nparts == 3)
13022 {
13023 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13024 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13025 emit_move_insn (part[0][2], part[1][2]);
13026 }
13027 }
13028 else
13029 {
13030 /* In 64bit mode we don't have a 32bit push available. If this is a
13031 register, that is OK - we will just use the larger counterpart. We also
13032 retype memory - this comes from an attempt to avoid a REX prefix on
13033 moving the second half of a TFmode value. */
13034 if (GET_MODE (part[1][1]) == SImode)
13035 {
13036 switch (GET_CODE (part[1][1]))
13037 {
13038 case MEM:
13039 part[1][1] = adjust_address (part[1][1], DImode, 0);
13040 break;
13041
13042 case REG:
13043 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13044 break;
13045
13046 default:
13047 gcc_unreachable ();
13048 }
13049
13050 if (GET_MODE (part[1][0]) == SImode)
13051 part[1][0] = part[1][1];
13052 }
13053 }
13054 emit_move_insn (part[0][1], part[1][1]);
13055 emit_move_insn (part[0][0], part[1][0]);
13056 return;
13057 }
13058
13059 /* Choose correct order to not overwrite the source before it is copied. */
13060 if ((REG_P (part[0][0])
13061 && REG_P (part[1][1])
13062 && (REGNO (part[0][0]) == REGNO (part[1][1])
13063 || (nparts == 3
13064 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13065 || (collisions > 0
13066 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13067 {
13068 if (nparts == 3)
13069 {
13070 operands[2] = part[0][2];
13071 operands[3] = part[0][1];
13072 operands[4] = part[0][0];
13073 operands[5] = part[1][2];
13074 operands[6] = part[1][1];
13075 operands[7] = part[1][0];
13076 }
13077 else
13078 {
13079 operands[2] = part[0][1];
13080 operands[3] = part[0][0];
13081 operands[5] = part[1][1];
13082 operands[6] = part[1][0];
13083 }
13084 }
13085 else
13086 {
13087 if (nparts == 3)
13088 {
13089 operands[2] = part[0][0];
13090 operands[3] = part[0][1];
13091 operands[4] = part[0][2];
13092 operands[5] = part[1][0];
13093 operands[6] = part[1][1];
13094 operands[7] = part[1][2];
13095 }
13096 else
13097 {
13098 operands[2] = part[0][0];
13099 operands[3] = part[0][1];
13100 operands[5] = part[1][0];
13101 operands[6] = part[1][1];
13102 }
13103 }
13104
13105 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13106 if (optimize_size)
13107 {
13108 if (CONST_INT_P (operands[5])
13109 && operands[5] != const0_rtx
13110 && REG_P (operands[2]))
13111 {
13112 if (CONST_INT_P (operands[6])
13113 && INTVAL (operands[6]) == INTVAL (operands[5]))
13114 operands[6] = operands[2];
13115
13116 if (nparts == 3
13117 && CONST_INT_P (operands[7])
13118 && INTVAL (operands[7]) == INTVAL (operands[5]))
13119 operands[7] = operands[2];
13120 }
13121
13122 if (nparts == 3
13123 && CONST_INT_P (operands[6])
13124 && operands[6] != const0_rtx
13125 && REG_P (operands[3])
13126 && CONST_INT_P (operands[7])
13127 && INTVAL (operands[7]) == INTVAL (operands[6]))
13128 operands[7] = operands[3];
13129 }
13130
13131 emit_move_insn (operands[2], operands[5]);
13132 emit_move_insn (operands[3], operands[6]);
13133 if (nparts == 3)
13134 emit_move_insn (operands[4], operands[7]);
13135
13136 return;
13137 }
13138
13139 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13140 left shift by a constant, either using a single shift or
13141 a sequence of add instructions. */
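/* For instance, when not optimizing for size a left shift by 2 may be
   emitted as two "add reg, reg" instructions if their combined cost does
   not exceed the target's cost of a shift by constant.  */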
13142
13143 static void
13144 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13145 {
13146 if (count == 1)
13147 {
13148 emit_insn ((mode == DImode
13149 ? gen_addsi3
13150 : gen_adddi3) (operand, operand, operand));
13151 }
13152 else if (!optimize_size
13153 && count * ix86_cost->add <= ix86_cost->shift_const)
13154 {
13155 int i;
13156 for (i=0; i<count; i++)
13157 {
13158 emit_insn ((mode == DImode
13159 ? gen_addsi3
13160 : gen_adddi3) (operand, operand, operand));
13161 }
13162 }
13163 else
13164 emit_insn ((mode == DImode
13165 ? gen_ashlsi3
13166 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13167 }
13168
13169 void
13170 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13171 {
13172 rtx low[2], high[2];
13173 int count;
13174 const int single_width = mode == DImode ? 32 : 64;
13175
13176 if (CONST_INT_P (operands[2]))
13177 {
13178 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13179 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13180
13181 if (count >= single_width)
13182 {
13183 emit_move_insn (high[0], low[1]);
13184 emit_move_insn (low[0], const0_rtx);
13185
13186 if (count > single_width)
13187 ix86_expand_ashl_const (high[0], count - single_width, mode);
13188 }
13189 else
13190 {
13191 if (!rtx_equal_p (operands[0], operands[1]))
13192 emit_move_insn (operands[0], operands[1]);
13193 emit_insn ((mode == DImode
13194 ? gen_x86_shld_1
13195 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13196 ix86_expand_ashl_const (low[0], count, mode);
13197 }
13198 return;
13199 }
13200
13201 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13202
13203 if (operands[1] == const1_rtx)
13204 {
13205 /* Assuming we've chosen QImode capable registers, 1 << N
13206 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13207 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13208 {
13209 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13210
13211 ix86_expand_clear (low[0]);
13212 ix86_expand_clear (high[0]);
13213 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13214
13215 d = gen_lowpart (QImode, low[0]);
13216 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13217 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13218 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13219
13220 d = gen_lowpart (QImode, high[0]);
13221 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13222 s = gen_rtx_NE (QImode, flags, const0_rtx);
13223 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13224 }
13225
13226 /* Otherwise, we can get the same results by manually performing
13227 a bit extract operation on bit 5/6, and then performing the two
13228 shifts. The two methods of getting 0/1 into low/high are exactly
13229 the same size. Avoiding the shift in the bit extract case helps
13230 pentium4 a bit; no one else seems to care much either way. */
13231 else
13232 {
13233 rtx x;
13234
13235 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13236 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13237 else
13238 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13239 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13240
13241 emit_insn ((mode == DImode
13242 ? gen_lshrsi3
13243 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13244 emit_insn ((mode == DImode
13245 ? gen_andsi3
13246 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13247 emit_move_insn (low[0], high[0]);
13248 emit_insn ((mode == DImode
13249 ? gen_xorsi3
13250 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13251 }
13252
13253 emit_insn ((mode == DImode
13254 ? gen_ashlsi3
13255 : gen_ashldi3) (low[0], low[0], operands[2]));
13256 emit_insn ((mode == DImode
13257 ? gen_ashlsi3
13258 : gen_ashldi3) (high[0], high[0], operands[2]));
13259 return;
13260 }
13261
13262 if (operands[1] == constm1_rtx)
13263 {
13264 /* For -1 << N, we can avoid the shld instruction, because we
13265 know that we're shifting 0...31/63 ones into a -1. */
13266 emit_move_insn (low[0], constm1_rtx);
13267 if (optimize_size)
13268 emit_move_insn (high[0], low[0]);
13269 else
13270 emit_move_insn (high[0], constm1_rtx);
13271 }
13272 else
13273 {
13274 if (!rtx_equal_p (operands[0], operands[1]))
13275 emit_move_insn (operands[0], operands[1]);
13276
13277 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13278 emit_insn ((mode == DImode
13279 ? gen_x86_shld_1
13280 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13281 }
13282
13283 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13284
13285 if (TARGET_CMOVE && scratch)
13286 {
13287 ix86_expand_clear (scratch);
13288 emit_insn ((mode == DImode
13289 ? gen_x86_shift_adj_1
13290 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13291 }
13292 else
13293 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13294 }
13295
13296 void
13297 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13298 {
13299 rtx low[2], high[2];
13300 int count;
13301 const int single_width = mode == DImode ? 32 : 64;
13302
13303 if (CONST_INT_P (operands[2]))
13304 {
13305 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13306 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13307
13308 if (count == single_width * 2 - 1)
13309 {
13310 emit_move_insn (high[0], high[1]);
13311 emit_insn ((mode == DImode
13312 ? gen_ashrsi3
13313 : gen_ashrdi3) (high[0], high[0],
13314 GEN_INT (single_width - 1)));
13315 emit_move_insn (low[0], high[0]);
13316
13317 }
13318 else if (count >= single_width)
13319 {
13320 emit_move_insn (low[0], high[1]);
13321 emit_move_insn (high[0], low[0]);
13322 emit_insn ((mode == DImode
13323 ? gen_ashrsi3
13324 : gen_ashrdi3) (high[0], high[0],
13325 GEN_INT (single_width - 1)));
13326 if (count > single_width)
13327 emit_insn ((mode == DImode
13328 ? gen_ashrsi3
13329 : gen_ashrdi3) (low[0], low[0],
13330 GEN_INT (count - single_width)));
13331 }
13332 else
13333 {
13334 if (!rtx_equal_p (operands[0], operands[1]))
13335 emit_move_insn (operands[0], operands[1]);
13336 emit_insn ((mode == DImode
13337 ? gen_x86_shrd_1
13338 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13339 emit_insn ((mode == DImode
13340 ? gen_ashrsi3
13341 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13342 }
13343 }
13344 else
13345 {
13346 if (!rtx_equal_p (operands[0], operands[1]))
13347 emit_move_insn (operands[0], operands[1]);
13348
13349 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13350
13351 emit_insn ((mode == DImode
13352 ? gen_x86_shrd_1
13353 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13354 emit_insn ((mode == DImode
13355 ? gen_ashrsi3
13356 : gen_ashrdi3) (high[0], high[0], operands[2]));
13357
13358 if (TARGET_CMOVE && scratch)
13359 {
13360 emit_move_insn (scratch, high[0]);
13361 emit_insn ((mode == DImode
13362 ? gen_ashrsi3
13363 : gen_ashrdi3) (scratch, scratch,
13364 GEN_INT (single_width - 1)));
13365 emit_insn ((mode == DImode
13366 ? gen_x86_shift_adj_1
13367 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13368 scratch));
13369 }
13370 else
13371 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13372 }
13373 }
13374
13375 void
13376 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13377 {
13378 rtx low[2], high[2];
13379 int count;
13380 const int single_width = mode == DImode ? 32 : 64;
13381
13382 if (CONST_INT_P (operands[2]))
13383 {
13384 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13385 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13386
13387 if (count >= single_width)
13388 {
13389 emit_move_insn (low[0], high[1]);
13390 ix86_expand_clear (high[0]);
13391
13392 if (count > single_width)
13393 emit_insn ((mode == DImode
13394 ? gen_lshrsi3
13395 : gen_lshrdi3) (low[0], low[0],
13396 GEN_INT (count - single_width)));
13397 }
13398 else
13399 {
13400 if (!rtx_equal_p (operands[0], operands[1]))
13401 emit_move_insn (operands[0], operands[1]);
13402 emit_insn ((mode == DImode
13403 ? gen_x86_shrd_1
13404 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13405 emit_insn ((mode == DImode
13406 ? gen_lshrsi3
13407 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13408 }
13409 }
13410 else
13411 {
13412 if (!rtx_equal_p (operands[0], operands[1]))
13413 emit_move_insn (operands[0], operands[1]);
13414
13415 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13416
13417 emit_insn ((mode == DImode
13418 ? gen_x86_shrd_1
13419 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13420 emit_insn ((mode == DImode
13421 ? gen_lshrsi3
13422 : gen_lshrdi3) (high[0], high[0], operands[2]));
13423
13424 /* Heh. By reversing the arguments, we can reuse this pattern. */
13425 if (TARGET_CMOVE && scratch)
13426 {
13427 ix86_expand_clear (scratch);
13428 emit_insn ((mode == DImode
13429 ? gen_x86_shift_adj_1
13430 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13431 scratch));
13432 }
13433 else
13434 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13435 }
13436 }
13437
13438 /* Predict the just emitted jump instruction to be taken with probability PROB. */
13439 static void
13440 predict_jump (int prob)
13441 {
13442 rtx insn = get_last_insn ();
13443 gcc_assert (JUMP_P (insn));
13444 REG_NOTES (insn)
13445 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13446 GEN_INT (prob),
13447 REG_NOTES (insn));
13448 }
13449
13450 /* Helper function for the string operations below. Test whether VARIABLE
13451 is aligned to VALUE bytes. If so, jump to the label. */
13452 static rtx
13453 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13454 {
13455 rtx label = gen_label_rtx ();
13456 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13457 if (GET_MODE (variable) == DImode)
13458 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13459 else
13460 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13461 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13462 1, label);
13463 if (epilogue)
13464 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13465 else
13466 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13467 return label;
13468 }
13469
13470 /* Decrease COUNTREG by VALUE. */
13471 static void
13472 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13473 {
13474 if (GET_MODE (countreg) == DImode)
13475 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13476 else
13477 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13478 }
13479
13480 /* Zero extend a possibly SImode EXP to a Pmode register. */
13481 rtx
13482 ix86_zero_extend_to_Pmode (rtx exp)
13483 {
13484 rtx r;
13485 if (GET_MODE (exp) == VOIDmode)
13486 return force_reg (Pmode, exp);
13487 if (GET_MODE (exp) == Pmode)
13488 return copy_to_mode_reg (Pmode, exp);
13489 r = gen_reg_rtx (Pmode);
13490 emit_insn (gen_zero_extendsidi2 (r, exp));
13491 return r;
13492 }
13493
13494 /* Divide COUNTREG by SCALE. */
13495 static rtx
13496 scale_counter (rtx countreg, int scale)
13497 {
13498 rtx sc;
13499 rtx piece_size_mask;
13500
13501 if (scale == 1)
13502 return countreg;
13503 if (CONST_INT_P (countreg))
13504 return GEN_INT (INTVAL (countreg) / scale);
13505 gcc_assert (REG_P (countreg));
13506
13507 piece_size_mask = GEN_INT (scale - 1);
13508 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13509 GEN_INT (exact_log2 (scale)),
13510 NULL, 1, OPTAB_DIRECT);
13511 return sc;
13512 }
13513
13514 /* Return mode for the memcpy/memset loop counter. Prefer SImode over DImode
13515 for constant loop counts. */
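/* For example, a constant count of 10 is counted in SImode even on 64-bit
   targets, while a constant that does not fit in 32 bits uses DImode.  */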
13516
13517 static enum machine_mode
13518 counter_mode (rtx count_exp)
13519 {
13520 if (GET_MODE (count_exp) != VOIDmode)
13521 return GET_MODE (count_exp);
13522 if (GET_CODE (count_exp) != CONST_INT)
13523 return Pmode;
13524 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13525 return DImode;
13526 return SImode;
13527 }
13528
13529 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed to
13530 by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13531 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13532 equivalent loop to set memory to VALUE (assumed to be in MODE).
13533 
13534 The size is rounded down to a whole number of the chunk size moved at once.
13535 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
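/* For example, with MODE == SImode and UNROLL == 4 each iteration of the
   emitted loop moves 16 bytes; any remainder smaller than that is left
   for the epilogue code.  */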
13536
13537
13538 static void
13539 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13540 rtx destptr, rtx srcptr, rtx value,
13541 rtx count, enum machine_mode mode, int unroll,
13542 int expected_size)
13543 {
13544 rtx out_label, top_label, iter, tmp;
13545 enum machine_mode iter_mode = counter_mode (count);
13546 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13547 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13548 rtx size;
13549 rtx x_addr;
13550 rtx y_addr;
13551 int i;
13552
13553 top_label = gen_label_rtx ();
13554 out_label = gen_label_rtx ();
13555 iter = gen_reg_rtx (iter_mode);
13556
13557 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13558 NULL, 1, OPTAB_DIRECT);
13559 /* Those two should combine. */
13560 if (piece_size == const1_rtx)
13561 {
13562 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13563 true, out_label);
13564 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13565 }
13566 emit_move_insn (iter, const0_rtx);
13567
13568 emit_label (top_label);
13569
13570 tmp = convert_modes (Pmode, iter_mode, iter, true);
13571 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13572 destmem = change_address (destmem, mode, x_addr);
13573
13574 if (srcmem)
13575 {
13576 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13577 srcmem = change_address (srcmem, mode, y_addr);
13578
13579 /* When unrolling for chips that reorder memory reads and writes,
13580 we can save registers by using a single temporary.
13581 Using 4 temporaries is also overkill in 32bit mode. */
13582 if (!TARGET_64BIT && 0)
13583 {
13584 for (i = 0; i < unroll; i++)
13585 {
13586 if (i)
13587 {
13588 destmem =
13589 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13590 srcmem =
13591 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13592 }
13593 emit_move_insn (destmem, srcmem);
13594 }
13595 }
13596 else
13597 {
13598 rtx tmpreg[4];
13599 gcc_assert (unroll <= 4);
13600 for (i = 0; i < unroll; i++)
13601 {
13602 tmpreg[i] = gen_reg_rtx (mode);
13603 if (i)
13604 {
13605 srcmem =
13606 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13607 }
13608 emit_move_insn (tmpreg[i], srcmem);
13609 }
13610 for (i = 0; i < unroll; i++)
13611 {
13612 if (i)
13613 {
13614 destmem =
13615 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13616 }
13617 emit_move_insn (destmem, tmpreg[i]);
13618 }
13619 }
13620 }
13621 else
13622 for (i = 0; i < unroll; i++)
13623 {
13624 if (i)
13625 destmem =
13626 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13627 emit_move_insn (destmem, value);
13628 }
13629
13630 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13631 true, OPTAB_LIB_WIDEN);
13632 if (tmp != iter)
13633 emit_move_insn (iter, tmp);
13634
13635 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13636 true, top_label);
13637 if (expected_size != -1)
13638 {
13639 expected_size /= GET_MODE_SIZE (mode) * unroll;
13640 if (expected_size == 0)
13641 predict_jump (0);
13642 else if (expected_size > REG_BR_PROB_BASE)
13643 predict_jump (REG_BR_PROB_BASE - 1);
13644 else
13645 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13646 }
13647 else
13648 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13649 iter = ix86_zero_extend_to_Pmode (iter);
13650 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13651 true, OPTAB_LIB_WIDEN);
13652 if (tmp != destptr)
13653 emit_move_insn (destptr, tmp);
13654 if (srcptr)
13655 {
13656 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13657 true, OPTAB_LIB_WIDEN);
13658 if (tmp != srcptr)
13659 emit_move_insn (srcptr, tmp);
13660 }
13661 emit_label (out_label);
13662 }
13663
13664 /* Output "rep; mov" instruction.
13665 Arguments have the same meaning as for the previous function. */
13666 static void
13667 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13668 rtx destptr, rtx srcptr,
13669 rtx count,
13670 enum machine_mode mode)
13671 {
13672 rtx destexp;
13673 rtx srcexp;
13674 rtx countreg;
13675
13676 /* If the size is known, it is shorter to use rep movs. */
13677 if (mode == QImode && CONST_INT_P (count)
13678 && !(INTVAL (count) & 3))
13679 mode = SImode;
13680
13681 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13682 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13683 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13684 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13685 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13686 if (mode != QImode)
13687 {
13688 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13689 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13690 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13691 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13692 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13693 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13694 }
13695 else
13696 {
13697 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13698 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13699 }
13700 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13701 destexp, srcexp));
13702 }
13703
13704 /* Output "rep; stos" instruction.
13705 Arguments have the same meaning as for the previous function. */
13706 static void
13707 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13708 rtx count,
13709 enum machine_mode mode)
13710 {
13711 rtx destexp;
13712 rtx countreg;
13713
13714 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13715 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13716 value = force_reg (mode, gen_lowpart (mode, value));
13717 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13718 if (mode != QImode)
13719 {
13720 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13721 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13722 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13723 }
13724 else
13725 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13726 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13727 }
13728
13729 static void
13730 emit_strmov (rtx destmem, rtx srcmem,
13731 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13732 {
13733 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13734 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13735 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13736 }
13737
13738 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
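/* For a known count this expands the low bits directly: e.g. a constant
   count of 7 on a 64-bit target emits one SImode, one HImode and one
   QImode move (assuming MAX_SIZE permits); for unknown counts a residual
   loop or alignment tests with jumps are used instead.  */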
13739 static void
13740 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13741 rtx destptr, rtx srcptr, rtx count, int max_size)
13742 {
13743 rtx src, dest;
13744 if (CONST_INT_P (count))
13745 {
13746 HOST_WIDE_INT countval = INTVAL (count);
13747 int offset = 0;
13748
13749 if ((countval & 0x10) && max_size > 16)
13750 {
13751 if (TARGET_64BIT)
13752 {
13753 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13754 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13755 }
13756 else
13757 gcc_unreachable ();
13758 offset += 16;
13759 }
13760 if ((countval & 0x08) && max_size > 8)
13761 {
13762 if (TARGET_64BIT)
13763 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13764 else
13765 {
13766 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13767 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13768 }
13769 offset += 8;
13770 }
13771 if ((countval & 0x04) && max_size > 4)
13772 {
13773 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13774 offset += 4;
13775 }
13776 if ((countval & 0x02) && max_size > 2)
13777 {
13778 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13779 offset += 2;
13780 }
13781 if ((countval & 0x01) && max_size > 1)
13782 {
13783 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13784 offset += 1;
13785 }
13786 return;
13787 }
13788 if (max_size > 8)
13789 {
13790 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13791 count, 1, OPTAB_DIRECT);
13792 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13793 count, QImode, 1, 4);
13794 return;
13795 }
13796
13797 /* When single stringop insns are available, we can cheaply increase the dest
13798 and src pointers. Otherwise we save code size by maintaining an offset
13799 (zero is readily available from the preceding rep operation) and using
13800 x86 addressing modes. */
13801 if (TARGET_SINGLE_STRINGOP)
13802 {
13803 if (max_size > 4)
13804 {
13805 rtx label = ix86_expand_aligntest (count, 4, true);
13806 src = change_address (srcmem, SImode, srcptr);
13807 dest = change_address (destmem, SImode, destptr);
13808 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13809 emit_label (label);
13810 LABEL_NUSES (label) = 1;
13811 }
13812 if (max_size > 2)
13813 {
13814 rtx label = ix86_expand_aligntest (count, 2, true);
13815 src = change_address (srcmem, HImode, srcptr);
13816 dest = change_address (destmem, HImode, destptr);
13817 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13818 emit_label (label);
13819 LABEL_NUSES (label) = 1;
13820 }
13821 if (max_size > 1)
13822 {
13823 rtx label = ix86_expand_aligntest (count, 1, true);
13824 src = change_address (srcmem, QImode, srcptr);
13825 dest = change_address (destmem, QImode, destptr);
13826 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13827 emit_label (label);
13828 LABEL_NUSES (label) = 1;
13829 }
13830 }
13831 else
13832 {
13833 rtx offset = force_reg (Pmode, const0_rtx);
13834 rtx tmp;
13835
13836 if (max_size > 4)
13837 {
13838 rtx label = ix86_expand_aligntest (count, 4, true);
13839 src = change_address (srcmem, SImode, srcptr);
13840 dest = change_address (destmem, SImode, destptr);
13841 emit_move_insn (dest, src);
13842 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13843 true, OPTAB_LIB_WIDEN);
13844 if (tmp != offset)
13845 emit_move_insn (offset, tmp);
13846 emit_label (label);
13847 LABEL_NUSES (label) = 1;
13848 }
13849 if (max_size > 2)
13850 {
13851 rtx label = ix86_expand_aligntest (count, 2, true);
13852 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13853 src = change_address (srcmem, HImode, tmp);
13854 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13855 dest = change_address (destmem, HImode, tmp);
13856 emit_move_insn (dest, src);
13857 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13858 true, OPTAB_LIB_WIDEN);
13859 if (tmp != offset)
13860 emit_move_insn (offset, tmp);
13861 emit_label (label);
13862 LABEL_NUSES (label) = 1;
13863 }
13864 if (max_size > 1)
13865 {
13866 rtx label = ix86_expand_aligntest (count, 1, true);
13867 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13868 src = change_address (srcmem, QImode, tmp);
13869 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13870 dest = change_address (destmem, QImode, tmp);
13871 emit_move_insn (dest, src);
13872 emit_label (label);
13873 LABEL_NUSES (label) = 1;
13874 }
13875 }
13876 }
13877
13878 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13879 static void
13880 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13881 rtx count, int max_size)
13882 {
13883 count =
13884 expand_simple_binop (counter_mode (count), AND, count,
13885 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13886 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13887 gen_lowpart (QImode, value), count, QImode,
13888 1, max_size / 2);
13889 }
13890
13891 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13892 static void
13893 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13894 {
13895 rtx dest;
13896
13897 if (CONST_INT_P (count))
13898 {
13899 HOST_WIDE_INT countval = INTVAL (count);
13900 int offset = 0;
13901
13902 if ((countval & 0x10) && max_size > 16)
13903 {
13904 if (TARGET_64BIT)
13905 {
13906 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13907 emit_insn (gen_strset (destptr, dest, value));
13908 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13909 emit_insn (gen_strset (destptr, dest, value));
13910 }
13911 else
13912 gcc_unreachable ();
13913 offset += 16;
13914 }
13915 if ((countval & 0x08) && max_size > 8)
13916 {
13917 if (TARGET_64BIT)
13918 {
13919 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13920 emit_insn (gen_strset (destptr, dest, value));
13921 }
13922 else
13923 {
13924 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13925 emit_insn (gen_strset (destptr, dest, value));
13926 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13927 emit_insn (gen_strset (destptr, dest, value));
13928 }
13929 offset += 8;
13930 }
13931 if ((countval & 0x04) && max_size > 4)
13932 {
13933 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13934 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13935 offset += 4;
13936 }
13937 if ((countval & 0x02) && max_size > 2)
13938 {
13939 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13940 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13941 offset += 2;
13942 }
13943 if ((countval & 0x01) && max_size > 1)
13944 {
13945 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13946 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13947 offset += 1;
13948 }
13949 return;
13950 }
13951 if (max_size > 32)
13952 {
13953 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13954 return;
13955 }
13956 if (max_size > 16)
13957 {
13958 rtx label = ix86_expand_aligntest (count, 16, true);
13959 if (TARGET_64BIT)
13960 {
13961 dest = change_address (destmem, DImode, destptr);
13962 emit_insn (gen_strset (destptr, dest, value));
13963 emit_insn (gen_strset (destptr, dest, value));
13964 }
13965 else
13966 {
13967 dest = change_address (destmem, SImode, destptr);
13968 emit_insn (gen_strset (destptr, dest, value));
13969 emit_insn (gen_strset (destptr, dest, value));
13970 emit_insn (gen_strset (destptr, dest, value));
13971 emit_insn (gen_strset (destptr, dest, value));
13972 }
13973 emit_label (label);
13974 LABEL_NUSES (label) = 1;
13975 }
13976 if (max_size > 8)
13977 {
13978 rtx label = ix86_expand_aligntest (count, 8, true);
13979 if (TARGET_64BIT)
13980 {
13981 dest = change_address (destmem, DImode, destptr);
13982 emit_insn (gen_strset (destptr, dest, value));
13983 }
13984 else
13985 {
13986 dest = change_address (destmem, SImode, destptr);
13987 emit_insn (gen_strset (destptr, dest, value));
13988 emit_insn (gen_strset (destptr, dest, value));
13989 }
13990 emit_label (label);
13991 LABEL_NUSES (label) = 1;
13992 }
13993 if (max_size > 4)
13994 {
13995 rtx label = ix86_expand_aligntest (count, 4, true);
13996 dest = change_address (destmem, SImode, destptr);
13997 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13998 emit_label (label);
13999 LABEL_NUSES (label) = 1;
14000 }
14001 if (max_size > 2)
14002 {
14003 rtx label = ix86_expand_aligntest (count, 2, true);
14004 dest = change_address (destmem, HImode, destptr);
14005 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14006 emit_label (label);
14007 LABEL_NUSES (label) = 1;
14008 }
14009 if (max_size > 1)
14010 {
14011 rtx label = ix86_expand_aligntest (count, 1, true);
14012 dest = change_address (destmem, QImode, destptr);
14013 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14014 emit_label (label);
14015 LABEL_NUSES (label) = 1;
14016 }
14017 }
14018
14019 /* Copy enough bytes from SRC to DEST to align DEST, known to be aligned
14020 by ALIGN, to DESIRED_ALIGNMENT. */
14021 static void
14022 expand_movmem_prologue (rtx destmem, rtx srcmem,
14023 rtx destptr, rtx srcptr, rtx count,
14024 int align, int desired_alignment)
14025 {
14026 if (align <= 1 && desired_alignment > 1)
14027 {
14028 rtx label = ix86_expand_aligntest (destptr, 1, false);
14029 srcmem = change_address (srcmem, QImode, srcptr);
14030 destmem = change_address (destmem, QImode, destptr);
14031 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14032 ix86_adjust_counter (count, 1);
14033 emit_label (label);
14034 LABEL_NUSES (label) = 1;
14035 }
14036 if (align <= 2 && desired_alignment > 2)
14037 {
14038 rtx label = ix86_expand_aligntest (destptr, 2, false);
14039 srcmem = change_address (srcmem, HImode, srcptr);
14040 destmem = change_address (destmem, HImode, destptr);
14041 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14042 ix86_adjust_counter (count, 2);
14043 emit_label (label);
14044 LABEL_NUSES (label) = 1;
14045 }
14046 if (align <= 4 && desired_alignment > 4)
14047 {
14048 rtx label = ix86_expand_aligntest (destptr, 4, false);
14049 srcmem = change_address (srcmem, SImode, srcptr);
14050 destmem = change_address (destmem, SImode, destptr);
14051 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14052 ix86_adjust_counter (count, 4);
14053 emit_label (label);
14054 LABEL_NUSES (label) = 1;
14055 }
14056 gcc_assert (desired_alignment <= 8);
14057 }
14058
14059 /* Set enough bytes of DEST to align DEST, known to be aligned by ALIGN,
14060 to DESIRED_ALIGNMENT. */
14061 static void
14062 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14063 int align, int desired_alignment)
14064 {
14065 if (align <= 1 && desired_alignment > 1)
14066 {
14067 rtx label = ix86_expand_aligntest (destptr, 1, false);
14068 destmem = change_address (destmem, QImode, destptr);
14069 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14070 ix86_adjust_counter (count, 1);
14071 emit_label (label);
14072 LABEL_NUSES (label) = 1;
14073 }
14074 if (align <= 2 && desired_alignment > 2)
14075 {
14076 rtx label = ix86_expand_aligntest (destptr, 2, false);
14077 destmem = change_address (destmem, HImode, destptr);
14078 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14079 ix86_adjust_counter (count, 2);
14080 emit_label (label);
14081 LABEL_NUSES (label) = 1;
14082 }
14083 if (align <= 4 && desired_alignment > 4)
14084 {
14085 rtx label = ix86_expand_aligntest (destptr, 4, false);
14086 destmem = change_address (destmem, SImode, destptr);
14087 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14088 ix86_adjust_counter (count, 4);
14089 emit_label (label);
14090 LABEL_NUSES (label) = 1;
14091 }
14092 gcc_assert (desired_alignment <= 8);
14093 }
14094
14095 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
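/* For example, when optimizing for size a copy whose length is a known
   multiple of four is done with "rep movsl" (rep_prefix_4_byte), and with
   "rep movsb" otherwise; other cases consult the per-CPU cost tables.  */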
14096 static enum stringop_alg
14097 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14098 int *dynamic_check)
14099 {
14100 const struct stringop_algs * algs;
14101
14102 *dynamic_check = -1;
14103 if (memset)
14104 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14105 else
14106 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14107 if (stringop_alg != no_stringop)
14108 return stringop_alg;
14109 /* rep; movq or rep; movl is the smallest variant. */
14110 else if (optimize_size)
14111 {
14112 if (!count || (count & 3))
14113 return rep_prefix_1_byte;
14114 else
14115 return rep_prefix_4_byte;
14116 }
14117 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14118 */
14119 else if (expected_size != -1 && expected_size < 4)
14120 return loop_1_byte;
14121 else if (expected_size != -1)
14122 {
14123 unsigned int i;
14124 enum stringop_alg alg = libcall;
14125 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14126 {
14127 gcc_assert (algs->size[i].max);
14128 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14129 {
14130 if (algs->size[i].alg != libcall)
14131 alg = algs->size[i].alg;
14132 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14133 last non-libcall inline algorithm. */
14134 if (TARGET_INLINE_ALL_STRINGOPS)
14135 {
14136 /* When the current size is best copied by a libcall, but we
14137 are still forced to inline, run the heuristic below that
14138 will pick code for medium-sized blocks. */
14139 if (alg != libcall)
14140 return alg;
14141 break;
14142 }
14143 else
14144 return algs->size[i].alg;
14145 }
14146 }
14147 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14148 }
14149 /* When asked to inline the call anyway, try to pick a meaningful choice.
14150 We look for the maximal size of block that is faster to copy by hand and
14151 take blocks of at most that size, guessing that the average size will
14152 be roughly half of the block.
14153
14154 If this turns out to be bad, we might simply specify the preferred
14155 choice in ix86_costs. */
14156 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14157 && algs->unknown_size == libcall)
14158 {
14159 int max = -1;
14160 enum stringop_alg alg;
14161 int i;
14162
14163 for (i = 0; i < NAX_STRINGOP_ALGS; i++)
14164 if (algs->size[i].alg != libcall && algs->size[i].alg)
14165 max = algs->size[i].max;
14166 if (max == -1)
14167 max = 4096;
14168 alg = decide_alg (count, max / 2, memset, dynamic_check);
14169 gcc_assert (*dynamic_check == -1);
14170 gcc_assert (alg != libcall);
14171 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14172 *dynamic_check = max;
14173 return alg;
14174 }
14175 return algs->unknown_size;
14176 }
14177
14178 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14179 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14180 static int
14181 decide_alignment (int align,
14182 enum stringop_alg alg,
14183 int expected_size)
14184 {
14185 int desired_align = 0;
14186 switch (alg)
14187 {
14188 case no_stringop:
14189 gcc_unreachable ();
14190 case loop:
14191 case unrolled_loop:
14192 desired_align = GET_MODE_SIZE (Pmode);
14193 break;
14194 case rep_prefix_8_byte:
14195 desired_align = 8;
14196 break;
14197 case rep_prefix_4_byte:
14198 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14199 copying a whole cacheline at once. */
14200 if (TARGET_PENTIUMPRO)
14201 desired_align = 8;
14202 else
14203 desired_align = 4;
14204 break;
14205 case rep_prefix_1_byte:
14206 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14207 copying a whole cacheline at once. */
14208 if (TARGET_PENTIUMPRO)
14209 desired_align = 8;
14210 else
14211 desired_align = 1;
14212 break;
14213 case loop_1_byte:
14214 desired_align = 1;
14215 break;
14216 case libcall:
14217 return 0;
14218 }
14219
14220 if (optimize_size)
14221 desired_align = 1;
14222 if (desired_align < align)
14223 desired_align = align;
14224 if (expected_size != -1 && expected_size < 4)
14225 desired_align = align;
14226 return desired_align;
14227 }
14228
14229 /* Return the smallest power of 2 greater than VAL. */
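/* E.g. smallest_pow2_greater_than (4) is 8 and
   smallest_pow2_greater_than (0) is 1.  */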
14230 static int
14231 smallest_pow2_greater_than (int val)
14232 {
14233 int ret = 1;
14234 while (ret <= val)
14235 ret <<= 1;
14236 return ret;
14237 }
14238
14239 /* Expand string move (memcpy) operation. Use i386 string operations when
14240 profitable. expand_clrmem contains similar code. The code depends upon
14241 architecture, block size and alignment, but always has the same
14242 overall structure:
14243
14244 1) Prologue guard: Conditional that jumps up to epilogues for small
14245 blocks that can be handled by epilogue alone. This is faster but
14246 also needed for correctness, since the prologue assumes the block is larger
14247 than the desired alignment.
14248
14249 Optional dynamic check for size and libcall for large
14250 blocks is emitted here too, with -minline-stringops-dynamically.
14251
14252 2) Prologue: copy first few bytes in order to get destination aligned
14253 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14254 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14255 We emit either a jump tree on power of two sized blocks, or a byte loop.
14256
14257 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14258 with specified algorithm.
14259
14260 4) Epilogue: code copying tail of the block that is too small to be
14261 handled by main body (or up to size guarded by prologue guard). */
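/* As a concrete illustration, a 100-byte memcpy using the unrolled_loop
   algorithm on a 64-bit target copies 32 bytes per main-loop iteration;
   the prologue aligns the destination first if needed, and the epilogue
   handles the remaining tail bytes.  */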
14262
14263 int
14264 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14265 rtx expected_align_exp, rtx expected_size_exp)
14266 {
14267 rtx destreg;
14268 rtx srcreg;
14269 rtx label = NULL;
14270 rtx tmp;
14271 rtx jump_around_label = NULL;
14272 HOST_WIDE_INT align = 1;
14273 unsigned HOST_WIDE_INT count = 0;
14274 HOST_WIDE_INT expected_size = -1;
14275 int size_needed = 0, epilogue_size_needed;
14276 int desired_align = 0;
14277 enum stringop_alg alg;
14278 int dynamic_check;
14279
14280 if (CONST_INT_P (align_exp))
14281 align = INTVAL (align_exp);
14282 /* i386 can do misaligned access at reasonably increased cost. */
14283 if (CONST_INT_P (expected_align_exp)
14284 && INTVAL (expected_align_exp) > align)
14285 align = INTVAL (expected_align_exp);
14286 if (CONST_INT_P (count_exp))
14287 count = expected_size = INTVAL (count_exp);
14288 if (CONST_INT_P (expected_size_exp) && count == 0)
14289 expected_size = INTVAL (expected_size_exp);
14290
14291 /* Step 0: Decide on preferred algorithm, desired alignment and
14292 size of chunks to be copied by main loop. */
14293
14294 alg = decide_alg (count, expected_size, false, &dynamic_check);
14295 desired_align = decide_alignment (align, alg, expected_size);
14296
14297 if (!TARGET_ALIGN_STRINGOPS)
14298 align = desired_align;
14299
14300 if (alg == libcall)
14301 return 0;
14302 gcc_assert (alg != no_stringop);
14303 if (!count)
14304 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14305 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14306 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14307 switch (alg)
14308 {
14309 case libcall:
14310 case no_stringop:
14311 gcc_unreachable ();
14312 case loop:
14313 size_needed = GET_MODE_SIZE (Pmode);
14314 break;
14315 case unrolled_loop:
14316 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14317 break;
14318 case rep_prefix_8_byte:
14319 size_needed = 8;
14320 break;
14321 case rep_prefix_4_byte:
14322 size_needed = 4;
14323 break;
14324 case rep_prefix_1_byte:
14325 case loop_1_byte:
14326 size_needed = 1;
14327 break;
14328 }
14329
14330 epilogue_size_needed = size_needed;
14331
14332 /* Step 1: Prologue guard. */
14333
14334 /* Alignment code needs count to be in register. */
14335 if (CONST_INT_P (count_exp) && desired_align > align)
14336 {
14337 enum machine_mode mode = SImode;
14338 if (TARGET_64BIT && (count & ~0xffffffff))
14339 mode = DImode;
14340 count_exp = force_reg (mode, count_exp);
14341 }
14342 gcc_assert (desired_align >= 1 && align >= 1);
14343
14344 /* Ensure that alignment prologue won't copy past end of block. */
14345 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14346 {
14347 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14348 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14349 Make sure it is power of 2. */
14350 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14351
14352 label = gen_label_rtx ();
14353 emit_cmp_and_jump_insns (count_exp,
14354 GEN_INT (epilogue_size_needed),
14355 LTU, 0, counter_mode (count_exp), 1, label);
14356 if (GET_CODE (count_exp) == CONST_INT)
14357 ;
14358 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14359 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14360 else
14361 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14362 }
14363 /* Emit code to decide at runtime whether a library call or inline code
14364 should be used. */
14365 if (dynamic_check != -1)
14366 {
14367 rtx hot_label = gen_label_rtx ();
14368 jump_around_label = gen_label_rtx ();
14369 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14370 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14371 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14372 emit_block_move_via_libcall (dst, src, count_exp, false);
14373 emit_jump (jump_around_label);
14374 emit_label (hot_label);
14375 }
14376
14377 /* Step 2: Alignment prologue. */
14378
14379 if (desired_align > align)
14380 {
14381 /* Except for the first move in the epilogue, we no longer know
14382 the constant offset in the aliasing info. It doesn't seem worth
14383 the pain to maintain it for the first move, so throw away
14384 the info early. */
14385 src = change_address (src, BLKmode, srcreg);
14386 dst = change_address (dst, BLKmode, destreg);
14387 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14388 desired_align);
14389 }
14390 if (label && size_needed == 1)
14391 {
14392 emit_label (label);
14393 LABEL_NUSES (label) = 1;
14394 label = NULL;
14395 }
14396
14397 /* Step 3: Main loop. */
14398
14399 switch (alg)
14400 {
14401 case libcall:
14402 case no_stringop:
14403 gcc_unreachable ();
14404 case loop_1_byte:
14405 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14406 count_exp, QImode, 1, expected_size);
14407 break;
14408 case loop:
14409 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14410 count_exp, Pmode, 1, expected_size);
14411 break;
14412 case unrolled_loop:
14413 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14414 registers for 4 temporaries anyway. */
14415 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14416 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14417 expected_size);
14418 break;
14419 case rep_prefix_8_byte:
14420 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14421 DImode);
14422 break;
14423 case rep_prefix_4_byte:
14424 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14425 SImode);
14426 break;
14427 case rep_prefix_1_byte:
14428 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14429 QImode);
14430 break;
14431 }
14432 /* Properly adjust the offset of src and dest memory for aliasing. */
14433 if (CONST_INT_P (count_exp))
14434 {
14435 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14436 (count / size_needed) * size_needed);
14437 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14438 (count / size_needed) * size_needed);
14439 }
14440 else
14441 {
14442 src = change_address (src, BLKmode, srcreg);
14443 dst = change_address (dst, BLKmode, destreg);
14444 }
14445
14446 /* Step 4: Epilogue to copy the remaining bytes. */
14447
14448 if (label)
14449 {
14450 /* When the main loop is done, COUNT_EXP might hold original count,
14451 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14452 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14453 bytes. Compensate if needed. */
14454
14455 if (size_needed < epilogue_size_needed)
14456 {
14457 tmp =
14458 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14459 GEN_INT (size_needed - 1), count_exp, 1,
14460 OPTAB_DIRECT);
14461 if (tmp != count_exp)
14462 emit_move_insn (count_exp, tmp);
14463 }
14464 emit_label (label);
14465 LABEL_NUSES (label) = 1;
14466 }
14467
14468 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14469 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14470 epilogue_size_needed);
14471 if (jump_around_label)
14472 emit_label (jump_around_label);
14473 return 1;
14474 }
14475
14476 /* Helper function for memset.  For the QImode value 0xXY produce
14477 0xXYXYXYXY of the width specified by MODE.  This is essentially
14478 a * 0x01010101, but we can do slightly better than
14479 synth_mult by unwinding the sequence by hand on CPUs with
14480 slow multiply. */
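/* For illustration (assumed value): starting from the QImode constant 0xAB,
   the constant path below computes
       v  = 0xAB
       v |= v << 8    ->  0x0000ABAB
       v |= v << 16   ->  0xABABABAB
   and, for DImode, one extra (v << 16) << 16 step gives 0xABABABABABABABAB,
   i.e. the byte replicated across the whole word.  */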
14481 static rtx
14482 promote_duplicated_reg (enum machine_mode mode, rtx val)
14483 {
14484 enum machine_mode valmode = GET_MODE (val);
14485 rtx tmp;
14486 int nops = mode == DImode ? 3 : 2;
14487
14488 gcc_assert (mode == SImode || mode == DImode);
14489 if (val == const0_rtx)
14490 return copy_to_mode_reg (mode, const0_rtx);
14491 if (CONST_INT_P (val))
14492 {
14493 HOST_WIDE_INT v = INTVAL (val) & 255;
14494
14495 v |= v << 8;
14496 v |= v << 16;
14497 if (mode == DImode)
14498 v |= (v << 16) << 16;
14499 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14500 }
14501
14502 if (valmode == VOIDmode)
14503 valmode = QImode;
14504 if (valmode != QImode)
14505 val = gen_lowpart (QImode, val);
14506 if (mode == QImode)
14507 return val;
14508 if (!TARGET_PARTIAL_REG_STALL)
14509 nops--;
14510 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14511 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14512 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14513 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14514 {
14515 rtx reg = convert_modes (mode, QImode, val, true);
14516 tmp = promote_duplicated_reg (mode, const1_rtx);
14517 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14518 OPTAB_DIRECT);
14519 }
14520 else
14521 {
14522 rtx reg = convert_modes (mode, QImode, val, true);
14523
14524 if (!TARGET_PARTIAL_REG_STALL)
14525 if (mode == SImode)
14526 emit_insn (gen_movsi_insv_1 (reg, reg));
14527 else
14528 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14529 else
14530 {
14531 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14532 NULL, 1, OPTAB_DIRECT);
14533 reg =
14534 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14535 }
14536 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14537 NULL, 1, OPTAB_DIRECT);
14538 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14539 if (mode == SImode)
14540 return reg;
14541 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14542 NULL, 1, OPTAB_DIRECT);
14543 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14544 return reg;
14545 }
14546 }
14547
14548 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14549 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14550 alignment from ALIGN to DESIRED_ALIGN. */
14551 static rtx
14552 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14553 {
14554 rtx promoted_val;
14555
14556 if (TARGET_64BIT
14557 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14558 promoted_val = promote_duplicated_reg (DImode, val);
14559 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14560 promoted_val = promote_duplicated_reg (SImode, val);
14561 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14562 promoted_val = promote_duplicated_reg (HImode, val);
14563 else
14564 promoted_val = val;
14565
14566 return promoted_val;
14567 }
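/* For illustration (assumed values): a main loop copying 8-byte chunks on a
   64-bit target promotes VAL to DImode, a 4-byte main loop promotes it to
   SImode, and a 1-byte loop with no extra alignment work leaves VAL
   unpromoted.  */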
14568
14569 /* Expand string clear operation (bzero). Use i386 string operations when
14570 profitable. See expand_movmem comment for explanation of individual
14571 steps performed. */
14572 int
14573 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14574 rtx expected_align_exp, rtx expected_size_exp)
14575 {
14576 rtx destreg;
14577 rtx label = NULL;
14578 rtx tmp;
14579 rtx jump_around_label = NULL;
14580 HOST_WIDE_INT align = 1;
14581 unsigned HOST_WIDE_INT count = 0;
14582 HOST_WIDE_INT expected_size = -1;
14583 int size_needed = 0, epilogue_size_needed;
14584 int desired_align = 0;
14585 enum stringop_alg alg;
14586 rtx promoted_val = NULL;
14587 bool force_loopy_epilogue = false;
14588 int dynamic_check;
14589
14590 if (CONST_INT_P (align_exp))
14591 align = INTVAL (align_exp);
14592 /* i386 can do misaligned access at a reasonable increase in cost. */
14593 if (CONST_INT_P (expected_align_exp)
14594 && INTVAL (expected_align_exp) > align)
14595 align = INTVAL (expected_align_exp);
14596 if (CONST_INT_P (count_exp))
14597 count = expected_size = INTVAL (count_exp);
14598 if (CONST_INT_P (expected_size_exp) && count == 0)
14599 expected_size = INTVAL (expected_size_exp);
14600
14601 /* Step 0: Decide on preferred algorithm, desired alignment and
14602 size of chunks to be copied by main loop. */
14603
14604 alg = decide_alg (count, expected_size, true, &dynamic_check);
14605 desired_align = decide_alignment (align, alg, expected_size);
14606
14607 if (!TARGET_ALIGN_STRINGOPS)
14608 align = desired_align;
14609
14610 if (alg == libcall)
14611 return 0;
14612 gcc_assert (alg != no_stringop);
14613 if (!count)
14614 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14615 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14616 switch (alg)
14617 {
14618 case libcall:
14619 case no_stringop:
14620 gcc_unreachable ();
14621 case loop:
14622 size_needed = GET_MODE_SIZE (Pmode);
14623 break;
14624 case unrolled_loop:
14625 size_needed = GET_MODE_SIZE (Pmode) * 4;
14626 break;
14627 case rep_prefix_8_byte:
14628 size_needed = 8;
14629 break;
14630 case rep_prefix_4_byte:
14631 size_needed = 4;
14632 break;
14633 case rep_prefix_1_byte:
14634 case loop_1_byte:
14635 size_needed = 1;
14636 break;
14637 }
14638 epilogue_size_needed = size_needed;
14639
14640 /* Step 1: Prologue guard. */
14641
14642 /* Alignment code needs count to be in register. */
14643 if (CONST_INT_P (count_exp) && desired_align > align)
14644 {
14645 enum machine_mode mode = SImode;
14646 if (TARGET_64BIT && (count & ~0xffffffff))
14647 mode = DImode;
14648 count_exp = force_reg (mode, count_exp);
14649 }
14650 /* Do the cheap promotion to allow better CSE across the
14651 main loop and epilogue (i.e. one load of the big constant in
14652 front of all the code). */
14653 if (CONST_INT_P (val_exp))
14654 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14655 desired_align, align);
14656 /* Ensure that alignment prologue won't copy past end of block. */
14657 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14658 {
14659 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14660 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14661 Make sure it is power of 2. */
14662 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14663
14664 /* To improve performance of small blocks, we jump around the VAL
14665 promotion.  This means that if the promoted VAL is not constant,
14666 we might not use it in the epilogue and have to use the byte
14667 loop variant. */
14668 if (epilogue_size_needed > 2 && !promoted_val)
14669 force_loopy_epilogue = true;
14670 label = gen_label_rtx ();
14671 emit_cmp_and_jump_insns (count_exp,
14672 GEN_INT (epilogue_size_needed),
14673 LTU, 0, counter_mode (count_exp), 1, label);
14674 if (GET_CODE (count_exp) == CONST_INT)
14675 ;
14676 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14677 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14678 else
14679 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14680 }
14681 if (dynamic_check != -1)
14682 {
14683 rtx hot_label = gen_label_rtx ();
14684 jump_around_label = gen_label_rtx ();
14685 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14686 LEU, 0, counter_mode (count_exp), 1, hot_label);
14687 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14688 set_storage_via_libcall (dst, count_exp, val_exp, false);
14689 emit_jump (jump_around_label);
14690 emit_label (hot_label);
14691 }
14692
14693 /* Step 2: Alignment prologue. */
14694
14695 /* Do the expensive promotion once we have branched off the small blocks. */
14696 if (!promoted_val)
14697 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14698 desired_align, align);
14699 gcc_assert (desired_align >= 1 && align >= 1);
14700
14701 if (desired_align > align)
14702 {
14703 /* Except for the first move in the epilogue, we no longer know
14704 the constant offset in the aliasing info.  It does not seem worth
14705 the pain to maintain it for the first move, so throw away
14706 the info early. */
14707 dst = change_address (dst, BLKmode, destreg);
14708 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14709 desired_align);
14710 }
14711 if (label && size_needed == 1)
14712 {
14713 emit_label (label);
14714 LABEL_NUSES (label) = 1;
14715 label = NULL;
14716 }
14717
14718 /* Step 3: Main loop. */
14719
14720 switch (alg)
14721 {
14722 case libcall:
14723 case no_stringop:
14724 gcc_unreachable ();
14725 case loop_1_byte:
14726 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14727 count_exp, QImode, 1, expected_size);
14728 break;
14729 case loop:
14730 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14731 count_exp, Pmode, 1, expected_size);
14732 break;
14733 case unrolled_loop:
14734 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14735 count_exp, Pmode, 4, expected_size);
14736 break;
14737 case rep_prefix_8_byte:
14738 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14739 DImode);
14740 break;
14741 case rep_prefix_4_byte:
14742 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14743 SImode);
14744 break;
14745 case rep_prefix_1_byte:
14746 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14747 QImode);
14748 break;
14749 }
14750 /* Properly adjust the offset of the destination memory for aliasing. */
14751 if (CONST_INT_P (count_exp))
14752 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14753 (count / size_needed) * size_needed);
14754 else
14755 dst = change_address (dst, BLKmode, destreg);
14756
14757 /* Step 4: Epilogue to copy the remaining bytes. */
14758
14759 if (label)
14760 {
14761 /* When the main loop is done, COUNT_EXP might hold the original count,
14762 while we want to copy only COUNT_EXP & (SIZE_NEEDED - 1) bytes.
14763 The epilogue code will actually copy COUNT_EXP & (EPILOGUE_SIZE_NEEDED - 1)
14764 bytes.  Compensate if needed. */
14765
14766 if (size_needed < desired_align - align)
14767 {
14768 tmp =
14769 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14770 GEN_INT (size_needed - 1), count_exp, 1,
14771 OPTAB_DIRECT);
14772 size_needed = desired_align - align + 1;
14773 if (tmp != count_exp)
14774 emit_move_insn (count_exp, tmp);
14775 }
14776 emit_label (label);
14777 LABEL_NUSES (label) = 1;
14778 }
14779 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14780 {
14781 if (force_loopy_epilogue)
14782 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14783 size_needed);
14784 else
14785 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14786 size_needed);
14787 }
14788 if (jump_around_label)
14789 emit_label (jump_around_label);
14790 return 1;
14791 }
14792
14793 /* Expand strlen. */
14794 int
14795 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14796 {
14797 rtx addr, scratch1, scratch2, scratch3, scratch4;
14798
14799 /* The generic case of the strlen expander is long.  Avoid expanding
14800 it unless TARGET_INLINE_ALL_STRINGOPS. */
14801
14802 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14803 && !TARGET_INLINE_ALL_STRINGOPS
14804 && !optimize_size
14805 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14806 return 0;
14807
14808 addr = force_reg (Pmode, XEXP (src, 0));
14809 scratch1 = gen_reg_rtx (Pmode);
14810
14811 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14812 && !optimize_size)
14813 {
14814 /* Well, it seems that some optimizer does not combine a call like
14815 foo(strlen(bar), strlen(bar));
14816 when the move and the subtraction are done here.  It does calculate
14817 the length just once when these instructions are done inside of
14818 output_strlen_unroll().  But I think that since &bar[strlen(bar)] is
14819 often used and I use one fewer register for the lifetime of
14820 output_strlen_unroll() this is better. */
14821
14822 emit_move_insn (out, addr);
14823
14824 ix86_expand_strlensi_unroll_1 (out, src, align);
14825
14826 /* strlensi_unroll_1 returns the address of the zero at the end of
14827 the string, like memchr(), so compute the length by subtracting
14828 the start address. */
14829 if (TARGET_64BIT)
14830 emit_insn (gen_subdi3 (out, out, addr));
14831 else
14832 emit_insn (gen_subsi3 (out, out, addr));
14833 }
14834 else
14835 {
14836 rtx unspec;
14837 scratch2 = gen_reg_rtx (Pmode);
14838 scratch3 = gen_reg_rtx (Pmode);
14839 scratch4 = force_reg (Pmode, constm1_rtx);
14840
14841 emit_move_insn (scratch3, addr);
14842 eoschar = force_reg (QImode, eoschar);
14843
14844 src = replace_equiv_address_nv (src, scratch3);
14845
14846 /* If .md starts supporting :P, this can be done in .md. */
14847 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14848 scratch4), UNSPEC_SCAS);
14849 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14850 if (TARGET_64BIT)
14851 {
14852 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14853 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14854 }
14855 else
14856 {
14857 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14858 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14859 }
14860 }
14861 return 1;
14862 }
14863
14864 /* Expand the appropriate insns for doing strlen if not just doing
14865 repnz; scasb
14866
14867 out = result, initialized with the start address
14868 align_rtx = alignment of the address.
14869 scratch = scratch register, initialized with the start address when
14870 not aligned, otherwise undefined
14871
14872 This is just the body. It needs the initializations mentioned above and
14873 some address computing at the end. These things are done in i386.md. */
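
/* As a rough scalar sketch of the strategy implemented below (plain C,
   illustrative only and assumed, not the RTL actually emitted): check bytes
   one at a time until the pointer is 4-byte aligned, then scan a word at a
   time with the "contains a zero byte" test used in the main loop.  */
#if 0
static size_t
strlen_sketch (const char *s)
{
  const char *p = s;

  /* Prologue: byte checks until P is 4-byte aligned.  */
  while (((unsigned long) p & 3) != 0)
    {
      if (*p == 0)
	return p - s;
      p++;
    }

  /* Main loop: four bytes at a time.  */
  for (;;)
    {
      unsigned int v;
      memcpy (&v, p, 4);
      if (((v - 0x01010101U) & ~v & 0x80808080U) != 0)
	break;
      p += 4;
    }

  /* Epilogue: locate the zero byte inside the last word.  */
  while (*p != 0)
    p++;
  return p - s;
}
#endif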
14874
14875 static void
14876 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14877 {
14878 int align;
14879 rtx tmp;
14880 rtx align_2_label = NULL_RTX;
14881 rtx align_3_label = NULL_RTX;
14882 rtx align_4_label = gen_label_rtx ();
14883 rtx end_0_label = gen_label_rtx ();
14884 rtx mem;
14885 rtx tmpreg = gen_reg_rtx (SImode);
14886 rtx scratch = gen_reg_rtx (SImode);
14887 rtx cmp;
14888
14889 align = 0;
14890 if (CONST_INT_P (align_rtx))
14891 align = INTVAL (align_rtx);
14892
14893 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14894
14895 /* Is there a known alignment and is it less than 4? */
14896 if (align < 4)
14897 {
14898 rtx scratch1 = gen_reg_rtx (Pmode);
14899 emit_move_insn (scratch1, out);
14900 /* Is there a known alignment and is it not 2? */
14901 if (align != 2)
14902 {
14903 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14904 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14905
14906 /* Leave just the 3 lower bits. */
14907 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14908 NULL_RTX, 0, OPTAB_WIDEN);
14909
14910 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14911 Pmode, 1, align_4_label);
14912 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14913 Pmode, 1, align_2_label);
14914 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14915 Pmode, 1, align_3_label);
14916 }
14917 else
14918 {
14919 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14920 check whether the pointer is aligned to a 4-byte boundary. */
14921
14922 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14923 NULL_RTX, 0, OPTAB_WIDEN);
14924
14925 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14926 Pmode, 1, align_4_label);
14927 }
14928
14929 mem = change_address (src, QImode, out);
14930
14931 /* Now compare the bytes. */
14932
14933 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14934 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14935 QImode, 1, end_0_label);
14936
14937 /* Increment the address. */
14938 if (TARGET_64BIT)
14939 emit_insn (gen_adddi3 (out, out, const1_rtx));
14940 else
14941 emit_insn (gen_addsi3 (out, out, const1_rtx));
14942
14943 /* Not needed with an alignment of 2 */
14944 if (align != 2)
14945 {
14946 emit_label (align_2_label);
14947
14948 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14949 end_0_label);
14950
14951 if (TARGET_64BIT)
14952 emit_insn (gen_adddi3 (out, out, const1_rtx));
14953 else
14954 emit_insn (gen_addsi3 (out, out, const1_rtx));
14955
14956 emit_label (align_3_label);
14957 }
14958
14959 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14960 end_0_label);
14961
14962 if (TARGET_64BIT)
14963 emit_insn (gen_adddi3 (out, out, const1_rtx));
14964 else
14965 emit_insn (gen_addsi3 (out, out, const1_rtx));
14966 }
14967
14968 /* Generate a loop to check 4 bytes at a time.  It is not a good idea to
14969 align this loop; doing so only makes the program bigger and does not
14970 help to speed it up. */
14971 emit_label (align_4_label);
14972
14973 mem = change_address (src, SImode, out);
14974 emit_move_insn (scratch, mem);
14975 if (TARGET_64BIT)
14976 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14977 else
14978 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14979
14980 /* This formula yields a nonzero result iff one of the bytes is zero.
14981 This saves three branches inside the loop and many cycles. */
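/* Worked example (assumed value): for a word 0x44332200, whose low byte is
   zero, the formula gives
       0x44332200 - 0x01010101  =  0x433220FF
       ~0x44332200              =  0xBBCCDDFF
       0x433220FF & 0xBBCCDDFF & 0x80808080  =  0x00000080  (nonzero),
   while a word with no zero byte, e.g. 0x01010101, yields 0.  */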
14982
14983 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14984 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14985 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14986 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14987 gen_int_mode (0x80808080, SImode)));
14988 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14989 align_4_label);
14990
14991 if (TARGET_CMOVE)
14992 {
14993 rtx reg = gen_reg_rtx (SImode);
14994 rtx reg2 = gen_reg_rtx (Pmode);
14995 emit_move_insn (reg, tmpreg);
14996 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14997
14998 /* If zero is not in the first two bytes, move two bytes forward. */
14999 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15000 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15001 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15002 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15003 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15004 reg,
15005 tmpreg)));
15006 /* Emit lea manually to avoid clobbering of flags. */
15007 emit_insn (gen_rtx_SET (SImode, reg2,
15008 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15009
15010 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15011 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15012 emit_insn (gen_rtx_SET (VOIDmode, out,
15013 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15014 reg2,
15015 out)));
15016
15017 }
15018 else
15019 {
15020 rtx end_2_label = gen_label_rtx ();
15021 /* Is zero in the first two bytes? */
15022
15023 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15024 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15025 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15026 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15027 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15028 pc_rtx);
15029 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15030 JUMP_LABEL (tmp) = end_2_label;
15031
15032 /* Not in the first two. Move two bytes forward. */
15033 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15034 if (TARGET_64BIT)
15035 emit_insn (gen_adddi3 (out, out, const2_rtx));
15036 else
15037 emit_insn (gen_addsi3 (out, out, const2_rtx));
15038
15039 emit_label (end_2_label);
15040
15041 }
15042
15043 /* Avoid branch in fixing the byte. */
15044 tmpreg = gen_lowpart (QImode, tmpreg);
15045 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15046 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, 17), const0_rtx);
15047 if (TARGET_64BIT)
15048 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15049 else
15050 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15051
15052 emit_label (end_0_label);
15053 }
15054
15055 /* For a given symbol (function), construct code to compute the address of its
15056 PLT entry in the large x86-64 PIC model. */
15057 rtx
15058 construct_plt_address (rtx symbol)
15059 {
15060 rtx tmp = gen_reg_rtx (Pmode);
15061 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15062
15063 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15064 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15065
15066 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15067 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15068 return tmp;
15069 }
15070
15071 void
15072 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15073 rtx callarg2 ATTRIBUTE_UNUSED,
15074 rtx pop, int sibcall)
15075 {
15076 rtx use = NULL, call;
15077
15078 if (pop == const0_rtx)
15079 pop = NULL;
15080 gcc_assert (!TARGET_64BIT || !pop);
15081
15082 if (TARGET_MACHO && !TARGET_64BIT)
15083 {
15084 #if TARGET_MACHO
15085 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15086 fnaddr = machopic_indirect_call_target (fnaddr);
15087 #endif
15088 }
15089 else
15090 {
15091 /* Static functions and indirect calls don't need the pic register. */
15092 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15093 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15094 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15095 use_reg (&use, pic_offset_table_rtx);
15096 }
15097
15098 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15099 {
15100 rtx al = gen_rtx_REG (QImode, 0);
15101 emit_move_insn (al, callarg2);
15102 use_reg (&use, al);
15103 }
15104
15105 if (ix86_cmodel == CM_LARGE_PIC
15106 && GET_CODE (fnaddr) == MEM
15107 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15108 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15109 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15110 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15111 {
15112 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15113 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15114 }
15115 if (sibcall && TARGET_64BIT
15116 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15117 {
15118 rtx addr;
15119 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15120 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15121 emit_move_insn (fnaddr, addr);
15122 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15123 }
15124
15125 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15126 if (retval)
15127 call = gen_rtx_SET (VOIDmode, retval, call);
15128 if (pop)
15129 {
15130 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15131 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15132 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15133 }
15134
15135 call = emit_call_insn (call);
15136 if (use)
15137 CALL_INSN_FUNCTION_USAGE (call) = use;
15138 }
15139
15140 \f
15141 /* Clear stack slot assignments remembered from previous functions.
15142 This is called from INIT_EXPANDERS once before RTL is emitted for each
15143 function. */
15144
15145 static struct machine_function *
15146 ix86_init_machine_status (void)
15147 {
15148 struct machine_function *f;
15149
15150 f = ggc_alloc_cleared (sizeof (struct machine_function));
15151 f->use_fast_prologue_epilogue_nregs = -1;
15152 f->tls_descriptor_call_expanded_p = 0;
15153
15154 return f;
15155 }
15156
15157 /* Return a MEM corresponding to a stack slot with mode MODE.
15158 Allocate a new slot if necessary.
15159
15160 The RTL for a function can have several slots available: N is
15161 which slot to use. */
15162
15163 rtx
15164 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15165 {
15166 struct stack_local_entry *s;
15167
15168 gcc_assert (n < MAX_386_STACK_LOCALS);
15169
15170 for (s = ix86_stack_locals; s; s = s->next)
15171 if (s->mode == mode && s->n == n)
15172 return copy_rtx (s->rtl);
15173
15174 s = (struct stack_local_entry *)
15175 ggc_alloc (sizeof (struct stack_local_entry));
15176 s->n = n;
15177 s->mode = mode;
15178 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15179
15180 s->next = ix86_stack_locals;
15181 ix86_stack_locals = s;
15182 return s->rtl;
15183 }
15184
15185 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15186
15187 static GTY(()) rtx ix86_tls_symbol;
15188 rtx
15189 ix86_tls_get_addr (void)
15190 {
15191
15192 if (!ix86_tls_symbol)
15193 {
15194 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15195 (TARGET_ANY_GNU_TLS
15196 && !TARGET_64BIT)
15197 ? "___tls_get_addr"
15198 : "__tls_get_addr");
15199 }
15200
15201 return ix86_tls_symbol;
15202 }
15203
15204 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15205
15206 static GTY(()) rtx ix86_tls_module_base_symbol;
15207 rtx
15208 ix86_tls_module_base (void)
15209 {
15210
15211 if (!ix86_tls_module_base_symbol)
15212 {
15213 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15214 "_TLS_MODULE_BASE_");
15215 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15216 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15217 }
15218
15219 return ix86_tls_module_base_symbol;
15220 }
15221 \f
15222 /* Calculate the length of the memory address in the instruction
15223 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15224
15225 int
15226 memory_address_length (rtx addr)
15227 {
15228 struct ix86_address parts;
15229 rtx base, index, disp;
15230 int len;
15231 int ok;
15232
15233 if (GET_CODE (addr) == PRE_DEC
15234 || GET_CODE (addr) == POST_INC
15235 || GET_CODE (addr) == PRE_MODIFY
15236 || GET_CODE (addr) == POST_MODIFY)
15237 return 0;
15238
15239 ok = ix86_decompose_address (addr, &parts);
15240 gcc_assert (ok);
15241
15242 if (parts.base && GET_CODE (parts.base) == SUBREG)
15243 parts.base = SUBREG_REG (parts.base);
15244 if (parts.index && GET_CODE (parts.index) == SUBREG)
15245 parts.index = SUBREG_REG (parts.index);
15246
15247 base = parts.base;
15248 index = parts.index;
15249 disp = parts.disp;
15250 len = 0;
15251
15252 /* Rule of thumb:
15253 - esp as the base always wants an index,
15254 - ebp as the base always wants a displacement. */
15255
15256 /* Register Indirect. */
15257 if (base && !index && !disp)
15258 {
15259 /* esp (for its index) and ebp (for its displacement) need
15260 the two-byte modrm form. */
15261 if (addr == stack_pointer_rtx
15262 || addr == arg_pointer_rtx
15263 || addr == frame_pointer_rtx
15264 || addr == hard_frame_pointer_rtx)
15265 len = 1;
15266 }
15267
15268 /* Direct Addressing. */
15269 else if (disp && !base && !index)
15270 len = 4;
15271
15272 else
15273 {
15274 /* Find the length of the displacement constant. */
15275 if (disp)
15276 {
15277 if (base && satisfies_constraint_K (disp))
15278 len = 1;
15279 else
15280 len = 4;
15281 }
15282 /* ebp always wants a displacement. */
15283 else if (base == hard_frame_pointer_rtx)
15284 len = 1;
15285
15286 /* An index requires the two-byte modrm form.... */
15287 if (index
15288 /* ...like esp, which always wants an index. */
15289 || base == stack_pointer_rtx
15290 || base == arg_pointer_rtx
15291 || base == frame_pointer_rtx)
15292 len += 1;
15293 }
15294
15295 return len;
15296 }
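
/* A few illustrative lengths computed by the function above (operands
   assumed, AT&T syntax):
       (%eax)           -> 0   plain modrm, no extra bytes
       (%esp)           -> 1   needs a SIB byte
       8(%ebp)          -> 1   disp8
       0x12345678       -> 4   disp32 with no base or index
       0x100(%eax,%ebx) -> 5   disp32 plus a SIB byte.  */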
15297
15298 /* Compute the default value for the "length_immediate" attribute.  When SHORTFORM
15299 is set, expect that the insn has an 8-bit immediate alternative. */
15300 int
15301 ix86_attr_length_immediate_default (rtx insn, int shortform)
15302 {
15303 int len = 0;
15304 int i;
15305 extract_insn_cached (insn);
15306 for (i = recog_data.n_operands - 1; i >= 0; --i)
15307 if (CONSTANT_P (recog_data.operand[i]))
15308 {
15309 gcc_assert (!len);
15310 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15311 len = 1;
15312 else
15313 {
15314 switch (get_attr_mode (insn))
15315 {
15316 case MODE_QI:
15317 len+=1;
15318 break;
15319 case MODE_HI:
15320 len+=2;
15321 break;
15322 case MODE_SI:
15323 len+=4;
15324 break;
15325 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15326 case MODE_DI:
15327 len+=4;
15328 break;
15329 default:
15330 fatal_insn ("unknown insn mode", insn);
15331 }
15332 }
15333 }
15334 return len;
15335 }
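
/* For illustration (assumed instructions): with SHORTFORM set, "add $3, %eax"
   fits a sign-extended 8-bit immediate and counts as 1 byte, while
   "mov $0x12345678, %eax" needs a full 32-bit immediate and counts as 4;
   DImode immediates also count as 4, being encoded as sign-extended
   32-bit values.  */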
15336 /* Compute default value for "length_address" attribute. */
15337 int
15338 ix86_attr_length_address_default (rtx insn)
15339 {
15340 int i;
15341
15342 if (get_attr_type (insn) == TYPE_LEA)
15343 {
15344 rtx set = PATTERN (insn);
15345
15346 if (GET_CODE (set) == PARALLEL)
15347 set = XVECEXP (set, 0, 0);
15348
15349 gcc_assert (GET_CODE (set) == SET);
15350
15351 return memory_address_length (SET_SRC (set));
15352 }
15353
15354 extract_insn_cached (insn);
15355 for (i = recog_data.n_operands - 1; i >= 0; --i)
15356 if (MEM_P (recog_data.operand[i]))
15357 {
15358 return memory_address_length (XEXP (recog_data.operand[i], 0));
15359 break;
15360 }
15361 return 0;
15362 }
15363 \f
15364 /* Return the maximum number of instructions a cpu can issue. */
15365
15366 static int
15367 ix86_issue_rate (void)
15368 {
15369 switch (ix86_tune)
15370 {
15371 case PROCESSOR_PENTIUM:
15372 case PROCESSOR_K6:
15373 return 2;
15374
15375 case PROCESSOR_PENTIUMPRO:
15376 case PROCESSOR_PENTIUM4:
15377 case PROCESSOR_ATHLON:
15378 case PROCESSOR_K8:
15379 case PROCESSOR_AMDFAM10:
15380 case PROCESSOR_NOCONA:
15381 case PROCESSOR_GENERIC32:
15382 case PROCESSOR_GENERIC64:
15383 return 3;
15384
15385 case PROCESSOR_CORE2:
15386 return 4;
15387
15388 default:
15389 return 1;
15390 }
15391 }
15392
15393 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads the flags set
15394 by DEP_INSN and nothing else set by DEP_INSN. */
15395
15396 static int
15397 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15398 {
15399 rtx set, set2;
15400
15401 /* Simplify the test for uninteresting insns. */
15402 if (insn_type != TYPE_SETCC
15403 && insn_type != TYPE_ICMOV
15404 && insn_type != TYPE_FCMOV
15405 && insn_type != TYPE_IBR)
15406 return 0;
15407
15408 if ((set = single_set (dep_insn)) != 0)
15409 {
15410 set = SET_DEST (set);
15411 set2 = NULL_RTX;
15412 }
15413 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15414 && XVECLEN (PATTERN (dep_insn), 0) == 2
15415 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15416 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15417 {
15418 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15419 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15420 }
15421 else
15422 return 0;
15423
15424 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15425 return 0;
15426
15427 /* This test is true if the dependent insn reads the flags but
15428 not any other potentially set register. */
15429 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15430 return 0;
15431
15432 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15433 return 0;
15434
15435 return 1;
15436 }
15437
15438 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15439 address with operands set by DEP_INSN. */
15440
15441 static int
15442 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15443 {
15444 rtx addr;
15445
15446 if (insn_type == TYPE_LEA
15447 && TARGET_PENTIUM)
15448 {
15449 addr = PATTERN (insn);
15450
15451 if (GET_CODE (addr) == PARALLEL)
15452 addr = XVECEXP (addr, 0, 0);
15453
15454 gcc_assert (GET_CODE (addr) == SET);
15455
15456 addr = SET_SRC (addr);
15457 }
15458 else
15459 {
15460 int i;
15461 extract_insn_cached (insn);
15462 for (i = recog_data.n_operands - 1; i >= 0; --i)
15463 if (MEM_P (recog_data.operand[i]))
15464 {
15465 addr = XEXP (recog_data.operand[i], 0);
15466 goto found;
15467 }
15468 return 0;
15469 found:;
15470 }
15471
15472 return modified_in_p (addr, dep_insn);
15473 }
15474
15475 static int
15476 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15477 {
15478 enum attr_type insn_type, dep_insn_type;
15479 enum attr_memory memory;
15480 rtx set, set2;
15481 int dep_insn_code_number;
15482
15483 /* Anti and output dependencies have zero cost on all CPUs. */
15484 if (REG_NOTE_KIND (link) != 0)
15485 return 0;
15486
15487 dep_insn_code_number = recog_memoized (dep_insn);
15488
15489 /* If we can't recognize the insns, we can't really do anything. */
15490 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15491 return cost;
15492
15493 insn_type = get_attr_type (insn);
15494 dep_insn_type = get_attr_type (dep_insn);
15495
15496 switch (ix86_tune)
15497 {
15498 case PROCESSOR_PENTIUM:
15499 /* Address Generation Interlock adds a cycle of latency. */
15500 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15501 cost += 1;
15502
15503 /* ??? Compares pair with jump/setcc. */
15504 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15505 cost = 0;
15506
15507 /* Floating point stores require value to be ready one cycle earlier. */
15508 if (insn_type == TYPE_FMOV
15509 && get_attr_memory (insn) == MEMORY_STORE
15510 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15511 cost += 1;
15512 break;
15513
15514 case PROCESSOR_PENTIUMPRO:
15515 memory = get_attr_memory (insn);
15516
15517 /* INT->FP conversion is expensive. */
15518 if (get_attr_fp_int_src (dep_insn))
15519 cost += 5;
15520
15521 /* There is one cycle extra latency between an FP op and a store. */
15522 if (insn_type == TYPE_FMOV
15523 && (set = single_set (dep_insn)) != NULL_RTX
15524 && (set2 = single_set (insn)) != NULL_RTX
15525 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15526 && MEM_P (SET_DEST (set2)))
15527 cost += 1;
15528
15529 /* Show the ability of the reorder buffer to hide the latency of a load by
15530 executing it in parallel with the previous instruction, when the previous
15531 instruction is not needed to compute the address. */
15532 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15533 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15534 {
15535 /* Claim that moves take one cycle, as the core can issue one load
15536 at a time and the next load can start a cycle later. */
15537 if (dep_insn_type == TYPE_IMOV
15538 || dep_insn_type == TYPE_FMOV)
15539 cost = 1;
15540 else if (cost > 1)
15541 cost--;
15542 }
15543 break;
15544
15545 case PROCESSOR_K6:
15546 memory = get_attr_memory (insn);
15547
15548 /* The esp dependency is resolved before the instruction is really
15549 finished. */
15550 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15551 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15552 return 1;
15553
15554 /* INT->FP conversion is expensive. */
15555 if (get_attr_fp_int_src (dep_insn))
15556 cost += 5;
15557
15558 /* Show the ability of the reorder buffer to hide the latency of a load by
15559 executing it in parallel with the previous instruction, when the previous
15560 instruction is not needed to compute the address. */
15561 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15562 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15563 {
15564 /* Claim that moves take one cycle, as the core can issue one load
15565 at a time and the next load can start a cycle later. */
15566 if (dep_insn_type == TYPE_IMOV
15567 || dep_insn_type == TYPE_FMOV)
15568 cost = 1;
15569 else if (cost > 2)
15570 cost -= 2;
15571 else
15572 cost = 1;
15573 }
15574 break;
15575
15576 case PROCESSOR_ATHLON:
15577 case PROCESSOR_K8:
15578 case PROCESSOR_AMDFAM10:
15579 case PROCESSOR_GENERIC32:
15580 case PROCESSOR_GENERIC64:
15581 memory = get_attr_memory (insn);
15582
15583 /* Show the ability of the reorder buffer to hide the latency of a load by
15584 executing it in parallel with the previous instruction, when the previous
15585 instruction is not needed to compute the address. */
15586 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15587 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15588 {
15589 enum attr_unit unit = get_attr_unit (insn);
15590 int loadcost = 3;
15591
15592 /* Because of the difference in length between the integer and
15593 floating unit pipeline preparation stages, memory operands
15594 for floating point are cheaper.
15595
15596 ??? For Athlon the difference is most probably 2. */
15597 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15598 loadcost = 3;
15599 else
15600 loadcost = TARGET_ATHLON ? 2 : 0;
15601
15602 if (cost >= loadcost)
15603 cost -= loadcost;
15604 else
15605 cost = 0;
15606 }
15607
15608 default:
15609 break;
15610 }
15611
15612 return cost;
15613 }
15614
15615 /* How many alternative schedules to try. This should be as wide as the
15616 scheduling freedom in the DFA, but no wider. Making this value too
15617 large results in extra work for the scheduler. */
15618
15619 static int
15620 ia32_multipass_dfa_lookahead (void)
15621 {
15622 if (ix86_tune == PROCESSOR_PENTIUM)
15623 return 2;
15624
15625 if (ix86_tune == PROCESSOR_PENTIUMPRO
15626 || ix86_tune == PROCESSOR_K6)
15627 return 1;
15628
15629 else
15630 return 0;
15631 }
15632
15633 \f
15634 /* Compute the alignment given to a constant that is being placed in memory.
15635 EXP is the constant and ALIGN is the alignment that the object would
15636 ordinarily have.
15637 The value of this function is used instead of that alignment to align
15638 the object. */
15639
15640 int
15641 ix86_constant_alignment (tree exp, int align)
15642 {
15643 if (TREE_CODE (exp) == REAL_CST)
15644 {
15645 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15646 return 64;
15647 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15648 return 128;
15649 }
15650 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15651 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15652 return BITS_PER_WORD;
15653
15654 return align;
15655 }
15656
15657 /* Compute the alignment for a static variable.
15658 TYPE is the data type, and ALIGN is the alignment that
15659 the object would ordinarily have. The value of this function is used
15660 instead of that alignment to align the object. */
15661
15662 int
15663 ix86_data_alignment (tree type, int align)
15664 {
15665 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15666
15667 if (AGGREGATE_TYPE_P (type)
15668 && TYPE_SIZE (type)
15669 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15670 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15671 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15672 && align < max_align)
15673 align = max_align;
15674
15675 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15676 to a 16-byte boundary. */
15677 if (TARGET_64BIT)
15678 {
15679 if (AGGREGATE_TYPE_P (type)
15680 && TYPE_SIZE (type)
15681 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15682 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15683 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15684 return 128;
15685 }
15686
15687 if (TREE_CODE (type) == ARRAY_TYPE)
15688 {
15689 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15690 return 64;
15691 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15692 return 128;
15693 }
15694 else if (TREE_CODE (type) == COMPLEX_TYPE)
15695 {
15696
15697 if (TYPE_MODE (type) == DCmode && align < 64)
15698 return 64;
15699 if (TYPE_MODE (type) == XCmode && align < 128)
15700 return 128;
15701 }
15702 else if ((TREE_CODE (type) == RECORD_TYPE
15703 || TREE_CODE (type) == UNION_TYPE
15704 || TREE_CODE (type) == QUAL_UNION_TYPE)
15705 && TYPE_FIELDS (type))
15706 {
15707 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15708 return 64;
15709 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15710 return 128;
15711 }
15712 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15713 || TREE_CODE (type) == INTEGER_TYPE)
15714 {
15715 if (TYPE_MODE (type) == DFmode && align < 64)
15716 return 64;
15717 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15718 return 128;
15719 }
15720
15721 return align;
15722 }
15723
15724 /* Compute the alignment for a local variable.
15725 TYPE is the data type, and ALIGN is the alignment that
15726 the object would ordinarily have. The value of this macro is used
15727 instead of that alignment to align the object. */
15728
15729 int
15730 ix86_local_alignment (tree type, int align)
15731 {
15732 /* The x86-64 ABI requires arrays greater than 16 bytes to be aligned
15733 to a 16-byte boundary. */
15734 if (TARGET_64BIT)
15735 {
15736 if (AGGREGATE_TYPE_P (type)
15737 && TYPE_SIZE (type)
15738 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15739 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15740 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15741 return 128;
15742 }
15743 if (TREE_CODE (type) == ARRAY_TYPE)
15744 {
15745 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15746 return 64;
15747 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15748 return 128;
15749 }
15750 else if (TREE_CODE (type) == COMPLEX_TYPE)
15751 {
15752 if (TYPE_MODE (type) == DCmode && align < 64)
15753 return 64;
15754 if (TYPE_MODE (type) == XCmode && align < 128)
15755 return 128;
15756 }
15757 else if ((TREE_CODE (type) == RECORD_TYPE
15758 || TREE_CODE (type) == UNION_TYPE
15759 || TREE_CODE (type) == QUAL_UNION_TYPE)
15760 && TYPE_FIELDS (type))
15761 {
15762 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15763 return 64;
15764 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15765 return 128;
15766 }
15767 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15768 || TREE_CODE (type) == INTEGER_TYPE)
15769 {
15770
15771 if (TYPE_MODE (type) == DFmode && align < 64)
15772 return 64;
15773 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15774 return 128;
15775 }
15776 return align;
15777 }
15778 \f
15779 /* Emit RTL insns to initialize the variable parts of a trampoline.
15780 FNADDR is an RTX for the address of the function's pure code.
15781 CXT is an RTX for the static chain value for the function. */
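/* For reference, the byte sequences emitted below decode as (AT&T syntax
   assumed):
     32-bit:  b9 <cxt:4>  e9 <disp:4>       movl $CXT, %ecx;  jmp FNADDR
     64-bit:  49 bb <fnaddr:8>              movabs $FNADDR, %r11
              49 ba <cxt:8>                 movabs $CXT, %r10
              49 ff e3                      jmp *%r11
   with a shorter "41 bb <fnaddr:4>" movl form used when FNADDR fits in
   32 bits.  */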
15782 void
15783 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15784 {
15785 if (!TARGET_64BIT)
15786 {
15787 /* Compute offset from the end of the jmp to the target function. */
15788 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15789 plus_constant (tramp, 10),
15790 NULL_RTX, 1, OPTAB_DIRECT);
15791 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15792 gen_int_mode (0xb9, QImode));
15793 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15794 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15795 gen_int_mode (0xe9, QImode));
15796 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15797 }
15798 else
15799 {
15800 int offset = 0;
15801 /* Try to load the address using the shorter movl instead of movabs.
15802 We may want to support movq for kernel mode, but the kernel does not use
15803 trampolines at the moment. */
15804 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15805 {
15806 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15807 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15808 gen_int_mode (0xbb41, HImode));
15809 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15810 gen_lowpart (SImode, fnaddr));
15811 offset += 6;
15812 }
15813 else
15814 {
15815 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15816 gen_int_mode (0xbb49, HImode));
15817 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15818 fnaddr);
15819 offset += 10;
15820 }
15821 /* Load static chain using movabs to r10. */
15822 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15823 gen_int_mode (0xba49, HImode));
15824 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15825 cxt);
15826 offset += 10;
15827 /* Jump to r11. */
15828 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15829 gen_int_mode (0xff49, HImode));
15830 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15831 gen_int_mode (0xe3, QImode));
15832 offset += 3;
15833 gcc_assert (offset <= TRAMPOLINE_SIZE);
15834 }
15835
15836 #ifdef ENABLE_EXECUTE_STACK
15837 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15838 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15839 #endif
15840 }
15841 \f
15842 /* Codes for all the SSE/MMX builtins. */
15843 enum ix86_builtins
15844 {
15845 IX86_BUILTIN_ADDPS,
15846 IX86_BUILTIN_ADDSS,
15847 IX86_BUILTIN_DIVPS,
15848 IX86_BUILTIN_DIVSS,
15849 IX86_BUILTIN_MULPS,
15850 IX86_BUILTIN_MULSS,
15851 IX86_BUILTIN_SUBPS,
15852 IX86_BUILTIN_SUBSS,
15853
15854 IX86_BUILTIN_CMPEQPS,
15855 IX86_BUILTIN_CMPLTPS,
15856 IX86_BUILTIN_CMPLEPS,
15857 IX86_BUILTIN_CMPGTPS,
15858 IX86_BUILTIN_CMPGEPS,
15859 IX86_BUILTIN_CMPNEQPS,
15860 IX86_BUILTIN_CMPNLTPS,
15861 IX86_BUILTIN_CMPNLEPS,
15862 IX86_BUILTIN_CMPNGTPS,
15863 IX86_BUILTIN_CMPNGEPS,
15864 IX86_BUILTIN_CMPORDPS,
15865 IX86_BUILTIN_CMPUNORDPS,
15866 IX86_BUILTIN_CMPEQSS,
15867 IX86_BUILTIN_CMPLTSS,
15868 IX86_BUILTIN_CMPLESS,
15869 IX86_BUILTIN_CMPNEQSS,
15870 IX86_BUILTIN_CMPNLTSS,
15871 IX86_BUILTIN_CMPNLESS,
15872 IX86_BUILTIN_CMPNGTSS,
15873 IX86_BUILTIN_CMPNGESS,
15874 IX86_BUILTIN_CMPORDSS,
15875 IX86_BUILTIN_CMPUNORDSS,
15876
15877 IX86_BUILTIN_COMIEQSS,
15878 IX86_BUILTIN_COMILTSS,
15879 IX86_BUILTIN_COMILESS,
15880 IX86_BUILTIN_COMIGTSS,
15881 IX86_BUILTIN_COMIGESS,
15882 IX86_BUILTIN_COMINEQSS,
15883 IX86_BUILTIN_UCOMIEQSS,
15884 IX86_BUILTIN_UCOMILTSS,
15885 IX86_BUILTIN_UCOMILESS,
15886 IX86_BUILTIN_UCOMIGTSS,
15887 IX86_BUILTIN_UCOMIGESS,
15888 IX86_BUILTIN_UCOMINEQSS,
15889
15890 IX86_BUILTIN_CVTPI2PS,
15891 IX86_BUILTIN_CVTPS2PI,
15892 IX86_BUILTIN_CVTSI2SS,
15893 IX86_BUILTIN_CVTSI642SS,
15894 IX86_BUILTIN_CVTSS2SI,
15895 IX86_BUILTIN_CVTSS2SI64,
15896 IX86_BUILTIN_CVTTPS2PI,
15897 IX86_BUILTIN_CVTTSS2SI,
15898 IX86_BUILTIN_CVTTSS2SI64,
15899
15900 IX86_BUILTIN_MAXPS,
15901 IX86_BUILTIN_MAXSS,
15902 IX86_BUILTIN_MINPS,
15903 IX86_BUILTIN_MINSS,
15904
15905 IX86_BUILTIN_LOADUPS,
15906 IX86_BUILTIN_STOREUPS,
15907 IX86_BUILTIN_MOVSS,
15908
15909 IX86_BUILTIN_MOVHLPS,
15910 IX86_BUILTIN_MOVLHPS,
15911 IX86_BUILTIN_LOADHPS,
15912 IX86_BUILTIN_LOADLPS,
15913 IX86_BUILTIN_STOREHPS,
15914 IX86_BUILTIN_STORELPS,
15915
15916 IX86_BUILTIN_MASKMOVQ,
15917 IX86_BUILTIN_MOVMSKPS,
15918 IX86_BUILTIN_PMOVMSKB,
15919
15920 IX86_BUILTIN_MOVNTPS,
15921 IX86_BUILTIN_MOVNTQ,
15922
15923 IX86_BUILTIN_LOADDQU,
15924 IX86_BUILTIN_STOREDQU,
15925
15926 IX86_BUILTIN_PACKSSWB,
15927 IX86_BUILTIN_PACKSSDW,
15928 IX86_BUILTIN_PACKUSWB,
15929
15930 IX86_BUILTIN_PADDB,
15931 IX86_BUILTIN_PADDW,
15932 IX86_BUILTIN_PADDD,
15933 IX86_BUILTIN_PADDQ,
15934 IX86_BUILTIN_PADDSB,
15935 IX86_BUILTIN_PADDSW,
15936 IX86_BUILTIN_PADDUSB,
15937 IX86_BUILTIN_PADDUSW,
15938 IX86_BUILTIN_PSUBB,
15939 IX86_BUILTIN_PSUBW,
15940 IX86_BUILTIN_PSUBD,
15941 IX86_BUILTIN_PSUBQ,
15942 IX86_BUILTIN_PSUBSB,
15943 IX86_BUILTIN_PSUBSW,
15944 IX86_BUILTIN_PSUBUSB,
15945 IX86_BUILTIN_PSUBUSW,
15946
15947 IX86_BUILTIN_PAND,
15948 IX86_BUILTIN_PANDN,
15949 IX86_BUILTIN_POR,
15950 IX86_BUILTIN_PXOR,
15951
15952 IX86_BUILTIN_PAVGB,
15953 IX86_BUILTIN_PAVGW,
15954
15955 IX86_BUILTIN_PCMPEQB,
15956 IX86_BUILTIN_PCMPEQW,
15957 IX86_BUILTIN_PCMPEQD,
15958 IX86_BUILTIN_PCMPGTB,
15959 IX86_BUILTIN_PCMPGTW,
15960 IX86_BUILTIN_PCMPGTD,
15961
15962 IX86_BUILTIN_PMADDWD,
15963
15964 IX86_BUILTIN_PMAXSW,
15965 IX86_BUILTIN_PMAXUB,
15966 IX86_BUILTIN_PMINSW,
15967 IX86_BUILTIN_PMINUB,
15968
15969 IX86_BUILTIN_PMULHUW,
15970 IX86_BUILTIN_PMULHW,
15971 IX86_BUILTIN_PMULLW,
15972
15973 IX86_BUILTIN_PSADBW,
15974 IX86_BUILTIN_PSHUFW,
15975
15976 IX86_BUILTIN_PSLLW,
15977 IX86_BUILTIN_PSLLD,
15978 IX86_BUILTIN_PSLLQ,
15979 IX86_BUILTIN_PSRAW,
15980 IX86_BUILTIN_PSRAD,
15981 IX86_BUILTIN_PSRLW,
15982 IX86_BUILTIN_PSRLD,
15983 IX86_BUILTIN_PSRLQ,
15984 IX86_BUILTIN_PSLLWI,
15985 IX86_BUILTIN_PSLLDI,
15986 IX86_BUILTIN_PSLLQI,
15987 IX86_BUILTIN_PSRAWI,
15988 IX86_BUILTIN_PSRADI,
15989 IX86_BUILTIN_PSRLWI,
15990 IX86_BUILTIN_PSRLDI,
15991 IX86_BUILTIN_PSRLQI,
15992
15993 IX86_BUILTIN_PUNPCKHBW,
15994 IX86_BUILTIN_PUNPCKHWD,
15995 IX86_BUILTIN_PUNPCKHDQ,
15996 IX86_BUILTIN_PUNPCKLBW,
15997 IX86_BUILTIN_PUNPCKLWD,
15998 IX86_BUILTIN_PUNPCKLDQ,
15999
16000 IX86_BUILTIN_SHUFPS,
16001
16002 IX86_BUILTIN_RCPPS,
16003 IX86_BUILTIN_RCPSS,
16004 IX86_BUILTIN_RSQRTPS,
16005 IX86_BUILTIN_RSQRTSS,
16006 IX86_BUILTIN_SQRTPS,
16007 IX86_BUILTIN_SQRTSS,
16008
16009 IX86_BUILTIN_UNPCKHPS,
16010 IX86_BUILTIN_UNPCKLPS,
16011
16012 IX86_BUILTIN_ANDPS,
16013 IX86_BUILTIN_ANDNPS,
16014 IX86_BUILTIN_ORPS,
16015 IX86_BUILTIN_XORPS,
16016
16017 IX86_BUILTIN_EMMS,
16018 IX86_BUILTIN_LDMXCSR,
16019 IX86_BUILTIN_STMXCSR,
16020 IX86_BUILTIN_SFENCE,
16021
16022 /* 3DNow! Original */
16023 IX86_BUILTIN_FEMMS,
16024 IX86_BUILTIN_PAVGUSB,
16025 IX86_BUILTIN_PF2ID,
16026 IX86_BUILTIN_PFACC,
16027 IX86_BUILTIN_PFADD,
16028 IX86_BUILTIN_PFCMPEQ,
16029 IX86_BUILTIN_PFCMPGE,
16030 IX86_BUILTIN_PFCMPGT,
16031 IX86_BUILTIN_PFMAX,
16032 IX86_BUILTIN_PFMIN,
16033 IX86_BUILTIN_PFMUL,
16034 IX86_BUILTIN_PFRCP,
16035 IX86_BUILTIN_PFRCPIT1,
16036 IX86_BUILTIN_PFRCPIT2,
16037 IX86_BUILTIN_PFRSQIT1,
16038 IX86_BUILTIN_PFRSQRT,
16039 IX86_BUILTIN_PFSUB,
16040 IX86_BUILTIN_PFSUBR,
16041 IX86_BUILTIN_PI2FD,
16042 IX86_BUILTIN_PMULHRW,
16043
16044 /* 3DNow! Athlon Extensions */
16045 IX86_BUILTIN_PF2IW,
16046 IX86_BUILTIN_PFNACC,
16047 IX86_BUILTIN_PFPNACC,
16048 IX86_BUILTIN_PI2FW,
16049 IX86_BUILTIN_PSWAPDSI,
16050 IX86_BUILTIN_PSWAPDSF,
16051
16052 /* SSE2 */
16053 IX86_BUILTIN_ADDPD,
16054 IX86_BUILTIN_ADDSD,
16055 IX86_BUILTIN_DIVPD,
16056 IX86_BUILTIN_DIVSD,
16057 IX86_BUILTIN_MULPD,
16058 IX86_BUILTIN_MULSD,
16059 IX86_BUILTIN_SUBPD,
16060 IX86_BUILTIN_SUBSD,
16061
16062 IX86_BUILTIN_CMPEQPD,
16063 IX86_BUILTIN_CMPLTPD,
16064 IX86_BUILTIN_CMPLEPD,
16065 IX86_BUILTIN_CMPGTPD,
16066 IX86_BUILTIN_CMPGEPD,
16067 IX86_BUILTIN_CMPNEQPD,
16068 IX86_BUILTIN_CMPNLTPD,
16069 IX86_BUILTIN_CMPNLEPD,
16070 IX86_BUILTIN_CMPNGTPD,
16071 IX86_BUILTIN_CMPNGEPD,
16072 IX86_BUILTIN_CMPORDPD,
16073 IX86_BUILTIN_CMPUNORDPD,
16074 IX86_BUILTIN_CMPNEPD,
16075 IX86_BUILTIN_CMPEQSD,
16076 IX86_BUILTIN_CMPLTSD,
16077 IX86_BUILTIN_CMPLESD,
16078 IX86_BUILTIN_CMPNEQSD,
16079 IX86_BUILTIN_CMPNLTSD,
16080 IX86_BUILTIN_CMPNLESD,
16081 IX86_BUILTIN_CMPORDSD,
16082 IX86_BUILTIN_CMPUNORDSD,
16083 IX86_BUILTIN_CMPNESD,
16084
16085 IX86_BUILTIN_COMIEQSD,
16086 IX86_BUILTIN_COMILTSD,
16087 IX86_BUILTIN_COMILESD,
16088 IX86_BUILTIN_COMIGTSD,
16089 IX86_BUILTIN_COMIGESD,
16090 IX86_BUILTIN_COMINEQSD,
16091 IX86_BUILTIN_UCOMIEQSD,
16092 IX86_BUILTIN_UCOMILTSD,
16093 IX86_BUILTIN_UCOMILESD,
16094 IX86_BUILTIN_UCOMIGTSD,
16095 IX86_BUILTIN_UCOMIGESD,
16096 IX86_BUILTIN_UCOMINEQSD,
16097
16098 IX86_BUILTIN_MAXPD,
16099 IX86_BUILTIN_MAXSD,
16100 IX86_BUILTIN_MINPD,
16101 IX86_BUILTIN_MINSD,
16102
16103 IX86_BUILTIN_ANDPD,
16104 IX86_BUILTIN_ANDNPD,
16105 IX86_BUILTIN_ORPD,
16106 IX86_BUILTIN_XORPD,
16107
16108 IX86_BUILTIN_SQRTPD,
16109 IX86_BUILTIN_SQRTSD,
16110
16111 IX86_BUILTIN_UNPCKHPD,
16112 IX86_BUILTIN_UNPCKLPD,
16113
16114 IX86_BUILTIN_SHUFPD,
16115
16116 IX86_BUILTIN_LOADUPD,
16117 IX86_BUILTIN_STOREUPD,
16118 IX86_BUILTIN_MOVSD,
16119
16120 IX86_BUILTIN_LOADHPD,
16121 IX86_BUILTIN_LOADLPD,
16122
16123 IX86_BUILTIN_CVTDQ2PD,
16124 IX86_BUILTIN_CVTDQ2PS,
16125
16126 IX86_BUILTIN_CVTPD2DQ,
16127 IX86_BUILTIN_CVTPD2PI,
16128 IX86_BUILTIN_CVTPD2PS,
16129 IX86_BUILTIN_CVTTPD2DQ,
16130 IX86_BUILTIN_CVTTPD2PI,
16131
16132 IX86_BUILTIN_CVTPI2PD,
16133 IX86_BUILTIN_CVTSI2SD,
16134 IX86_BUILTIN_CVTSI642SD,
16135
16136 IX86_BUILTIN_CVTSD2SI,
16137 IX86_BUILTIN_CVTSD2SI64,
16138 IX86_BUILTIN_CVTSD2SS,
16139 IX86_BUILTIN_CVTSS2SD,
16140 IX86_BUILTIN_CVTTSD2SI,
16141 IX86_BUILTIN_CVTTSD2SI64,
16142
16143 IX86_BUILTIN_CVTPS2DQ,
16144 IX86_BUILTIN_CVTPS2PD,
16145 IX86_BUILTIN_CVTTPS2DQ,
16146
16147 IX86_BUILTIN_MOVNTI,
16148 IX86_BUILTIN_MOVNTPD,
16149 IX86_BUILTIN_MOVNTDQ,
16150
16151 /* SSE2 MMX */
16152 IX86_BUILTIN_MASKMOVDQU,
16153 IX86_BUILTIN_MOVMSKPD,
16154 IX86_BUILTIN_PMOVMSKB128,
16155
16156 IX86_BUILTIN_PACKSSWB128,
16157 IX86_BUILTIN_PACKSSDW128,
16158 IX86_BUILTIN_PACKUSWB128,
16159
16160 IX86_BUILTIN_PADDB128,
16161 IX86_BUILTIN_PADDW128,
16162 IX86_BUILTIN_PADDD128,
16163 IX86_BUILTIN_PADDQ128,
16164 IX86_BUILTIN_PADDSB128,
16165 IX86_BUILTIN_PADDSW128,
16166 IX86_BUILTIN_PADDUSB128,
16167 IX86_BUILTIN_PADDUSW128,
16168 IX86_BUILTIN_PSUBB128,
16169 IX86_BUILTIN_PSUBW128,
16170 IX86_BUILTIN_PSUBD128,
16171 IX86_BUILTIN_PSUBQ128,
16172 IX86_BUILTIN_PSUBSB128,
16173 IX86_BUILTIN_PSUBSW128,
16174 IX86_BUILTIN_PSUBUSB128,
16175 IX86_BUILTIN_PSUBUSW128,
16176
16177 IX86_BUILTIN_PAND128,
16178 IX86_BUILTIN_PANDN128,
16179 IX86_BUILTIN_POR128,
16180 IX86_BUILTIN_PXOR128,
16181
16182 IX86_BUILTIN_PAVGB128,
16183 IX86_BUILTIN_PAVGW128,
16184
16185 IX86_BUILTIN_PCMPEQB128,
16186 IX86_BUILTIN_PCMPEQW128,
16187 IX86_BUILTIN_PCMPEQD128,
16188 IX86_BUILTIN_PCMPGTB128,
16189 IX86_BUILTIN_PCMPGTW128,
16190 IX86_BUILTIN_PCMPGTD128,
16191
16192 IX86_BUILTIN_PMADDWD128,
16193
16194 IX86_BUILTIN_PMAXSW128,
16195 IX86_BUILTIN_PMAXUB128,
16196 IX86_BUILTIN_PMINSW128,
16197 IX86_BUILTIN_PMINUB128,
16198
16199 IX86_BUILTIN_PMULUDQ,
16200 IX86_BUILTIN_PMULUDQ128,
16201 IX86_BUILTIN_PMULHUW128,
16202 IX86_BUILTIN_PMULHW128,
16203 IX86_BUILTIN_PMULLW128,
16204
16205 IX86_BUILTIN_PSADBW128,
16206 IX86_BUILTIN_PSHUFHW,
16207 IX86_BUILTIN_PSHUFLW,
16208 IX86_BUILTIN_PSHUFD,
16209
16210 IX86_BUILTIN_PSLLW128,
16211 IX86_BUILTIN_PSLLD128,
16212 IX86_BUILTIN_PSLLQ128,
16213 IX86_BUILTIN_PSRAW128,
16214 IX86_BUILTIN_PSRAD128,
16215 IX86_BUILTIN_PSRLW128,
16216 IX86_BUILTIN_PSRLD128,
16217 IX86_BUILTIN_PSRLQ128,
16218 IX86_BUILTIN_PSLLDQI128,
16219 IX86_BUILTIN_PSLLWI128,
16220 IX86_BUILTIN_PSLLDI128,
16221 IX86_BUILTIN_PSLLQI128,
16222 IX86_BUILTIN_PSRAWI128,
16223 IX86_BUILTIN_PSRADI128,
16224 IX86_BUILTIN_PSRLDQI128,
16225 IX86_BUILTIN_PSRLWI128,
16226 IX86_BUILTIN_PSRLDI128,
16227 IX86_BUILTIN_PSRLQI128,
16228
16229 IX86_BUILTIN_PUNPCKHBW128,
16230 IX86_BUILTIN_PUNPCKHWD128,
16231 IX86_BUILTIN_PUNPCKHDQ128,
16232 IX86_BUILTIN_PUNPCKHQDQ128,
16233 IX86_BUILTIN_PUNPCKLBW128,
16234 IX86_BUILTIN_PUNPCKLWD128,
16235 IX86_BUILTIN_PUNPCKLDQ128,
16236 IX86_BUILTIN_PUNPCKLQDQ128,
16237
16238 IX86_BUILTIN_CLFLUSH,
16239 IX86_BUILTIN_MFENCE,
16240 IX86_BUILTIN_LFENCE,
16241
16242 /* Prescott New Instructions. */
16243 IX86_BUILTIN_ADDSUBPS,
16244 IX86_BUILTIN_HADDPS,
16245 IX86_BUILTIN_HSUBPS,
16246 IX86_BUILTIN_MOVSHDUP,
16247 IX86_BUILTIN_MOVSLDUP,
16248 IX86_BUILTIN_ADDSUBPD,
16249 IX86_BUILTIN_HADDPD,
16250 IX86_BUILTIN_HSUBPD,
16251 IX86_BUILTIN_LDDQU,
16252
16253 IX86_BUILTIN_MONITOR,
16254 IX86_BUILTIN_MWAIT,
16255
16256 /* SSSE3. */
16257 IX86_BUILTIN_PHADDW,
16258 IX86_BUILTIN_PHADDD,
16259 IX86_BUILTIN_PHADDSW,
16260 IX86_BUILTIN_PHSUBW,
16261 IX86_BUILTIN_PHSUBD,
16262 IX86_BUILTIN_PHSUBSW,
16263 IX86_BUILTIN_PMADDUBSW,
16264 IX86_BUILTIN_PMULHRSW,
16265 IX86_BUILTIN_PSHUFB,
16266 IX86_BUILTIN_PSIGNB,
16267 IX86_BUILTIN_PSIGNW,
16268 IX86_BUILTIN_PSIGND,
16269 IX86_BUILTIN_PALIGNR,
16270 IX86_BUILTIN_PABSB,
16271 IX86_BUILTIN_PABSW,
16272 IX86_BUILTIN_PABSD,
16273
16274 IX86_BUILTIN_PHADDW128,
16275 IX86_BUILTIN_PHADDD128,
16276 IX86_BUILTIN_PHADDSW128,
16277 IX86_BUILTIN_PHSUBW128,
16278 IX86_BUILTIN_PHSUBD128,
16279 IX86_BUILTIN_PHSUBSW128,
16280 IX86_BUILTIN_PMADDUBSW128,
16281 IX86_BUILTIN_PMULHRSW128,
16282 IX86_BUILTIN_PSHUFB128,
16283 IX86_BUILTIN_PSIGNB128,
16284 IX86_BUILTIN_PSIGNW128,
16285 IX86_BUILTIN_PSIGND128,
16286 IX86_BUILTIN_PALIGNR128,
16287 IX86_BUILTIN_PABSB128,
16288 IX86_BUILTIN_PABSW128,
16289 IX86_BUILTIN_PABSD128,
16290
16291 /* AMDFAM10 - SSE4A New Instructions. */
16292 IX86_BUILTIN_MOVNTSD,
16293 IX86_BUILTIN_MOVNTSS,
16294 IX86_BUILTIN_EXTRQI,
16295 IX86_BUILTIN_EXTRQ,
16296 IX86_BUILTIN_INSERTQI,
16297 IX86_BUILTIN_INSERTQ,
16298
16299 IX86_BUILTIN_VEC_INIT_V2SI,
16300 IX86_BUILTIN_VEC_INIT_V4HI,
16301 IX86_BUILTIN_VEC_INIT_V8QI,
16302 IX86_BUILTIN_VEC_EXT_V2DF,
16303 IX86_BUILTIN_VEC_EXT_V2DI,
16304 IX86_BUILTIN_VEC_EXT_V4SF,
16305 IX86_BUILTIN_VEC_EXT_V4SI,
16306 IX86_BUILTIN_VEC_EXT_V8HI,
16307 IX86_BUILTIN_VEC_EXT_V2SI,
16308 IX86_BUILTIN_VEC_EXT_V4HI,
16309 IX86_BUILTIN_VEC_SET_V8HI,
16310 IX86_BUILTIN_VEC_SET_V4HI,
16311
16312 IX86_BUILTIN_MAX
16313 };
16314
16315 /* Table for the ix86 builtin decls. */
16316 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16317
16318 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
16319 * only if target_flags includes one of the bits in MASK.  Stores the function
16320 * decl in the ix86_builtins array.
16321 * Returns the function decl, or NULL_TREE if the builtin was not added. */
16322
16323 static inline tree
16324 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16325 {
16326 tree decl = NULL_TREE;
16327
16328 if (mask & target_flags
16329 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16330 {
16331 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16332 NULL, NULL_TREE);
16333 ix86_builtins[(int) code] = decl;
16334 }
16335
16336 return decl;
16337 }
16338
16339 /* Like def_builtin, but also marks the function decl "const". */
16340
16341 static inline tree
16342 def_builtin_const (int mask, const char *name, tree type,
16343 enum ix86_builtins code)
16344 {
16345 tree decl = def_builtin (mask, name, type, code);
16346 if (decl)
16347 TREE_READONLY (decl) = 1;
16348 return decl;
16349 }
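
/* Editorial sketch, not part of the original sources: a typical registration,
   mirroring a call made later in this file, is

     def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps",
                        v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);

   The builtin is created only when the corresponding ISA bit (-msse here) is
   present in target_flags, and the decl is cached in ix86_builtins[] so the
   rest of the backend can look it up by its IX86_BUILTIN_* code.  */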
16350
16351 /* Bits for builtin_description.flag. */
16352
16353 /* Set when we don't support the comparison natively, and should
16354 swap_comparison in order to support it. */
16355 #define BUILTIN_DESC_SWAP_OPERANDS 1
16356
16357 struct builtin_description
16358 {
16359 const unsigned int mask;
16360 const enum insn_code icode;
16361 const char *const name;
16362 const enum ix86_builtins code;
16363 const enum rtx_code comparison;
16364 const unsigned int flag;
16365 };
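
/* Editorial example, taken verbatim from bdesc_2arg below: the SSE cmpps
   instruction has no "greater than" predicate, so CMPGTPS is described as
   the LT comparison plus BUILTIN_DESC_SWAP_OPERANDS:

     { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps",
       IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
*/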
16366
16367 static const struct builtin_description bdesc_comi[] =
16368 {
16369 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16370 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16371 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16372 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16373 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16374 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16375 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16376 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16377 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16378 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16379 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16380 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16381 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16382 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16384 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16385 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16386 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16387 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16388 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16389 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16390 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16391 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16392 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16393 };
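
/* Editorial note: every entry above yields an int result.  The comi/ucomi
   loop in ix86_init_mmx_sse_builtins registers the MASK_SSE2 entries with
   type int_ftype_v2df_v2df and the remaining ones with int_ftype_v4sf_v4sf.  */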
16394
16395 static const struct builtin_description bdesc_2arg[] =
16396 {
16397 /* SSE */
16398 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16399 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16400 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16401 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16402 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16403 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16404 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16405 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16406
16407 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16408 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16409 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16410 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16411 BUILTIN_DESC_SWAP_OPERANDS },
16412 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16413 BUILTIN_DESC_SWAP_OPERANDS },
16414 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16415 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16416 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16417 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16418 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16419 BUILTIN_DESC_SWAP_OPERANDS },
16420 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16421 BUILTIN_DESC_SWAP_OPERANDS },
16422 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16423 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16424 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16425 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16426 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16427 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16428 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16429 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16430 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16431 BUILTIN_DESC_SWAP_OPERANDS },
16432 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16433 BUILTIN_DESC_SWAP_OPERANDS },
16434 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16435
16436 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16437 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16438 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16439 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16440
16441 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16442 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16443 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16444 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16445
16446 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16447 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16448 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16449 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16450 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16451
16452 /* MMX */
16453 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16454 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16455 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16456 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16457 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16458 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16459 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16460 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16461
16462 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16463 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16464 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16465 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16466 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16467 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16468 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16469 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16470
16471 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16472 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16473 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16474
16475 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16476 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16477 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16478 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16479
16480 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16481 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16482
16483 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16484 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16485 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16486 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16487 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16488 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16489
16490 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16491 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16492 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16493 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16494
16495 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16496 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16497 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16498 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16499 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16500 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16501
16502 /* Special. */
16503 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16504 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16505 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16506
16507 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16508 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16509 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16510
16511 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16512 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16513 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16514 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16515 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16516 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16517
16518 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16519 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16520 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16521 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16522 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16523 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16524
16525 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16526 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16527 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16528 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16529
16530 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16531 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16532
16533 /* SSE2 */
16534 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16535 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16536 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16537 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16538 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16539 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16540 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16541 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16542
16543 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16544 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16545 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16546 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16547 BUILTIN_DESC_SWAP_OPERANDS },
16548 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16549 BUILTIN_DESC_SWAP_OPERANDS },
16550 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16551 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16552 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16553 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16554 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16555 BUILTIN_DESC_SWAP_OPERANDS },
16556 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16557 BUILTIN_DESC_SWAP_OPERANDS },
16558 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16559 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16560 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16561 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16562 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16563 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16564 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16565 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16566 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16567
16568 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16569 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16570 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16571 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16572
16573 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16574 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16575 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16576 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16577
16578 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16579 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16580 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16581
16582 /* SSE2 MMX */
16583 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16584 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16585 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16586 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16587 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16588 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16589 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16590 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16591
16592 { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16593 { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16594 { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16595 { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16596 { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16597 { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16598 { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16599 { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16600
16601 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16602 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16603
16604 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16605 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16606 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16607 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16608
16609 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16610 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16611
16612 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16613 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16614 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16615 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16616 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16617 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16618
16619 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16620 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16621 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16622 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16623
16624 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16625 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16626 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16627 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16628 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16629 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16630 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16631 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16632
16633 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16634 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16635 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16636
16637 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16638 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16639
16640 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16641 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16642
16643 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16644 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16645 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16646
16647 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16648 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16649 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16650
16651 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16652 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16653
16654 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16655
16656 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16657 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16658 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16659 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16660
16661 /* SSE3 MMX */
16662 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16663 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16664 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16665 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16666 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16667 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16668
16669 /* SSSE3 */
16670 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16671 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16672 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16673 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16674 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16675 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16676 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16677 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16678 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16679 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16680 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16681 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16682 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16683 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16684 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16685 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16686 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16687 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16688 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16689 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16690 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16691 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16692 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16693 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16694 };
16695
16696 static const struct builtin_description bdesc_1arg[] =
16697 {
16698 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16699 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16700
16701 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16702 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16703 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16704
16705 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16706 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16707 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16708 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16709 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16710 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16711
16712 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16713 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16714
16715 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16716
16717 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16718 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16719
16720 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16721 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16722 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16723 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16724 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16725
16726 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16727
16728 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16729 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16730 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16731 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16732
16733 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16734 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16735 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16736
16737 /* SSE3 */
16738 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16739 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16740
16741 /* SSSE3 */
16742 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16743 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16744 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16745 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16746 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16747 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16748 };
16749
16750 static void
16751 ix86_init_builtins (void)
16752 {
16753 if (TARGET_MMX)
16754 ix86_init_mmx_sse_builtins ();
16755 }
16756
16757 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16758 is zero. Otherwise, if TARGET_SSE is not set, only the MMX builtins
16759 are defined. */
16760 static void
16761 ix86_init_mmx_sse_builtins (void)
16762 {
16763 const struct builtin_description * d;
16764 size_t i;
16765
16766 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16767 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16768 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16769 tree V2DI_type_node
16770 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16771 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16772 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16773 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16774 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16775 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16776 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16777
16778 tree pchar_type_node = build_pointer_type (char_type_node);
16779 tree pcchar_type_node = build_pointer_type (
16780 build_type_variant (char_type_node, 1, 0));
16781 tree pfloat_type_node = build_pointer_type (float_type_node);
16782 tree pcfloat_type_node = build_pointer_type (
16783 build_type_variant (float_type_node, 1, 0));
16784 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16785 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16786 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16787
16788 /* Comparisons. */
16789 tree int_ftype_v4sf_v4sf
16790 = build_function_type_list (integer_type_node,
16791 V4SF_type_node, V4SF_type_node, NULL_TREE);
16792 tree v4si_ftype_v4sf_v4sf
16793 = build_function_type_list (V4SI_type_node,
16794 V4SF_type_node, V4SF_type_node, NULL_TREE);
16795 /* MMX/SSE/integer conversions. */
16796 tree int_ftype_v4sf
16797 = build_function_type_list (integer_type_node,
16798 V4SF_type_node, NULL_TREE);
16799 tree int64_ftype_v4sf
16800 = build_function_type_list (long_long_integer_type_node,
16801 V4SF_type_node, NULL_TREE);
16802 tree int_ftype_v8qi
16803 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16804 tree v4sf_ftype_v4sf_int
16805 = build_function_type_list (V4SF_type_node,
16806 V4SF_type_node, integer_type_node, NULL_TREE);
16807 tree v4sf_ftype_v4sf_int64
16808 = build_function_type_list (V4SF_type_node,
16809 V4SF_type_node, long_long_integer_type_node,
16810 NULL_TREE);
16811 tree v4sf_ftype_v4sf_v2si
16812 = build_function_type_list (V4SF_type_node,
16813 V4SF_type_node, V2SI_type_node, NULL_TREE);
16814
16815 /* Miscellaneous. */
16816 tree v8qi_ftype_v4hi_v4hi
16817 = build_function_type_list (V8QI_type_node,
16818 V4HI_type_node, V4HI_type_node, NULL_TREE);
16819 tree v4hi_ftype_v2si_v2si
16820 = build_function_type_list (V4HI_type_node,
16821 V2SI_type_node, V2SI_type_node, NULL_TREE);
16822 tree v4sf_ftype_v4sf_v4sf_int
16823 = build_function_type_list (V4SF_type_node,
16824 V4SF_type_node, V4SF_type_node,
16825 integer_type_node, NULL_TREE);
16826 tree v2si_ftype_v4hi_v4hi
16827 = build_function_type_list (V2SI_type_node,
16828 V4HI_type_node, V4HI_type_node, NULL_TREE);
16829 tree v4hi_ftype_v4hi_int
16830 = build_function_type_list (V4HI_type_node,
16831 V4HI_type_node, integer_type_node, NULL_TREE);
16832 tree v4hi_ftype_v4hi_di
16833 = build_function_type_list (V4HI_type_node,
16834 V4HI_type_node, long_long_unsigned_type_node,
16835 NULL_TREE);
16836 tree v2si_ftype_v2si_di
16837 = build_function_type_list (V2SI_type_node,
16838 V2SI_type_node, long_long_unsigned_type_node,
16839 NULL_TREE);
16840 tree void_ftype_void
16841 = build_function_type (void_type_node, void_list_node);
16842 tree void_ftype_unsigned
16843 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16844 tree void_ftype_unsigned_unsigned
16845 = build_function_type_list (void_type_node, unsigned_type_node,
16846 unsigned_type_node, NULL_TREE);
16847 tree void_ftype_pcvoid_unsigned_unsigned
16848 = build_function_type_list (void_type_node, const_ptr_type_node,
16849 unsigned_type_node, unsigned_type_node,
16850 NULL_TREE);
16851 tree unsigned_ftype_void
16852 = build_function_type (unsigned_type_node, void_list_node);
16853 tree v2si_ftype_v4sf
16854 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16855 /* Loads/stores. */
16856 tree void_ftype_v8qi_v8qi_pchar
16857 = build_function_type_list (void_type_node,
16858 V8QI_type_node, V8QI_type_node,
16859 pchar_type_node, NULL_TREE);
16860 tree v4sf_ftype_pcfloat
16861 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16862 /* @@@ the type is bogus */
16863 tree v4sf_ftype_v4sf_pv2si
16864 = build_function_type_list (V4SF_type_node,
16865 V4SF_type_node, pv2si_type_node, NULL_TREE);
16866 tree void_ftype_pv2si_v4sf
16867 = build_function_type_list (void_type_node,
16868 pv2si_type_node, V4SF_type_node, NULL_TREE);
16869 tree void_ftype_pfloat_v4sf
16870 = build_function_type_list (void_type_node,
16871 pfloat_type_node, V4SF_type_node, NULL_TREE);
16872 tree void_ftype_pdi_di
16873 = build_function_type_list (void_type_node,
16874 pdi_type_node, long_long_unsigned_type_node,
16875 NULL_TREE);
16876 tree void_ftype_pv2di_v2di
16877 = build_function_type_list (void_type_node,
16878 pv2di_type_node, V2DI_type_node, NULL_TREE);
16879 /* Normal vector unops. */
16880 tree v4sf_ftype_v4sf
16881 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16882 tree v16qi_ftype_v16qi
16883 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16884 tree v8hi_ftype_v8hi
16885 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16886 tree v4si_ftype_v4si
16887 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16888 tree v8qi_ftype_v8qi
16889 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16890 tree v4hi_ftype_v4hi
16891 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16892
16893 /* Normal vector binops. */
16894 tree v4sf_ftype_v4sf_v4sf
16895 = build_function_type_list (V4SF_type_node,
16896 V4SF_type_node, V4SF_type_node, NULL_TREE);
16897 tree v8qi_ftype_v8qi_v8qi
16898 = build_function_type_list (V8QI_type_node,
16899 V8QI_type_node, V8QI_type_node, NULL_TREE);
16900 tree v4hi_ftype_v4hi_v4hi
16901 = build_function_type_list (V4HI_type_node,
16902 V4HI_type_node, V4HI_type_node, NULL_TREE);
16903 tree v2si_ftype_v2si_v2si
16904 = build_function_type_list (V2SI_type_node,
16905 V2SI_type_node, V2SI_type_node, NULL_TREE);
16906 tree di_ftype_di_di
16907 = build_function_type_list (long_long_unsigned_type_node,
16908 long_long_unsigned_type_node,
16909 long_long_unsigned_type_node, NULL_TREE);
16910
16911 tree di_ftype_di_di_int
16912 = build_function_type_list (long_long_unsigned_type_node,
16913 long_long_unsigned_type_node,
16914 long_long_unsigned_type_node,
16915 integer_type_node, NULL_TREE);
16916
16917 tree v2si_ftype_v2sf
16918 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16919 tree v2sf_ftype_v2si
16920 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16921 tree v2si_ftype_v2si
16922 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16923 tree v2sf_ftype_v2sf
16924 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16925 tree v2sf_ftype_v2sf_v2sf
16926 = build_function_type_list (V2SF_type_node,
16927 V2SF_type_node, V2SF_type_node, NULL_TREE);
16928 tree v2si_ftype_v2sf_v2sf
16929 = build_function_type_list (V2SI_type_node,
16930 V2SF_type_node, V2SF_type_node, NULL_TREE);
16931 tree pint_type_node = build_pointer_type (integer_type_node);
16932 tree pdouble_type_node = build_pointer_type (double_type_node);
16933 tree pcdouble_type_node = build_pointer_type (
16934 build_type_variant (double_type_node, 1, 0));
16935 tree int_ftype_v2df_v2df
16936 = build_function_type_list (integer_type_node,
16937 V2DF_type_node, V2DF_type_node, NULL_TREE);
16938
16939 tree void_ftype_pcvoid
16940 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16941 tree v4sf_ftype_v4si
16942 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16943 tree v4si_ftype_v4sf
16944 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16945 tree v2df_ftype_v4si
16946 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16947 tree v4si_ftype_v2df
16948 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16949 tree v2si_ftype_v2df
16950 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16951 tree v4sf_ftype_v2df
16952 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16953 tree v2df_ftype_v2si
16954 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16955 tree v2df_ftype_v4sf
16956 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16957 tree int_ftype_v2df
16958 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16959 tree int64_ftype_v2df
16960 = build_function_type_list (long_long_integer_type_node,
16961 V2DF_type_node, NULL_TREE);
16962 tree v2df_ftype_v2df_int
16963 = build_function_type_list (V2DF_type_node,
16964 V2DF_type_node, integer_type_node, NULL_TREE);
16965 tree v2df_ftype_v2df_int64
16966 = build_function_type_list (V2DF_type_node,
16967 V2DF_type_node, long_long_integer_type_node,
16968 NULL_TREE);
16969 tree v4sf_ftype_v4sf_v2df
16970 = build_function_type_list (V4SF_type_node,
16971 V4SF_type_node, V2DF_type_node, NULL_TREE);
16972 tree v2df_ftype_v2df_v4sf
16973 = build_function_type_list (V2DF_type_node,
16974 V2DF_type_node, V4SF_type_node, NULL_TREE);
16975 tree v2df_ftype_v2df_v2df_int
16976 = build_function_type_list (V2DF_type_node,
16977 V2DF_type_node, V2DF_type_node,
16978 integer_type_node,
16979 NULL_TREE);
16980 tree v2df_ftype_v2df_pcdouble
16981 = build_function_type_list (V2DF_type_node,
16982 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16983 tree void_ftype_pdouble_v2df
16984 = build_function_type_list (void_type_node,
16985 pdouble_type_node, V2DF_type_node, NULL_TREE);
16986 tree void_ftype_pint_int
16987 = build_function_type_list (void_type_node,
16988 pint_type_node, integer_type_node, NULL_TREE);
16989 tree void_ftype_v16qi_v16qi_pchar
16990 = build_function_type_list (void_type_node,
16991 V16QI_type_node, V16QI_type_node,
16992 pchar_type_node, NULL_TREE);
16993 tree v2df_ftype_pcdouble
16994 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16995 tree v2df_ftype_v2df_v2df
16996 = build_function_type_list (V2DF_type_node,
16997 V2DF_type_node, V2DF_type_node, NULL_TREE);
16998 tree v16qi_ftype_v16qi_v16qi
16999 = build_function_type_list (V16QI_type_node,
17000 V16QI_type_node, V16QI_type_node, NULL_TREE);
17001 tree v8hi_ftype_v8hi_v8hi
17002 = build_function_type_list (V8HI_type_node,
17003 V8HI_type_node, V8HI_type_node, NULL_TREE);
17004 tree v4si_ftype_v4si_v4si
17005 = build_function_type_list (V4SI_type_node,
17006 V4SI_type_node, V4SI_type_node, NULL_TREE);
17007 tree v2di_ftype_v2di_v2di
17008 = build_function_type_list (V2DI_type_node,
17009 V2DI_type_node, V2DI_type_node, NULL_TREE);
17010 tree v2di_ftype_v2df_v2df
17011 = build_function_type_list (V2DI_type_node,
17012 V2DF_type_node, V2DF_type_node, NULL_TREE);
17013 tree v2df_ftype_v2df
17014 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17015 tree v2di_ftype_v2di_int
17016 = build_function_type_list (V2DI_type_node,
17017 V2DI_type_node, integer_type_node, NULL_TREE);
17018 tree v2di_ftype_v2di_v2di_int
17019 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17020 V2DI_type_node, integer_type_node, NULL_TREE);
17021 tree v4si_ftype_v4si_int
17022 = build_function_type_list (V4SI_type_node,
17023 V4SI_type_node, integer_type_node, NULL_TREE);
17024 tree v8hi_ftype_v8hi_int
17025 = build_function_type_list (V8HI_type_node,
17026 V8HI_type_node, integer_type_node, NULL_TREE);
17027 tree v8hi_ftype_v8hi_v2di
17028 = build_function_type_list (V8HI_type_node,
17029 V8HI_type_node, V2DI_type_node, NULL_TREE);
17030 tree v4si_ftype_v4si_v2di
17031 = build_function_type_list (V4SI_type_node,
17032 V4SI_type_node, V2DI_type_node, NULL_TREE);
17033 tree v4si_ftype_v8hi_v8hi
17034 = build_function_type_list (V4SI_type_node,
17035 V8HI_type_node, V8HI_type_node, NULL_TREE);
17036 tree di_ftype_v8qi_v8qi
17037 = build_function_type_list (long_long_unsigned_type_node,
17038 V8QI_type_node, V8QI_type_node, NULL_TREE);
17039 tree di_ftype_v2si_v2si
17040 = build_function_type_list (long_long_unsigned_type_node,
17041 V2SI_type_node, V2SI_type_node, NULL_TREE);
17042 tree v2di_ftype_v16qi_v16qi
17043 = build_function_type_list (V2DI_type_node,
17044 V16QI_type_node, V16QI_type_node, NULL_TREE);
17045 tree v2di_ftype_v4si_v4si
17046 = build_function_type_list (V2DI_type_node,
17047 V4SI_type_node, V4SI_type_node, NULL_TREE);
17048 tree int_ftype_v16qi
17049 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17050 tree v16qi_ftype_pcchar
17051 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17052 tree void_ftype_pchar_v16qi
17053 = build_function_type_list (void_type_node,
17054 pchar_type_node, V16QI_type_node, NULL_TREE);
17055
17056 tree v2di_ftype_v2di_unsigned_unsigned
17057 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17058 unsigned_type_node, unsigned_type_node,
17059 NULL_TREE);
17060 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17061 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17062 unsigned_type_node, unsigned_type_node,
17063 NULL_TREE);
17064 tree v2di_ftype_v2di_v16qi
17065 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17066 NULL_TREE);
17067
17068 tree float80_type;
17069 tree float128_type;
17070 tree ftype;
17071
17072 /* The __float80 type. */
17073 if (TYPE_MODE (long_double_type_node) == XFmode)
17074 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17075 "__float80");
17076 else
17077 {
17078 /* Long double is not XFmode here, so build a distinct 80-bit type. */
17079 float80_type = make_node (REAL_TYPE);
17080 TYPE_PRECISION (float80_type) = 80;
17081 layout_type (float80_type);
17082 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
17083 }
17084
17085 if (TARGET_64BIT)
17086 {
17087 float128_type = make_node (REAL_TYPE);
17088 TYPE_PRECISION (float128_type) = 128;
17089 layout_type (float128_type);
17090 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
17091 }
17092
17093 /* Add all builtins that are more or less simple operations on two
17094 operands. */
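/* Editorial note: the loop below derives each builtin's function type from
   the mode of the pattern's first input operand.  For example,
   CODE_FOR_addv4sf3 operates on V4SFmode, so "__builtin_ia32_addps" is
   registered with type v4sf_ftype_v4sf_v4sf.  */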
17095 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17096 {
17097 /* Use the mode of one of the input operands; the target can have a
17098 different mode for the mask-generating compares. */
17099 enum machine_mode mode;
17100 tree type;
17101
17102 if (d->name == 0)
17103 continue;
17104 mode = insn_data[d->icode].operand[1].mode;
17105
17106 switch (mode)
17107 {
17108 case V16QImode:
17109 type = v16qi_ftype_v16qi_v16qi;
17110 break;
17111 case V8HImode:
17112 type = v8hi_ftype_v8hi_v8hi;
17113 break;
17114 case V4SImode:
17115 type = v4si_ftype_v4si_v4si;
17116 break;
17117 case V2DImode:
17118 type = v2di_ftype_v2di_v2di;
17119 break;
17120 case V2DFmode:
17121 type = v2df_ftype_v2df_v2df;
17122 break;
17123 case V4SFmode:
17124 type = v4sf_ftype_v4sf_v4sf;
17125 break;
17126 case V8QImode:
17127 type = v8qi_ftype_v8qi_v8qi;
17128 break;
17129 case V4HImode:
17130 type = v4hi_ftype_v4hi_v4hi;
17131 break;
17132 case V2SImode:
17133 type = v2si_ftype_v2si_v2si;
17134 break;
17135 case DImode:
17136 type = di_ftype_di_di;
17137 break;
17138
17139 default:
17140 gcc_unreachable ();
17141 }
17142
17143 /* Override for comparisons; the mask compares return an integer vector mask. */
17144 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17145 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17146 type = v4si_ftype_v4sf_v4sf;
17147
17148 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
17149 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
17150 type = v2di_ftype_v2df_v2df;
17151
17152 def_builtin (d->mask, d->name, type, d->code);
17153 }
17154
17155 /* Add all builtins that are more or less simple operations on 1 operand. */
17156 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
17157 {
17158 enum machine_mode mode;
17159 tree type;
17160
17161 if (d->name == 0)
17162 continue;
17163 mode = insn_data[d->icode].operand[1].mode;
17164
17165 switch (mode)
17166 {
17167 case V16QImode:
17168 type = v16qi_ftype_v16qi;
17169 break;
17170 case V8HImode:
17171 type = v8hi_ftype_v8hi;
17172 break;
17173 case V4SImode:
17174 type = v4si_ftype_v4si;
17175 break;
17176 case V2DFmode:
17177 type = v2df_ftype_v2df;
17178 break;
17179 case V4SFmode:
17180 type = v4sf_ftype_v4sf;
17181 break;
17182 case V8QImode:
17183 type = v8qi_ftype_v8qi;
17184 break;
17185 case V4HImode:
17186 type = v4hi_ftype_v4hi;
17187 break;
17188 case V2SImode:
17189 type = v2si_ftype_v2si;
17190 break;
17191
17192 default:
17193 gcc_unreachable ();
17194 }
17195
17196 def_builtin (d->mask, d->name, type, d->code);
17197 }
17198
17199 /* Add the remaining MMX insns with somewhat more complicated types. */
17200 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
17201 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
17202 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
17203 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
17204
17205 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
17206 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
17207 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
17208
17209 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
17210 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
17211
17212 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
17213 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
17214
17215 /* comi/ucomi insns. */
17216 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
17217 if (d->mask == MASK_SSE2)
17218 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
17219 else
17220 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
17221
17222 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
17223 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
17224 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
17225
17226 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
17227 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
17228 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
17229 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
17230 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
17231 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
17232 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
17233 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
17234 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
17235 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
17236 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
17237
17238 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
17239
17240 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
17241 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
17242
17243 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
17244 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
17245 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
17246 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
17247
17248 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
17249 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
17250 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
17251 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
17252
17253 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
17254
17255 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
17256
17257 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
17258 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
17259 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
17260 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
17261 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
17262 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
17263
17264 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
17265
17266 /* Original 3DNow! */
17267 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
17268 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
17269 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
17270 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
17271 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
17272 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
17273 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
17274 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
17275 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
17276 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
17277 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
17278 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
17279 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
17280 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
17281 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
17282 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
17283 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
17284 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
17285 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
17286 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
17287
17288 /* 3DNow! extension as used in the Athlon CPU. */
17289 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17290 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17291 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17292 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17293 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17294 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17295
17296 /* SSE2 */
17297 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17298
17299 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17300 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17301
17302 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17303 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17304
17305 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17306 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17307 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17308 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17309 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17310
17311 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17312 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17313 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17314 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17315
17316 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17317 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17318
17319 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17320
17321 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17322 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17323
17324 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17325 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17326 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17327 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17328 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17329
17330 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17331
17332 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17333 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17334 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17335 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17336
17337 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17338 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17339 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17340
17341 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17342 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17343 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17344 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17345
17346 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17347 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17348 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17349
17350 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17351 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17352
17353 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17354 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17355
17356 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSLLW128);
17357 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSLLD128);
17358 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17359
17360 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRLW128);
17361 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRLD128);
17362 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17363
17364 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v2di, IX86_BUILTIN_PSRAW128);
17365 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v2di, IX86_BUILTIN_PSRAD128);
17366
17367 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17368 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17369 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17370 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17371
17372 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17373 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17374 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17375 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17376
17377 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17378 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17379
17380 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17381
17382 /* Prescott New Instructions. */
17383 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17384 void_ftype_pcvoid_unsigned_unsigned,
17385 IX86_BUILTIN_MONITOR);
17386 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17387 void_ftype_unsigned_unsigned,
17388 IX86_BUILTIN_MWAIT);
17389 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17390 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
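  /* Illustrative sketch only (not part of the compiler logic): user code
     normally reaches the monitor/mwait builtins above through the
     pmmintrin.h wrappers _mm_monitor and _mm_mwait, roughly as

       __builtin_ia32_monitor (p, ext, hints);
       __builtin_ia32_mwait (ext, hints);

     where P points at the address range to arm and EXT/HINTS carry the
     extension and hint words, matching the signatures registered above.  */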
17391
17392 /* SSSE3. */
17393 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17394 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17395 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17396 IX86_BUILTIN_PALIGNR);
17397
17398 /* AMDFAM10 SSE4A built-ins. */
17399 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17400 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17401 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17402 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17403 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17404 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17405 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17406 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17407 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17408 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17409 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17410 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
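  /* Illustrative sketch only: with -msse4a these builtins back the
     ammintrin.h intrinsics.  For instance, a non-temporal store of the low
     double of a __v2df value X through a double pointer P can be written

       __builtin_ia32_movntsd (p, x);

     matching the void (double *, __v2df) signature registered above.  */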
17411
17412 /* Access to the vec_init patterns. */
17413 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17414 integer_type_node, NULL_TREE);
17415 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17416 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17417
17418 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17419 short_integer_type_node,
17420 short_integer_type_node,
17421 short_integer_type_node, NULL_TREE);
17422 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17423 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17424
17425 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17426 char_type_node, char_type_node,
17427 char_type_node, char_type_node,
17428 char_type_node, char_type_node,
17429 char_type_node, NULL_TREE);
17430 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17431 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
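  /* Illustrative sketch only: these vec_init builtins are what mmintrin.h
     wrappers such as _mm_set_pi32 use to build __m64 values without
     exposing MMX vec_init patterns.  Roughly,

       __v2si v = __builtin_ia32_vec_init_v2si (a, b);

     packs the two int arguments into one V2SImode value (the exact element
     ordering is left to the wrapper; treat it as an assumption here).  */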
17432
17433 /* Access to the vec_extract patterns. */
17434 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17435 integer_type_node, NULL_TREE);
17436 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17437 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17438
17439 ftype = build_function_type_list (long_long_integer_type_node,
17440 V2DI_type_node, integer_type_node,
17441 NULL_TREE);
17442 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17443 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17444
17445 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17446 integer_type_node, NULL_TREE);
17447 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17448 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17449
17450 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17451 integer_type_node, NULL_TREE);
17452 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17453 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17454
17455 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17456 integer_type_node, NULL_TREE);
17457 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17458 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17459
17460 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17461 integer_type_node, NULL_TREE);
17462 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17463 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17464
17465 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17466 integer_type_node, NULL_TREE);
17467 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17468 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
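  /* Illustrative sketch only: the vec_ext builtins extract one element from
     a vector, with the selector required to be an integer constant in range
     (see get_element_number below).  For example

       float f = __builtin_ia32_vec_ext_v4sf (x, 2);

     reads element 2 of a __v4sf-typed value X; a selector outside 0..3 is
     rejected with an error.  */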
17469
17470 /* Access to the vec_set patterns. */
17471 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17472 intHI_type_node,
17473 integer_type_node, NULL_TREE);
17474 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17475 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17476
17477 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17478 intHI_type_node,
17479 integer_type_node, NULL_TREE);
17480 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17481 ftype, IX86_BUILTIN_VEC_SET_V4HI);
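  /* Illustrative sketch only: the vec_set builtins replace one element of a
     vector, as used by _mm_insert_epi16-style intrinsics.  For example

       y = __builtin_ia32_vec_set_v8hi (x, val, 3);

     returns a copy of the __v8hi value X with element 3 replaced by the
     HImode value VAL, the selector again being a constant in range.  */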
17482 }
17483
17484 /* Errors in the source file can cause expand_expr to return const0_rtx
17485 where we expect a vector. To avoid crashing, use one of the vector
17486 clear instructions. */
17487 static rtx
17488 safe_vector_operand (rtx x, enum machine_mode mode)
17489 {
17490 if (x == const0_rtx)
17491 x = CONST0_RTX (mode);
17492 return x;
17493 }
17494
17495 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17496
17497 static rtx
17498 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17499 {
17500 rtx pat, xops[3];
17501 tree arg0 = CALL_EXPR_ARG (exp, 0);
17502 tree arg1 = CALL_EXPR_ARG (exp, 1);
17503 rtx op0 = expand_normal (arg0);
17504 rtx op1 = expand_normal (arg1);
17505 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17506 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17507 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17508
17509 if (VECTOR_MODE_P (mode0))
17510 op0 = safe_vector_operand (op0, mode0);
17511 if (VECTOR_MODE_P (mode1))
17512 op1 = safe_vector_operand (op1, mode1);
17513
17514 if (optimize || !target
17515 || GET_MODE (target) != tmode
17516 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17517 target = gen_reg_rtx (tmode);
17518
17519 if (GET_MODE (op1) == SImode && mode1 == TImode)
17520 {
17521 rtx x = gen_reg_rtx (V4SImode);
17522 emit_insn (gen_sse2_loadd (x, op1));
17523 op1 = gen_lowpart (TImode, x);
17524 }
17525
17526 /* The insn must want input operands in the same modes as the
17527 result. */
17528 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17529 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17530
17531 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17532 op0 = copy_to_mode_reg (mode0, op0);
17533 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17534 op1 = copy_to_mode_reg (mode1, op1);
17535
17536 /* ??? Using ix86_fixup_binary_operands is problematic when
17537 we've got mismatched modes. Fake it. */
17538
17539 xops[0] = target;
17540 xops[1] = op0;
17541 xops[2] = op1;
17542
17543 if (tmode == mode0 && tmode == mode1)
17544 {
17545 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17546 op0 = xops[1];
17547 op1 = xops[2];
17548 }
17549 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17550 {
17551 op0 = force_reg (mode0, op0);
17552 op1 = force_reg (mode1, op1);
17553 target = gen_reg_rtx (tmode);
17554 }
17555
17556 pat = GEN_FCN (icode) (target, op0, op1);
17557 if (! pat)
17558 return 0;
17559 emit_insn (pat);
17560 return target;
17561 }
17562
17563 /* Subroutine of ix86_expand_builtin to take care of stores. */
17564
17565 static rtx
17566 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17567 {
17568 rtx pat;
17569 tree arg0 = CALL_EXPR_ARG (exp, 0);
17570 tree arg1 = CALL_EXPR_ARG (exp, 1);
17571 rtx op0 = expand_normal (arg0);
17572 rtx op1 = expand_normal (arg1);
17573 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17574 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17575
17576 if (VECTOR_MODE_P (mode1))
17577 op1 = safe_vector_operand (op1, mode1);
17578
17579 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17580 op1 = copy_to_mode_reg (mode1, op1);
17581
17582 pat = GEN_FCN (icode) (op0, op1);
17583 if (pat)
17584 emit_insn (pat);
17585 return 0;
17586 }
17587
17588 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17589
17590 static rtx
17591 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17592 rtx target, int do_load)
17593 {
17594 rtx pat;
17595 tree arg0 = CALL_EXPR_ARG (exp, 0);
17596 rtx op0 = expand_normal (arg0);
17597 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17598 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17599
17600 if (optimize || !target
17601 || GET_MODE (target) != tmode
17602 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17603 target = gen_reg_rtx (tmode);
17604 if (do_load)
17605 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17606 else
17607 {
17608 if (VECTOR_MODE_P (mode0))
17609 op0 = safe_vector_operand (op0, mode0);
17610
17611 if ((optimize && !register_operand (op0, mode0))
17612 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17613 op0 = copy_to_mode_reg (mode0, op0);
17614 }
17615
17616 pat = GEN_FCN (icode) (target, op0);
17617 if (! pat)
17618 return 0;
17619 emit_insn (pat);
17620 return target;
17621 }
17622
17623 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17624 sqrtss, rsqrtss, rcpss. */
17625
17626 static rtx
17627 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17628 {
17629 rtx pat;
17630 tree arg0 = CALL_EXPR_ARG (exp, 0);
17631 rtx op1, op0 = expand_normal (arg0);
17632 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17633 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17634
17635 if (optimize || !target
17636 || GET_MODE (target) != tmode
17637 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17638 target = gen_reg_rtx (tmode);
17639
17640 if (VECTOR_MODE_P (mode0))
17641 op0 = safe_vector_operand (op0, mode0);
17642
17643 if ((optimize && !register_operand (op0, mode0))
17644 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17645 op0 = copy_to_mode_reg (mode0, op0);
17646
17647 op1 = op0;
17648 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17649 op1 = copy_to_mode_reg (mode0, op1);
17650
17651 pat = GEN_FCN (icode) (target, op0, op1);
17652 if (! pat)
17653 return 0;
17654 emit_insn (pat);
17655 return target;
17656 }
17657
17658 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17659
17660 static rtx
17661 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17662 rtx target)
17663 {
17664 rtx pat;
17665 tree arg0 = CALL_EXPR_ARG (exp, 0);
17666 tree arg1 = CALL_EXPR_ARG (exp, 1);
17667 rtx op0 = expand_normal (arg0);
17668 rtx op1 = expand_normal (arg1);
17669 rtx op2;
17670 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17671 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17672 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17673 enum rtx_code comparison = d->comparison;
17674
17675 if (VECTOR_MODE_P (mode0))
17676 op0 = safe_vector_operand (op0, mode0);
17677 if (VECTOR_MODE_P (mode1))
17678 op1 = safe_vector_operand (op1, mode1);
17679
17680 /* Swap operands if we have a comparison that isn't available in
17681 hardware. */
17682 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17683 {
17684 rtx tmp = gen_reg_rtx (mode1);
17685 emit_move_insn (tmp, op1);
17686 op1 = op0;
17687 op0 = tmp;
17688 }
17689
17690 if (optimize || !target
17691 || GET_MODE (target) != tmode
17692 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17693 target = gen_reg_rtx (tmode);
17694
17695 if ((optimize && !register_operand (op0, mode0))
17696 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17697 op0 = copy_to_mode_reg (mode0, op0);
17698 if ((optimize && !register_operand (op1, mode1))
17699 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17700 op1 = copy_to_mode_reg (mode1, op1);
17701
17702 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17703 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17704 if (! pat)
17705 return 0;
17706 emit_insn (pat);
17707 return target;
17708 }
17709
17710 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17711
17712 static rtx
17713 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17714 rtx target)
17715 {
17716 rtx pat;
17717 tree arg0 = CALL_EXPR_ARG (exp, 0);
17718 tree arg1 = CALL_EXPR_ARG (exp, 1);
17719 rtx op0 = expand_normal (arg0);
17720 rtx op1 = expand_normal (arg1);
17721 rtx op2;
17722 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17723 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17724 enum rtx_code comparison = d->comparison;
17725
17726 if (VECTOR_MODE_P (mode0))
17727 op0 = safe_vector_operand (op0, mode0);
17728 if (VECTOR_MODE_P (mode1))
17729 op1 = safe_vector_operand (op1, mode1);
17730
17731 /* Swap operands if we have a comparison that isn't available in
17732 hardware. */
17733 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17734 {
17735 rtx tmp = op1;
17736 op1 = op0;
17737 op0 = tmp;
17738 }
17739
17740 target = gen_reg_rtx (SImode);
17741 emit_move_insn (target, const0_rtx);
17742 target = gen_rtx_SUBREG (QImode, target, 0);
17743
17744 if ((optimize && !register_operand (op0, mode0))
17745 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17746 op0 = copy_to_mode_reg (mode0, op0);
17747 if ((optimize && !register_operand (op1, mode1))
17748 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17749 op1 = copy_to_mode_reg (mode1, op1);
17750
17751 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17752 pat = GEN_FCN (d->icode) (op0, op1);
17753 if (! pat)
17754 return 0;
17755 emit_insn (pat);
17756 emit_insn (gen_rtx_SET (VOIDmode,
17757 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17758 gen_rtx_fmt_ee (comparison, QImode,
17759 SET_DEST (pat),
17760 const0_rtx)));
17761
17762 return SUBREG_REG (target);
17763 }
17764
17765 /* Return the integer constant in ARG. Constrain it to be in the range
17766 of the subparts of VEC_TYPE; issue an error if not. */
17767
17768 static int
17769 get_element_number (tree vec_type, tree arg)
17770 {
17771 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17772
17773 if (!host_integerp (arg, 1)
17774 || (elt = tree_low_cst (arg, 1), elt > max))
17775 {
17776 error ("selector must be an integer constant in the range 0..%wi", max);
17777 return 0;
17778 }
17779
17780 return elt;
17781 }
17782
17783 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17784 ix86_expand_vector_init. We DO have language-level syntax for this, in
17785 the form of (type){ init-list }. However, since we can't place emms
17786 instructions from inside the compiler, we can't allow the use of MMX
17787 registers unless the user explicitly asks for it. So we do *not* define
17788 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17789 we have builtins, invoked by mmintrin.h, that give us license to emit
17790 these sorts of instructions. */
17791
17792 static rtx
17793 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17794 {
17795 enum machine_mode tmode = TYPE_MODE (type);
17796 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17797 int i, n_elt = GET_MODE_NUNITS (tmode);
17798 rtvec v = rtvec_alloc (n_elt);
17799
17800 gcc_assert (VECTOR_MODE_P (tmode));
17801 gcc_assert (call_expr_nargs (exp) == n_elt);
17802
17803 for (i = 0; i < n_elt; ++i)
17804 {
17805 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17806 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17807 }
17808
17809 if (!target || !register_operand (target, tmode))
17810 target = gen_reg_rtx (tmode);
17811
17812 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17813 return target;
17814 }
17815
17816 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17817 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17818 had a language-level syntax for referencing vector elements. */
17819
17820 static rtx
17821 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17822 {
17823 enum machine_mode tmode, mode0;
17824 tree arg0, arg1;
17825 int elt;
17826 rtx op0;
17827
17828 arg0 = CALL_EXPR_ARG (exp, 0);
17829 arg1 = CALL_EXPR_ARG (exp, 1);
17830
17831 op0 = expand_normal (arg0);
17832 elt = get_element_number (TREE_TYPE (arg0), arg1);
17833
17834 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17835 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17836 gcc_assert (VECTOR_MODE_P (mode0));
17837
17838 op0 = force_reg (mode0, op0);
17839
17840 if (optimize || !target || !register_operand (target, tmode))
17841 target = gen_reg_rtx (tmode);
17842
17843 ix86_expand_vector_extract (true, target, op0, elt);
17844
17845 return target;
17846 }
17847
17848 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17849 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17850 a language-level syntax for referencing vector elements. */
17851
17852 static rtx
17853 ix86_expand_vec_set_builtin (tree exp)
17854 {
17855 enum machine_mode tmode, mode1;
17856 tree arg0, arg1, arg2;
17857 int elt;
17858 rtx op0, op1;
17859
17860 arg0 = CALL_EXPR_ARG (exp, 0);
17861 arg1 = CALL_EXPR_ARG (exp, 1);
17862 arg2 = CALL_EXPR_ARG (exp, 2);
17863
17864 tmode = TYPE_MODE (TREE_TYPE (arg0));
17865 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17866 gcc_assert (VECTOR_MODE_P (tmode));
17867
17868 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17869 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17870 elt = get_element_number (TREE_TYPE (arg0), arg2);
17871
17872 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17873 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17874
17875 op0 = force_reg (tmode, op0);
17876 op1 = force_reg (mode1, op1);
17877
17878 ix86_expand_vector_set (true, op0, op1, elt);
17879
17880 return op0;
17881 }
17882
17883 /* Expand an expression EXP that calls a built-in function,
17884 with result going to TARGET if that's convenient
17885 (and in mode MODE if that's convenient).
17886 SUBTARGET may be used as the target for computing one of EXP's operands.
17887 IGNORE is nonzero if the value is to be ignored. */
17888
17889 static rtx
17890 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17891 enum machine_mode mode ATTRIBUTE_UNUSED,
17892 int ignore ATTRIBUTE_UNUSED)
17893 {
17894 const struct builtin_description *d;
17895 size_t i;
17896 enum insn_code icode;
17897 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17898 tree arg0, arg1, arg2, arg3;
17899 rtx op0, op1, op2, op3, pat;
17900 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17901 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17902
17903 switch (fcode)
17904 {
17905 case IX86_BUILTIN_EMMS:
17906 emit_insn (gen_mmx_emms ());
17907 return 0;
17908
17909 case IX86_BUILTIN_SFENCE:
17910 emit_insn (gen_sse_sfence ());
17911 return 0;
17912
17913 case IX86_BUILTIN_MASKMOVQ:
17914 case IX86_BUILTIN_MASKMOVDQU:
17915 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17916 ? CODE_FOR_mmx_maskmovq
17917 : CODE_FOR_sse2_maskmovdqu);
17918 /* Note the arg order is different from the operand order. */
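      /* For example (a sketch based on the signature registered earlier):
         __builtin_ia32_maskmovdqu (d, m, p) is called as (data, mask,
         pointer), while the insn pattern wants (mem, data, mask) - hence
         the reshuffling of the args below.  */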
17919 arg1 = CALL_EXPR_ARG (exp, 0);
17920 arg2 = CALL_EXPR_ARG (exp, 1);
17921 arg0 = CALL_EXPR_ARG (exp, 2);
17922 op0 = expand_normal (arg0);
17923 op1 = expand_normal (arg1);
17924 op2 = expand_normal (arg2);
17925 mode0 = insn_data[icode].operand[0].mode;
17926 mode1 = insn_data[icode].operand[1].mode;
17927 mode2 = insn_data[icode].operand[2].mode;
17928
17929 op0 = force_reg (Pmode, op0);
17930 op0 = gen_rtx_MEM (mode1, op0);
17931
17932 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17933 op0 = copy_to_mode_reg (mode0, op0);
17934 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17935 op1 = copy_to_mode_reg (mode1, op1);
17936 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17937 op2 = copy_to_mode_reg (mode2, op2);
17938 pat = GEN_FCN (icode) (op0, op1, op2);
17939 if (! pat)
17940 return 0;
17941 emit_insn (pat);
17942 return 0;
17943
17944 case IX86_BUILTIN_SQRTSS:
17945 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17946 case IX86_BUILTIN_RSQRTSS:
17947 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17948 case IX86_BUILTIN_RCPSS:
17949 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17950
17951 case IX86_BUILTIN_LOADUPS:
17952 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17953
17954 case IX86_BUILTIN_STOREUPS:
17955 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17956
17957 case IX86_BUILTIN_LOADHPS:
17958 case IX86_BUILTIN_LOADLPS:
17959 case IX86_BUILTIN_LOADHPD:
17960 case IX86_BUILTIN_LOADLPD:
17961 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17962 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17963 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17964 : CODE_FOR_sse2_loadlpd);
17965 arg0 = CALL_EXPR_ARG (exp, 0);
17966 arg1 = CALL_EXPR_ARG (exp, 1);
17967 op0 = expand_normal (arg0);
17968 op1 = expand_normal (arg1);
17969 tmode = insn_data[icode].operand[0].mode;
17970 mode0 = insn_data[icode].operand[1].mode;
17971 mode1 = insn_data[icode].operand[2].mode;
17972
17973 op0 = force_reg (mode0, op0);
17974 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17975 if (optimize || target == 0
17976 || GET_MODE (target) != tmode
17977 || !register_operand (target, tmode))
17978 target = gen_reg_rtx (tmode);
17979 pat = GEN_FCN (icode) (target, op0, op1);
17980 if (! pat)
17981 return 0;
17982 emit_insn (pat);
17983 return target;
17984
17985 case IX86_BUILTIN_STOREHPS:
17986 case IX86_BUILTIN_STORELPS:
17987 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17988 : CODE_FOR_sse_storelps);
17989 arg0 = CALL_EXPR_ARG (exp, 0);
17990 arg1 = CALL_EXPR_ARG (exp, 1);
17991 op0 = expand_normal (arg0);
17992 op1 = expand_normal (arg1);
17993 mode0 = insn_data[icode].operand[0].mode;
17994 mode1 = insn_data[icode].operand[1].mode;
17995
17996 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17997 op1 = force_reg (mode1, op1);
17998
17999 pat = GEN_FCN (icode) (op0, op1);
18000 if (! pat)
18001 return 0;
18002 emit_insn (pat);
18003 return const0_rtx;
18004
18005 case IX86_BUILTIN_MOVNTPS:
18006 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
18007 case IX86_BUILTIN_MOVNTQ:
18008 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
18009
18010 case IX86_BUILTIN_LDMXCSR:
18011 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
18012 target = assign_386_stack_local (SImode, SLOT_TEMP);
18013 emit_move_insn (target, op0);
18014 emit_insn (gen_sse_ldmxcsr (target));
18015 return 0;
18016
18017 case IX86_BUILTIN_STMXCSR:
18018 target = assign_386_stack_local (SImode, SLOT_TEMP);
18019 emit_insn (gen_sse_stmxcsr (target));
18020 return copy_to_mode_reg (SImode, target);
18021
18022 case IX86_BUILTIN_SHUFPS:
18023 case IX86_BUILTIN_SHUFPD:
18024 icode = (fcode == IX86_BUILTIN_SHUFPS
18025 ? CODE_FOR_sse_shufps
18026 : CODE_FOR_sse2_shufpd);
18027 arg0 = CALL_EXPR_ARG (exp, 0);
18028 arg1 = CALL_EXPR_ARG (exp, 1);
18029 arg2 = CALL_EXPR_ARG (exp, 2);
18030 op0 = expand_normal (arg0);
18031 op1 = expand_normal (arg1);
18032 op2 = expand_normal (arg2);
18033 tmode = insn_data[icode].operand[0].mode;
18034 mode0 = insn_data[icode].operand[1].mode;
18035 mode1 = insn_data[icode].operand[2].mode;
18036 mode2 = insn_data[icode].operand[3].mode;
18037
18038 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18039 op0 = copy_to_mode_reg (mode0, op0);
18040 if ((optimize && !register_operand (op1, mode1))
18041 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
18042 op1 = copy_to_mode_reg (mode1, op1);
18043 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
18044 {
18045 /* @@@ better error message */
18046 error ("mask must be an immediate");
18047 return gen_reg_rtx (tmode);
18048 }
18049 if (optimize || target == 0
18050 || GET_MODE (target) != tmode
18051 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18052 target = gen_reg_rtx (tmode);
18053 pat = GEN_FCN (icode) (target, op0, op1, op2);
18054 if (! pat)
18055 return 0;
18056 emit_insn (pat);
18057 return target;
18058
18059 case IX86_BUILTIN_PSHUFW:
18060 case IX86_BUILTIN_PSHUFD:
18061 case IX86_BUILTIN_PSHUFHW:
18062 case IX86_BUILTIN_PSHUFLW:
18063 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
18064 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
18065 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
18066 : CODE_FOR_mmx_pshufw);
18067 arg0 = CALL_EXPR_ARG (exp, 0);
18068 arg1 = CALL_EXPR_ARG (exp, 1);
18069 op0 = expand_normal (arg0);
18070 op1 = expand_normal (arg1);
18071 tmode = insn_data[icode].operand[0].mode;
18072 mode1 = insn_data[icode].operand[1].mode;
18073 mode2 = insn_data[icode].operand[2].mode;
18074
18075 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18076 op0 = copy_to_mode_reg (mode1, op0);
18077 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18078 {
18079 /* @@@ better error message */
18080 error ("mask must be an immediate");
18081 return const0_rtx;
18082 }
18083 if (target == 0
18084 || GET_MODE (target) != tmode
18085 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18086 target = gen_reg_rtx (tmode);
18087 pat = GEN_FCN (icode) (target, op0, op1);
18088 if (! pat)
18089 return 0;
18090 emit_insn (pat);
18091 return target;
18092
18093 case IX86_BUILTIN_PSLLDQI128:
18094 case IX86_BUILTIN_PSRLDQI128:
18095 icode = ( fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
18096 : CODE_FOR_sse2_lshrti3);
18097 arg0 = CALL_EXPR_ARG (exp, 0);
18098 arg1 = CALL_EXPR_ARG (exp, 1);
18099 op0 = expand_normal (arg0);
18100 op1 = expand_normal (arg1);
18101 tmode = insn_data[icode].operand[0].mode;
18102 mode1 = insn_data[icode].operand[1].mode;
18103 mode2 = insn_data[icode].operand[2].mode;
18104
18105 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18106 {
18107 op0 = copy_to_reg (op0);
18108 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18109 }
18110 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18111 {
18112 error ("shift must be an immediate");
18113 return const0_rtx;
18114 }
18115 target = gen_reg_rtx (V2DImode);
18116 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0), op0, op1);
18117 if (! pat)
18118 return 0;
18119 emit_insn (pat);
18120 return target;
18121
18122 case IX86_BUILTIN_FEMMS:
18123 emit_insn (gen_mmx_femms ());
18124 return NULL_RTX;
18125
18126 case IX86_BUILTIN_PAVGUSB:
18127 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
18128
18129 case IX86_BUILTIN_PF2ID:
18130 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
18131
18132 case IX86_BUILTIN_PFACC:
18133 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
18134
18135 case IX86_BUILTIN_PFADD:
18136 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
18137
18138 case IX86_BUILTIN_PFCMPEQ:
18139 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
18140
18141 case IX86_BUILTIN_PFCMPGE:
18142 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
18143
18144 case IX86_BUILTIN_PFCMPGT:
18145 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
18146
18147 case IX86_BUILTIN_PFMAX:
18148 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
18149
18150 case IX86_BUILTIN_PFMIN:
18151 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
18152
18153 case IX86_BUILTIN_PFMUL:
18154 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
18155
18156 case IX86_BUILTIN_PFRCP:
18157 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
18158
18159 case IX86_BUILTIN_PFRCPIT1:
18160 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
18161
18162 case IX86_BUILTIN_PFRCPIT2:
18163 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
18164
18165 case IX86_BUILTIN_PFRSQIT1:
18166 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
18167
18168 case IX86_BUILTIN_PFRSQRT:
18169 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
18170
18171 case IX86_BUILTIN_PFSUB:
18172 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
18173
18174 case IX86_BUILTIN_PFSUBR:
18175 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
18176
18177 case IX86_BUILTIN_PI2FD:
18178 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
18179
18180 case IX86_BUILTIN_PMULHRW:
18181 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
18182
18183 case IX86_BUILTIN_PF2IW:
18184 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
18185
18186 case IX86_BUILTIN_PFNACC:
18187 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18188
18189 case IX86_BUILTIN_PFPNACC:
18190 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18191
18192 case IX86_BUILTIN_PI2FW:
18193 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18194
18195 case IX86_BUILTIN_PSWAPDSI:
18196 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18197
18198 case IX86_BUILTIN_PSWAPDSF:
18199 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18200
18201 case IX86_BUILTIN_SQRTSD:
18202 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18203 case IX86_BUILTIN_LOADUPD:
18204 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18205 case IX86_BUILTIN_STOREUPD:
18206 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18207
18208 case IX86_BUILTIN_MFENCE:
18209 emit_insn (gen_sse2_mfence ());
18210 return 0;
18211 case IX86_BUILTIN_LFENCE:
18212 emit_insn (gen_sse2_lfence ());
18213 return 0;
18214
18215 case IX86_BUILTIN_CLFLUSH:
18216 arg0 = CALL_EXPR_ARG (exp, 0);
18217 op0 = expand_normal (arg0);
18218 icode = CODE_FOR_sse2_clflush;
18219 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18220 op0 = copy_to_mode_reg (Pmode, op0);
18221
18222 emit_insn (gen_sse2_clflush (op0));
18223 return 0;
18224
18225 case IX86_BUILTIN_MOVNTPD:
18226 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18227 case IX86_BUILTIN_MOVNTDQ:
18228 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18229 case IX86_BUILTIN_MOVNTI:
18230 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18231
18232 case IX86_BUILTIN_LOADDQU:
18233 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18234 case IX86_BUILTIN_STOREDQU:
18235 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18236
18237 case IX86_BUILTIN_MONITOR:
18238 arg0 = CALL_EXPR_ARG (exp, 0);
18239 arg1 = CALL_EXPR_ARG (exp, 1);
18240 arg2 = CALL_EXPR_ARG (exp, 2);
18241 op0 = expand_normal (arg0);
18242 op1 = expand_normal (arg1);
18243 op2 = expand_normal (arg2);
18244 if (!REG_P (op0))
18245 op0 = copy_to_mode_reg (Pmode, op0);
18246 if (!REG_P (op1))
18247 op1 = copy_to_mode_reg (SImode, op1);
18248 if (!REG_P (op2))
18249 op2 = copy_to_mode_reg (SImode, op2);
18250 if (!TARGET_64BIT)
18251 emit_insn (gen_sse3_monitor (op0, op1, op2));
18252 else
18253 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18254 return 0;
18255
18256 case IX86_BUILTIN_MWAIT:
18257 arg0 = CALL_EXPR_ARG (exp, 0);
18258 arg1 = CALL_EXPR_ARG (exp, 1);
18259 op0 = expand_normal (arg0);
18260 op1 = expand_normal (arg1);
18261 if (!REG_P (op0))
18262 op0 = copy_to_mode_reg (SImode, op0);
18263 if (!REG_P (op1))
18264 op1 = copy_to_mode_reg (SImode, op1);
18265 emit_insn (gen_sse3_mwait (op0, op1));
18266 return 0;
18267
18268 case IX86_BUILTIN_LDDQU:
18269 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18270 target, 1);
18271
18272 case IX86_BUILTIN_PALIGNR:
18273 case IX86_BUILTIN_PALIGNR128:
18274 if (fcode == IX86_BUILTIN_PALIGNR)
18275 {
18276 icode = CODE_FOR_ssse3_palignrdi;
18277 mode = DImode;
18278 }
18279 else
18280 {
18281 icode = CODE_FOR_ssse3_palignrti;
18282 mode = V2DImode;
18283 }
18284 arg0 = CALL_EXPR_ARG (exp, 0);
18285 arg1 = CALL_EXPR_ARG (exp, 1);
18286 arg2 = CALL_EXPR_ARG (exp, 2);
18287 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18288 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18289 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18290 tmode = insn_data[icode].operand[0].mode;
18291 mode1 = insn_data[icode].operand[1].mode;
18292 mode2 = insn_data[icode].operand[2].mode;
18293 mode3 = insn_data[icode].operand[3].mode;
18294
18295 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18296 {
18297 op0 = copy_to_reg (op0);
18298 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18299 }
18300 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18301 {
18302 op1 = copy_to_reg (op1);
18303 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18304 }
18305 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18306 {
18307 error ("shift must be an immediate");
18308 return const0_rtx;
18309 }
18310 target = gen_reg_rtx (mode);
18311 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18312 op0, op1, op2);
18313 if (! pat)
18314 return 0;
18315 emit_insn (pat);
18316 return target;
18317
18318 case IX86_BUILTIN_MOVNTSD:
18319 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18320
18321 case IX86_BUILTIN_MOVNTSS:
18322 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18323
18324 case IX86_BUILTIN_INSERTQ:
18325 case IX86_BUILTIN_EXTRQ:
18326 icode = (fcode == IX86_BUILTIN_EXTRQ
18327 ? CODE_FOR_sse4a_extrq
18328 : CODE_FOR_sse4a_insertq);
18329 arg0 = CALL_EXPR_ARG (exp, 0);
18330 arg1 = CALL_EXPR_ARG (exp, 1);
18331 op0 = expand_normal (arg0);
18332 op1 = expand_normal (arg1);
18333 tmode = insn_data[icode].operand[0].mode;
18334 mode1 = insn_data[icode].operand[1].mode;
18335 mode2 = insn_data[icode].operand[2].mode;
18336 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18337 op0 = copy_to_mode_reg (mode1, op0);
18338 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18339 op1 = copy_to_mode_reg (mode2, op1);
18340 if (optimize || target == 0
18341 || GET_MODE (target) != tmode
18342 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18343 target = gen_reg_rtx (tmode);
18344 pat = GEN_FCN (icode) (target, op0, op1);
18345 if (! pat)
18346 return NULL_RTX;
18347 emit_insn (pat);
18348 return target;
18349
18350 case IX86_BUILTIN_EXTRQI:
18351 icode = CODE_FOR_sse4a_extrqi;
18352 arg0 = CALL_EXPR_ARG (exp, 0);
18353 arg1 = CALL_EXPR_ARG (exp, 1);
18354 arg2 = CALL_EXPR_ARG (exp, 2);
18355 op0 = expand_normal (arg0);
18356 op1 = expand_normal (arg1);
18357 op2 = expand_normal (arg2);
18358 tmode = insn_data[icode].operand[0].mode;
18359 mode1 = insn_data[icode].operand[1].mode;
18360 mode2 = insn_data[icode].operand[2].mode;
18361 mode3 = insn_data[icode].operand[3].mode;
18362 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18363 op0 = copy_to_mode_reg (mode1, op0);
18364 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18365 {
18366 error ("index mask must be an immediate");
18367 return gen_reg_rtx (tmode);
18368 }
18369 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18370 {
18371 error ("length mask must be an immediate");
18372 return gen_reg_rtx (tmode);
18373 }
18374 if (optimize || target == 0
18375 || GET_MODE (target) != tmode
18376 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18377 target = gen_reg_rtx (tmode);
18378 pat = GEN_FCN (icode) (target, op0, op1, op2);
18379 if (! pat)
18380 return NULL_RTX;
18381 emit_insn (pat);
18382 return target;
18383
18384 case IX86_BUILTIN_INSERTQI:
18385 icode = CODE_FOR_sse4a_insertqi;
18386 arg0 = CALL_EXPR_ARG (exp, 0);
18387 arg1 = CALL_EXPR_ARG (exp, 1);
18388 arg2 = CALL_EXPR_ARG (exp, 2);
18389 arg3 = CALL_EXPR_ARG (exp, 3);
18390 op0 = expand_normal (arg0);
18391 op1 = expand_normal (arg1);
18392 op2 = expand_normal (arg2);
18393 op3 = expand_normal (arg3);
18394 tmode = insn_data[icode].operand[0].mode;
18395 mode1 = insn_data[icode].operand[1].mode;
18396 mode2 = insn_data[icode].operand[2].mode;
18397 mode3 = insn_data[icode].operand[3].mode;
18398 mode4 = insn_data[icode].operand[4].mode;
18399
18400 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18401 op0 = copy_to_mode_reg (mode1, op0);
18402
18403 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18404 op1 = copy_to_mode_reg (mode2, op1);
18405
18406 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18407 {
18408 error ("index mask must be an immediate");
18409 return gen_reg_rtx (tmode);
18410 }
18411 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18412 {
18413 error ("length mask must be an immediate");
18414 return gen_reg_rtx (tmode);
18415 }
18416 if (optimize || target == 0
18417 || GET_MODE (target) != tmode
18418 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18419 target = gen_reg_rtx (tmode);
18420 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18421 if (! pat)
18422 return NULL_RTX;
18423 emit_insn (pat);
18424 return target;
18425
18426 case IX86_BUILTIN_VEC_INIT_V2SI:
18427 case IX86_BUILTIN_VEC_INIT_V4HI:
18428 case IX86_BUILTIN_VEC_INIT_V8QI:
18429 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18430
18431 case IX86_BUILTIN_VEC_EXT_V2DF:
18432 case IX86_BUILTIN_VEC_EXT_V2DI:
18433 case IX86_BUILTIN_VEC_EXT_V4SF:
18434 case IX86_BUILTIN_VEC_EXT_V4SI:
18435 case IX86_BUILTIN_VEC_EXT_V8HI:
18436 case IX86_BUILTIN_VEC_EXT_V2SI:
18437 case IX86_BUILTIN_VEC_EXT_V4HI:
18438 return ix86_expand_vec_ext_builtin (exp, target);
18439
18440 case IX86_BUILTIN_VEC_SET_V8HI:
18441 case IX86_BUILTIN_VEC_SET_V4HI:
18442 return ix86_expand_vec_set_builtin (exp);
18443
18444 default:
18445 break;
18446 }
18447
18448 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18449 if (d->code == fcode)
18450 {
18451 /* Compares are treated specially. */
18452 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18453 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18454 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18455 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18456 return ix86_expand_sse_compare (d, exp, target);
18457
18458 return ix86_expand_binop_builtin (d->icode, exp, target);
18459 }
18460
18461 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18462 if (d->code == fcode)
18463 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18464
18465 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18466 if (d->code == fcode)
18467 return ix86_expand_sse_comi (d, exp, target);
18468
18469 gcc_unreachable ();
18470 }
18471
18472 /* Returns a function decl for a vectorized version of the builtin function
18473 with builtin function code FN, output vector type TYPE_OUT and input
18474 vector type TYPE_IN, or NULL_TREE if it is not available. */
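/* For example (reading off the switch below): a loop that calls sqrtf and is
   vectorized with V4SFmode vectors maps BUILT_IN_SQRTF to the decl recorded
   for IX86_BUILTIN_SQRTPS, and sqrt with V2DFmode maps to SQRTPD.  */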
18475
18476 static tree
18477 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18478 tree type_in)
18479 {
18480 enum machine_mode in_mode, out_mode;
18481 int in_n, out_n;
18482
18483 if (TREE_CODE (type_out) != VECTOR_TYPE
18484 || TREE_CODE (type_in) != VECTOR_TYPE)
18485 return NULL_TREE;
18486
18487 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18488 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18489 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18490 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18491
18492 switch (fn)
18493 {
18494 case BUILT_IN_SQRT:
18495 if (out_mode == DFmode && out_n == 2
18496 && in_mode == DFmode && in_n == 2)
18497 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18498 return NULL_TREE;
18499
18500 case BUILT_IN_SQRTF:
18501 if (out_mode == SFmode && out_n == 4
18502 && in_mode == SFmode && in_n == 4)
18503 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18504 return NULL_TREE;
18505
18506 case BUILT_IN_LRINTF:
18507 if (out_mode == SImode && out_n == 4
18508 && in_mode == SFmode && in_n == 4)
18509 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18510 return NULL_TREE;
18511
18512 default:
18513 ;
18514 }
18515
18516 return NULL_TREE;
18517 }
18518
18519 /* Returns a decl of a function that implements conversion of the
18520 input vector of type TYPE, or NULL_TREE if it is not available. */
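/* For example (per the switch below): an integer-to-float conversion
   (FLOAT_EXPR) on V4SImode data uses the decl recorded for
   IX86_BUILTIN_CVTDQ2PS, and a float-to-integer truncation (FIX_TRUNC_EXPR)
   on V4SFmode data uses IX86_BUILTIN_CVTTPS2DQ.  */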
18521
18522 static tree
18523 ix86_builtin_conversion (enum tree_code code, tree type)
18524 {
18525 if (TREE_CODE (type) != VECTOR_TYPE)
18526 return NULL_TREE;
18527
18528 switch (code)
18529 {
18530 case FLOAT_EXPR:
18531 switch (TYPE_MODE (type))
18532 {
18533 case V4SImode:
18534 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18535 default:
18536 return NULL_TREE;
18537 }
18538
18539 case FIX_TRUNC_EXPR:
18540 switch (TYPE_MODE (type))
18541 {
18542 case V4SFmode:
18543 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18544 default:
18545 return NULL_TREE;
18546 }
18547 default:
18548 return NULL_TREE;
18549
18550 }
18551 }
18552
18553 /* Store OPERAND to memory after reload is complete. This means
18554 that we can't easily use assign_stack_local. */
18555 rtx
18556 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18557 {
18558 rtx result;
18559
18560 gcc_assert (reload_completed);
18561 if (TARGET_RED_ZONE)
18562 {
18563 result = gen_rtx_MEM (mode,
18564 gen_rtx_PLUS (Pmode,
18565 stack_pointer_rtx,
18566 GEN_INT (-RED_ZONE_SIZE)));
18567 emit_move_insn (result, operand);
18568 }
18569 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18570 {
18571 switch (mode)
18572 {
18573 case HImode:
18574 case SImode:
18575 operand = gen_lowpart (DImode, operand);
18576 /* FALLTHRU */
18577 case DImode:
18578 emit_insn (
18579 gen_rtx_SET (VOIDmode,
18580 gen_rtx_MEM (DImode,
18581 gen_rtx_PRE_DEC (DImode,
18582 stack_pointer_rtx)),
18583 operand));
18584 break;
18585 default:
18586 gcc_unreachable ();
18587 }
18588 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18589 }
18590 else
18591 {
18592 switch (mode)
18593 {
18594 case DImode:
18595 {
18596 rtx operands[2];
18597 split_di (&operand, 1, operands, operands + 1);
18598 emit_insn (
18599 gen_rtx_SET (VOIDmode,
18600 gen_rtx_MEM (SImode,
18601 gen_rtx_PRE_DEC (Pmode,
18602 stack_pointer_rtx)),
18603 operands[1]));
18604 emit_insn (
18605 gen_rtx_SET (VOIDmode,
18606 gen_rtx_MEM (SImode,
18607 gen_rtx_PRE_DEC (Pmode,
18608 stack_pointer_rtx)),
18609 operands[0]));
18610 }
18611 break;
18612 case HImode:
18613 /* Store HImodes as SImodes. */
18614 operand = gen_lowpart (SImode, operand);
18615 /* FALLTHRU */
18616 case SImode:
18617 emit_insn (
18618 gen_rtx_SET (VOIDmode,
18619 gen_rtx_MEM (GET_MODE (operand),
18620 gen_rtx_PRE_DEC (SImode,
18621 stack_pointer_rtx)),
18622 operand));
18623 break;
18624 default:
18625 gcc_unreachable ();
18626 }
18627 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18628 }
18629 return result;
18630 }
18631
18632 /* Free operand from the memory. */
18633 void
18634 ix86_free_from_memory (enum machine_mode mode)
18635 {
18636 if (!TARGET_RED_ZONE)
18637 {
18638 int size;
18639
18640 if (mode == DImode || TARGET_64BIT)
18641 size = 8;
18642 else
18643 size = 4;
18644 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18645 to a pop or add instruction if registers are available. */
18646 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18647 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18648 GEN_INT (size))));
18649 }
18650 }
18651
18652 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18653 QImode must go into class Q_REGS.
18654 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
18655 movdf to do mem-to-mem moves through integer regs. */
18656 enum reg_class
18657 ix86_preferred_reload_class (rtx x, enum reg_class class)
18658 {
18659 enum machine_mode mode = GET_MODE (x);
18660
18661 /* We're only allowed to return a subclass of CLASS. Many of the
18662 following checks fail for NO_REGS, so eliminate that early. */
18663 if (class == NO_REGS)
18664 return NO_REGS;
18665
18666 /* All classes can load zeros. */
18667 if (x == CONST0_RTX (mode))
18668 return class;
18669
18670 /* Force constants into memory if we are loading a (nonzero) constant into
18671 an MMX or SSE register. This is because there are no MMX/SSE instructions
18672 to load from a constant. */
18673 if (CONSTANT_P (x)
18674 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18675 return NO_REGS;
18676
18677 /* Prefer SSE regs only, if we can use them for math. */
18678 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18679 return SSE_CLASS_P (class) ? class : NO_REGS;
18680
18681 /* Floating-point constants need more complex checks. */
18682 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18683 {
18684 /* General regs can load everything. */
18685 if (reg_class_subset_p (class, GENERAL_REGS))
18686 return class;
18687
18688 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18689 zero above. We only want to wind up preferring 80387 registers if
18690 we plan on doing computation with them. */
18691 if (TARGET_80387
18692 && standard_80387_constant_p (x))
18693 {
18694 /* Limit class to non-sse. */
18695 if (class == FLOAT_SSE_REGS)
18696 return FLOAT_REGS;
18697 if (class == FP_TOP_SSE_REGS)
18698 return FP_TOP_REG;
18699 if (class == FP_SECOND_SSE_REGS)
18700 return FP_SECOND_REG;
18701 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18702 return class;
18703 }
18704
18705 return NO_REGS;
18706 }
18707
18708 /* Generally when we see PLUS here, it's the function invariant
18709 (plus soft-fp const_int), which can only be computed into general
18710 regs. */
18711 if (GET_CODE (x) == PLUS)
18712 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18713
18714 /* QImode constants are easy to load, but non-constant QImode data
18715 must go into Q_REGS. */
18716 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18717 {
18718 if (reg_class_subset_p (class, Q_REGS))
18719 return class;
18720 if (reg_class_subset_p (Q_REGS, class))
18721 return Q_REGS;
18722 return NO_REGS;
18723 }
18724
18725 return class;
18726 }
18727
18728 /* Discourage putting floating-point values in SSE registers unless
18729 SSE math is being used, and likewise for the 387 registers. */
18730 enum reg_class
18731 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18732 {
18733 enum machine_mode mode = GET_MODE (x);
18734
18735 /* Restrict the output reload class to the register bank that we are doing
18736 math on. If we would like not to return a subset of CLASS, reject this
18737 alternative: if reload cannot do this, it will still use its choice. */
18738 mode = GET_MODE (x);
18739 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18740 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18741
18742 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18743 {
18744 if (class == FP_TOP_SSE_REGS)
18745 return FP_TOP_REG;
18746 else if (class == FP_SECOND_SSE_REGS)
18747 return FP_SECOND_REG;
18748 else
18749 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18750 }
18751
18752 return class;
18753 }
18754
18755 /* If we are copying between general and FP registers, we need a memory
18756 location. The same is true for SSE and MMX registers.
18757
18758 The macro can't work reliably when one of the CLASSES is a class containing
18759 registers from multiple units (SSE, MMX, integer). We avoid this by never
18760 combining those units in a single alternative in the machine description.
18761 Ensure that this constraint holds to avoid unexpected surprises.
18762
18763 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18764 enforce these sanity checks. */
18765
18766 int
18767 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18768 enum machine_mode mode, int strict)
18769 {
18770 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18771 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18772 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18773 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18774 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18775 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18776 {
18777 gcc_assert (!strict);
18778 return true;
18779 }
18780
18781 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18782 return true;
18783
18784 /* ??? This is a lie. We do have moves between mmx/general, and for
18785 mmx/sse2. But by saying we need secondary memory we discourage the
18786 register allocator from using the mmx registers unless needed. */
18787 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18788 return true;
18789
18790 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18791 {
18792 /* SSE1 doesn't have any direct moves from other classes. */
18793 if (!TARGET_SSE2)
18794 return true;
18795
18796 /* If the target says that inter-unit moves are more expensive
18797 than moving through memory, then don't generate them. */
18798 if (!TARGET_INTER_UNIT_MOVES)
18799 return true;
18800
18801 /* Between SSE and general, we have moves no larger than word size. */
18802 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18803 return true;
18804 }
18805
18806 return false;
18807 }
18808
18809 /* Return true if the registers in CLASS cannot represent the change from
18810 modes FROM to TO. */
18811
18812 bool
18813 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18814 enum reg_class class)
18815 {
18816 if (from == to)
18817 return false;
18818
18819 /* x87 registers can't do subreg at all, as all values are reformatted
18820 to extended precision. */
18821 if (MAYBE_FLOAT_CLASS_P (class))
18822 return true;
18823
18824 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18825 {
18826 /* Vector registers do not support QI or HImode loads. If we don't
18827 disallow a change to these modes, reload will assume it's ok to
18828 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18829 the vec_dupv4hi pattern. */
18830 if (GET_MODE_SIZE (from) < 4)
18831 return true;
18832
18833 /* Vector registers do not support subreg with nonzero offsets, which
18834 are otherwise valid for integer registers. Since we can't see
18835 whether we have a nonzero offset from here, prohibit all
18836 nonparadoxical subregs changing size. */
18837 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18838 return true;
18839 }
18840
18841 return false;
18842 }
18843
18844 /* Return the cost of moving data from a register in class CLASS1 to
18845 one in class CLASS2.
18846
18847 It is not required that the cost always equal 2 when FROM is the same as TO;
18848 on some machines it is expensive to move between registers if they are not
18849 general registers. */
18850
18851 int
18852 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18853 enum reg_class class2)
18854 {
18855 /* In case we require secondary memory, compute the cost of the store followed
18856 by the load. In order to avoid bad register allocation choices, we need
18857 this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18858
18859 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18860 {
18861 int cost = 1;
18862
18863 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18864 MEMORY_MOVE_COST (mode, class1, 1));
18865 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18866 MEMORY_MOVE_COST (mode, class2, 1));
18867
18868 /* In case of copying from a general purpose register we may emit multiple
18869 stores followed by a single load, causing a memory size mismatch stall.
18870 Count this as an arbitrarily high cost of 20. */
18871 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18872 cost += 20;
18873
18874 /* In the case of FP/MMX moves, the registers actually overlap, and we
18875 have to switch modes in order to treat them differently. */
18876 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18877 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18878 cost += 20;
18879
18880 return cost;
18881 }
18882
18883 /* Moves between SSE/MMX and integer unit are expensive. */
18884 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18885 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18886 return ix86_cost->mmxsse_to_integer;
18887 if (MAYBE_FLOAT_CLASS_P (class1))
18888 return ix86_cost->fp_move;
18889 if (MAYBE_SSE_CLASS_P (class1))
18890 return ix86_cost->sse_move;
18891 if (MAYBE_MMX_CLASS_P (class1))
18892 return ix86_cost->mmx_move;
18893 return 2;
18894 }
18895
18896 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18897
18898 bool
18899 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18900 {
18901 /* Only the flags registers can hold CCmode values, and they can hold nothing else. */
18902 if (CC_REGNO_P (regno))
18903 return GET_MODE_CLASS (mode) == MODE_CC;
18904 if (GET_MODE_CLASS (mode) == MODE_CC
18905 || GET_MODE_CLASS (mode) == MODE_RANDOM
18906 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18907 return 0;
18908 if (FP_REGNO_P (regno))
18909 return VALID_FP_MODE_P (mode);
18910 if (SSE_REGNO_P (regno))
18911 {
18912 /* We implement the move patterns for all vector modes into and
18913 out of SSE registers, even when no operation instructions
18914 are available. */
18915 return (VALID_SSE_REG_MODE (mode)
18916 || VALID_SSE2_REG_MODE (mode)
18917 || VALID_MMX_REG_MODE (mode)
18918 || VALID_MMX_REG_MODE_3DNOW (mode));
18919 }
18920 if (MMX_REGNO_P (regno))
18921 {
18922 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18923 so if the register is available at all, then we can move data of
18924 the given mode into or out of it. */
18925 return (VALID_MMX_REG_MODE (mode)
18926 || VALID_MMX_REG_MODE_3DNOW (mode));
18927 }
18928
18929 if (mode == QImode)
18930 {
18931 /* Take care with QImode values - they can be in non-QI regs,
18932 but then they do cause partial register stalls. */
18933 if (regno < 4 || TARGET_64BIT)
18934 return 1;
18935 if (!TARGET_PARTIAL_REG_STALL)
18936 return 1;
18937 return reload_in_progress || reload_completed;
18938 }
18939 /* We handle both integers and floats in the general purpose registers. */
18940 else if (VALID_INT_MODE_P (mode))
18941 return 1;
18942 else if (VALID_FP_MODE_P (mode))
18943 return 1;
18944 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18945 on to use that value in smaller contexts, this can easily force a
18946 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18947 supporting DImode, allow it. */
18948 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18949 return 1;
18950
18951 return 0;
18952 }
18953
18954 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18955 tieable integer mode. */
18956
18957 static bool
18958 ix86_tieable_integer_mode_p (enum machine_mode mode)
18959 {
18960 switch (mode)
18961 {
18962 case HImode:
18963 case SImode:
18964 return true;
18965
18966 case QImode:
18967 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18968
18969 case DImode:
18970 return TARGET_64BIT;
18971
18972 default:
18973 return false;
18974 }
18975 }
18976
18977 /* Return true if MODE1 is accessible in a register that can hold MODE2
18978 without copying. That is, all register classes that can hold MODE2
18979 can also hold MODE1. */
18980
18981 bool
18982 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18983 {
18984 if (mode1 == mode2)
18985 return true;
18986
18987 if (ix86_tieable_integer_mode_p (mode1)
18988 && ix86_tieable_integer_mode_p (mode2))
18989 return true;
18990
18991 /* MODE2 being XFmode implies fp stack or general regs, which means we
18992 can tie any smaller floating point modes to it. Note that we do not
18993 tie this with TFmode. */
18994 if (mode2 == XFmode)
18995 return mode1 == SFmode || mode1 == DFmode;
18996
18997 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18998 that we can tie it with SFmode. */
18999 if (mode2 == DFmode)
19000 return mode1 == SFmode;
19001
19002 /* If MODE2 is only appropriate for an SSE register, then tie with
19003 any other mode acceptable to SSE registers. */
19004 if (GET_MODE_SIZE (mode2) == 16
19005 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
19006 return (GET_MODE_SIZE (mode1) == 16
19007 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
19008
19009 /* If MODE2 is appropriate for an MMX register, then tie
19010 with any other mode acceptable to MMX registers. */
19011 if (GET_MODE_SIZE (mode2) == 8
19012 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
19013 return (GET_MODE_SIZE (mode1) == 8
19014 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
19015
19016 return false;
19017 }
19018
19019 /* Return the cost of moving data of mode M between a
19020 register and memory. A value of 2 is the default; this cost is
19021 relative to those in `REGISTER_MOVE_COST'.
19022
19023 If moving between registers and memory is more expensive than
19024 between two registers, you should define this macro to express the
19025 relative cost.
19026
19027 Also model the increased cost of moving QImode registers in
19028 non-Q_REGS classes.
19029 */
19030 int
19031 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
19032 {
19033 if (FLOAT_CLASS_P (class))
19034 {
19035 int index;
19036 switch (mode)
19037 {
19038 case SFmode:
19039 index = 0;
19040 break;
19041 case DFmode:
19042 index = 1;
19043 break;
19044 case XFmode:
19045 index = 2;
19046 break;
19047 default:
19048 return 100;
19049 }
19050 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
19051 }
19052 if (SSE_CLASS_P (class))
19053 {
19054 int index;
19055 switch (GET_MODE_SIZE (mode))
19056 {
19057 case 4:
19058 index = 0;
19059 break;
19060 case 8:
19061 index = 1;
19062 break;
19063 case 16:
19064 index = 2;
19065 break;
19066 default:
19067 return 100;
19068 }
19069 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
19070 }
19071 if (MMX_CLASS_P (class))
19072 {
19073 int index;
19074 switch (GET_MODE_SIZE (mode))
19075 {
19076 case 4:
19077 index = 0;
19078 break;
19079 case 8:
19080 index = 1;
19081 break;
19082 default:
19083 return 100;
19084 }
19085 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
19086 }
19087 switch (GET_MODE_SIZE (mode))
19088 {
19089 case 1:
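/* Byte loads into non-Q registers are modeled as movzbl; byte stores from
non-Q registers need an extra move through a byte-addressable register
first, which the arbitrary +4 penalty below accounts for. */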
19090 if (in)
19091 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
19092 : ix86_cost->movzbl_load);
19093 else
19094 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
19095 : ix86_cost->int_store[0] + 4);
19096 break;
19097 case 2:
19098 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
19099 default:
19100 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
19101 if (mode == TFmode)
19102 mode = XFmode;
19103 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
19104 * (((int) GET_MODE_SIZE (mode)
19105 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
19106 }
19107 }
19108
19109 /* Compute a (partial) cost for rtx X. Return true if the complete
19110 cost has been computed, and false if subexpressions should be
19111 scanned. In either case, *TOTAL contains the cost result. */
19112
19113 static bool
19114 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
19115 {
19116 enum machine_mode mode = GET_MODE (x);
19117
19118 switch (code)
19119 {
19120 case CONST_INT:
19121 case CONST:
19122 case LABEL_REF:
19123 case SYMBOL_REF:
19124 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
19125 *total = 3;
19126 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
19127 *total = 2;
19128 else if (flag_pic && SYMBOLIC_CONST (x)
19129 && (!TARGET_64BIT
19130 || (GET_CODE (x) != LABEL_REF
19131 && (GET_CODE (x) != SYMBOL_REF
19132 || !SYMBOL_REF_LOCAL_P (x)))))
19133 *total = 1;
19134 else
19135 *total = 0;
19136 return true;
19137
19138 case CONST_DOUBLE:
19139 if (mode == VOIDmode)
19140 *total = 0;
19141 else
19142 switch (standard_80387_constant_p (x))
19143 {
19144 case 1: /* 0.0 */
19145 *total = 1;
19146 break;
19147 default: /* Other constants */
19148 *total = 2;
19149 break;
19150 case 0:
19151 case -1:
19152 /* Start with (MEM (SYMBOL_REF)), since that's where
19153 it'll probably end up. Add a penalty for size. */
19154 *total = (COSTS_N_INSNS (1)
19155 + (flag_pic != 0 && !TARGET_64BIT)
19156 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
19157 break;
19158 }
19159 return true;
19160
19161 case ZERO_EXTEND:
19162 /* The zero extension is often completely free on x86_64, so make
19163 it as cheap as possible. */
19164 if (TARGET_64BIT && mode == DImode
19165 && GET_MODE (XEXP (x, 0)) == SImode)
19166 *total = 1;
19167 else if (TARGET_ZERO_EXTEND_WITH_AND)
19168 *total = ix86_cost->add;
19169 else
19170 *total = ix86_cost->movzx;
19171 return false;
19172
19173 case SIGN_EXTEND:
19174 *total = ix86_cost->movsx;
19175 return false;
19176
19177 case ASHIFT:
19178 if (CONST_INT_P (XEXP (x, 1))
19179 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
19180 {
19181 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19182 if (value == 1)
19183 {
19184 *total = ix86_cost->add;
19185 return false;
19186 }
19187 if ((value == 2 || value == 3)
19188 && ix86_cost->lea <= ix86_cost->shift_const)
19189 {
19190 *total = ix86_cost->lea;
19191 return false;
19192 }
19193 }
19194 /* FALLTHRU */
19195
19196 case ROTATE:
19197 case ASHIFTRT:
19198 case LSHIFTRT:
19199 case ROTATERT:
19200 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19201 {
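/* A DImode shift on a 32-bit target is split into a pair of word-sized
shifts plus fixup code; a variable count that is not already masked by
an AND needs an even longer conditional sequence. */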
19202 if (CONST_INT_P (XEXP (x, 1)))
19203 {
19204 if (INTVAL (XEXP (x, 1)) > 32)
19205 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19206 else
19207 *total = ix86_cost->shift_const * 2;
19208 }
19209 else
19210 {
19211 if (GET_CODE (XEXP (x, 1)) == AND)
19212 *total = ix86_cost->shift_var * 2;
19213 else
19214 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19215 }
19216 }
19217 else
19218 {
19219 if (CONST_INT_P (XEXP (x, 1)))
19220 *total = ix86_cost->shift_const;
19221 else
19222 *total = ix86_cost->shift_var;
19223 }
19224 return false;
19225
19226 case MULT:
19227 if (FLOAT_MODE_P (mode))
19228 {
19229 *total = ix86_cost->fmul;
19230 return false;
19231 }
19232 else
19233 {
19234 rtx op0 = XEXP (x, 0);
19235 rtx op1 = XEXP (x, 1);
19236 int nbits;
19237 if (CONST_INT_P (XEXP (x, 1)))
19238 {
19239 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
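/* Count the bits set in the constant multiplier; the cost model below
charges mult_bit for each one. */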
19240 for (nbits = 0; value != 0; value &= value - 1)
19241 nbits++;
19242 }
19243 else
19244 /* This is arbitrary. */
19245 nbits = 7;
19246
19247 /* Compute costs correctly for widening multiplication. */
19248 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19249 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19250 == GET_MODE_SIZE (mode))
19251 {
19252 int is_mulwiden = 0;
19253 enum machine_mode inner_mode = GET_MODE (op0);
19254
19255 if (GET_CODE (op0) == GET_CODE (op1))
19256 is_mulwiden = 1, op1 = XEXP (op1, 0);
19257 else if (CONST_INT_P (op1))
19258 {
19259 if (GET_CODE (op0) == SIGN_EXTEND)
19260 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19261 == INTVAL (op1);
19262 else
19263 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19264 }
19265
19266 if (is_mulwiden)
19267 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19268 }
19269
19270 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19271 + nbits * ix86_cost->mult_bit
19272 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19273
19274 return true;
19275 }
19276
19277 case DIV:
19278 case UDIV:
19279 case MOD:
19280 case UMOD:
19281 if (FLOAT_MODE_P (mode))
19282 *total = ix86_cost->fdiv;
19283 else
19284 *total = ix86_cost->divide[MODE_INDEX (mode)];
19285 return false;
19286
19287 case PLUS:
19288 if (FLOAT_MODE_P (mode))
19289 *total = ix86_cost->fadd;
19290 else if (GET_MODE_CLASS (mode) == MODE_INT
19291 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19292 {
19293 if (GET_CODE (XEXP (x, 0)) == PLUS
19294 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19295 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19296 && CONSTANT_P (XEXP (x, 1)))
19297 {
19298 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19299 if (val == 2 || val == 4 || val == 8)
19300 {
19301 *total = ix86_cost->lea;
19302 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19303 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19304 outer_code);
19305 *total += rtx_cost (XEXP (x, 1), outer_code);
19306 return true;
19307 }
19308 }
19309 else if (GET_CODE (XEXP (x, 0)) == MULT
19310 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19311 {
19312 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19313 if (val == 2 || val == 4 || val == 8)
19314 {
19315 *total = ix86_cost->lea;
19316 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19317 *total += rtx_cost (XEXP (x, 1), outer_code);
19318 return true;
19319 }
19320 }
19321 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19322 {
19323 *total = ix86_cost->lea;
19324 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19325 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19326 *total += rtx_cost (XEXP (x, 1), outer_code);
19327 return true;
19328 }
19329 }
19330 /* FALLTHRU */
19331
19332 case MINUS:
19333 if (FLOAT_MODE_P (mode))
19334 {
19335 *total = ix86_cost->fadd;
19336 return false;
19337 }
19338 /* FALLTHRU */
19339
19340 case AND:
19341 case IOR:
19342 case XOR:
19343 if (!TARGET_64BIT && mode == DImode)
19344 {
19345 *total = (ix86_cost->add * 2
19346 + (rtx_cost (XEXP (x, 0), outer_code)
19347 << (GET_MODE (XEXP (x, 0)) != DImode))
19348 + (rtx_cost (XEXP (x, 1), outer_code)
19349 << (GET_MODE (XEXP (x, 1)) != DImode)));
19350 return true;
19351 }
19352 /* FALLTHRU */
19353
19354 case NEG:
19355 if (FLOAT_MODE_P (mode))
19356 {
19357 *total = ix86_cost->fchs;
19358 return false;
19359 }
19360 /* FALLTHRU */
19361
19362 case NOT:
19363 if (!TARGET_64BIT && mode == DImode)
19364 *total = ix86_cost->add * 2;
19365 else
19366 *total = ix86_cost->add;
19367 return false;
19368
19369 case COMPARE:
19370 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19371 && XEXP (XEXP (x, 0), 1) == const1_rtx
19372 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19373 && XEXP (x, 1) == const0_rtx)
19374 {
19375 /* This kind of construct is implemented using test[bwl].
19376 Treat it as if we had an AND. */
19377 *total = (ix86_cost->add
19378 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19379 + rtx_cost (const1_rtx, outer_code));
19380 return true;
19381 }
19382 return false;
19383
19384 case FLOAT_EXTEND:
19385 if (!TARGET_SSE_MATH
19386 || mode == XFmode
19387 || (mode == DFmode && !TARGET_SSE2))
19388 *total = 0;
19389 return false;
19390
19391 case ABS:
19392 if (FLOAT_MODE_P (mode))
19393 *total = ix86_cost->fabs;
19394 return false;
19395
19396 case SQRT:
19397 if (FLOAT_MODE_P (mode))
19398 *total = ix86_cost->fsqrt;
19399 return false;
19400
19401 case UNSPEC:
19402 if (XINT (x, 1) == UNSPEC_TP)
19403 *total = 0;
19404 return false;
19405
19406 default:
19407 return false;
19408 }
19409 }
19410
19411 #if TARGET_MACHO
19412
19413 static int current_machopic_label_num;
19414
19415 /* Given a symbol name and its associated stub, write out the
19416 definition of the stub. */
19417
19418 void
19419 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19420 {
19421 unsigned int length;
19422 char *binder_name, *symbol_name, lazy_ptr_name[32];
19423 int label = ++current_machopic_label_num;
19424
19425 /* For 64-bit we shouldn't get here. */
19426 gcc_assert (!TARGET_64BIT);
19427
19428 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19429 symb = (*targetm.strip_name_encoding) (symb);
19430
19431 length = strlen (stub);
19432 binder_name = alloca (length + 32);
19433 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19434
19435 length = strlen (symb);
19436 symbol_name = alloca (length + 32);
19437 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19438
19439 sprintf (lazy_ptr_name, "L%d$lz", label);
19440
19441 if (MACHOPIC_PURE)
19442 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19443 else
19444 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19445
19446 fprintf (file, "%s:\n", stub);
19447 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19448
19449 if (MACHOPIC_PURE)
19450 {
19451 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19452 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19453 fprintf (file, "\tjmp\t*%%edx\n");
19454 }
19455 else
19456 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19457
19458 fprintf (file, "%s:\n", binder_name);
19459
19460 if (MACHOPIC_PURE)
19461 {
19462 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19463 fprintf (file, "\tpushl\t%%eax\n");
19464 }
19465 else
19466 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19467
19468 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19469
19470 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19471 fprintf (file, "%s:\n", lazy_ptr_name);
19472 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19473 fprintf (file, "\t.long %s\n", binder_name);
19474 }
19475
19476 void
19477 darwin_x86_file_end (void)
19478 {
19479 darwin_file_end ();
19480 ix86_file_end ();
19481 }
19482 #endif /* TARGET_MACHO */
19483
19484 /* Order the registers for the register allocator. */
19485
19486 void
19487 x86_order_regs_for_local_alloc (void)
19488 {
19489 int pos = 0;
19490 int i;
19491
19492 /* First allocate the local general purpose registers. */
19493 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19494 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19495 reg_alloc_order [pos++] = i;
19496
19497 /* Global general purpose registers. */
19498 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19499 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19500 reg_alloc_order [pos++] = i;
19501
19502 /* x87 registers come first in case we are doing FP math
19503 using them. */
19504 if (!TARGET_SSE_MATH)
19505 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19506 reg_alloc_order [pos++] = i;
19507
19508 /* SSE registers. */
19509 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19510 reg_alloc_order [pos++] = i;
19511 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19512 reg_alloc_order [pos++] = i;
19513
19514 /* x87 registers. */
19515 if (TARGET_SSE_MATH)
19516 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19517 reg_alloc_order [pos++] = i;
19518
19519 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19520 reg_alloc_order [pos++] = i;
19521
19522 /* Initialize the rest of the array, as we do not allocate some registers
19523 at all. */
19524 while (pos < FIRST_PSEUDO_REGISTER)
19525 reg_alloc_order [pos++] = 0;
19526 }
19527
19528 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19529 struct attribute_spec.handler. */
19530 static tree
19531 ix86_handle_struct_attribute (tree *node, tree name,
19532 tree args ATTRIBUTE_UNUSED,
19533 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19534 {
19535 tree *type = NULL;
19536 if (DECL_P (*node))
19537 {
19538 if (TREE_CODE (*node) == TYPE_DECL)
19539 type = &TREE_TYPE (*node);
19540 }
19541 else
19542 type = node;
19543
19544 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19545 || TREE_CODE (*type) == UNION_TYPE)))
19546 {
19547 warning (OPT_Wattributes, "%qs attribute ignored",
19548 IDENTIFIER_POINTER (name));
19549 *no_add_attrs = true;
19550 }
19551
19552 else if ((is_attribute_p ("ms_struct", name)
19553 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19554 || ((is_attribute_p ("gcc_struct", name)
19555 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19556 {
19557 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19558 IDENTIFIER_POINTER (name));
19559 *no_add_attrs = true;
19560 }
19561
19562 return NULL_TREE;
19563 }
19564
19565 static bool
19566 ix86_ms_bitfield_layout_p (tree record_type)
19567 {
19568 return (TARGET_MS_BITFIELD_LAYOUT &&
19569 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19570 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19571 }
19572
19573 /* Returns an expression indicating where the this parameter is
19574 located on entry to the FUNCTION. */
19575
19576 static rtx
19577 x86_this_parameter (tree function)
19578 {
19579 tree type = TREE_TYPE (function);
19580
19581 if (TARGET_64BIT)
19582 {
19583 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19584 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19585 }
19586
19587 if (ix86_function_regparm (type, function) > 0)
19588 {
19589 tree parm;
19590
19591 parm = TYPE_ARG_TYPES (type);
19592 /* Figure out whether or not the function has a variable number of
19593 arguments. */
19594 for (; parm; parm = TREE_CHAIN (parm))
19595 if (TREE_VALUE (parm) == void_type_node)
19596 break;
19597 /* If not, the this parameter is in the first argument. */
19598 if (parm)
19599 {
19600 int regno = 0;
19601 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19602 regno = 2;
19603 return gen_rtx_REG (SImode, regno);
19604 }
19605 }
19606
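/* Otherwise THIS lives on the stack: just past the return address, or one
slot further if a hidden aggregate-return pointer occupies the first slot. */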
19607 if (aggregate_value_p (TREE_TYPE (type), type))
19608 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19609 else
19610 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19611 }
19612
19613 /* Determine whether x86_output_mi_thunk can succeed. */
19614
19615 static bool
19616 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19617 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19618 HOST_WIDE_INT vcall_offset, tree function)
19619 {
19620 /* 64-bit can handle anything. */
19621 if (TARGET_64BIT)
19622 return true;
19623
19624 /* For 32-bit, everything's fine if we have one free register. */
19625 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19626 return true;
19627
19628 /* Need a free register for vcall_offset. */
19629 if (vcall_offset)
19630 return false;
19631
19632 /* Need a free register for GOT references. */
19633 if (flag_pic && !(*targetm.binds_local_p) (function))
19634 return false;
19635
19636 /* Otherwise ok. */
19637 return true;
19638 }
19639
19640 /* Output the assembler code for a thunk function. THUNK_DECL is the
19641 declaration for the thunk function itself, FUNCTION is the decl for
19642 the target function. DELTA is an immediate constant offset to be
19643 added to THIS. If VCALL_OFFSET is nonzero, the word at
19644 *(*this + vcall_offset) should be added to THIS. */
19645
19646 static void
19647 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19648 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19649 HOST_WIDE_INT vcall_offset, tree function)
19650 {
19651 rtx xops[3];
19652 rtx this = x86_this_parameter (function);
19653 rtx this_reg, tmp;
19654
19655 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19656 pull it in now and let DELTA benefit. */
19657 if (REG_P (this))
19658 this_reg = this;
19659 else if (vcall_offset)
19660 {
19661 /* Put the this parameter into %eax. */
19662 xops[0] = this;
19663 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19664 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19665 }
19666 else
19667 this_reg = NULL_RTX;
19668
19669 /* Adjust the this parameter by a fixed constant. */
19670 if (delta)
19671 {
19672 xops[0] = GEN_INT (delta);
19673 xops[1] = this_reg ? this_reg : this;
19674 if (TARGET_64BIT)
19675 {
19676 if (!x86_64_general_operand (xops[0], DImode))
19677 {
19678 tmp = gen_rtx_REG (DImode, R10_REG);
19679 xops[1] = tmp;
19680 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19681 xops[0] = tmp;
19682 xops[1] = this;
19683 }
19684 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19685 }
19686 else
19687 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19688 }
19689
19690 /* Adjust the this parameter by a value stored in the vtable. */
19691 if (vcall_offset)
19692 {
19693 if (TARGET_64BIT)
19694 tmp = gen_rtx_REG (DImode, R10_REG);
19695 else
19696 {
19697 int tmp_regno = 2 /* ECX */;
19698 if (lookup_attribute ("fastcall",
19699 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19700 tmp_regno = 0 /* EAX */;
19701 tmp = gen_rtx_REG (SImode, tmp_regno);
19702 }
19703
19704 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19705 xops[1] = tmp;
19706 if (TARGET_64BIT)
19707 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19708 else
19709 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19710
19711 /* Adjust the this parameter. */
19712 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19713 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19714 {
19715 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19716 xops[0] = GEN_INT (vcall_offset);
19717 xops[1] = tmp2;
19718 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19719 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19720 }
19721 xops[1] = this_reg;
19722 if (TARGET_64BIT)
19723 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19724 else
19725 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19726 }
19727
19728 /* If necessary, drop THIS back to its stack slot. */
19729 if (this_reg && this_reg != this)
19730 {
19731 xops[0] = this_reg;
19732 xops[1] = this;
19733 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19734 }
19735
19736 xops[0] = XEXP (DECL_RTL (function), 0);
19737 if (TARGET_64BIT)
19738 {
19739 if (!flag_pic || (*targetm.binds_local_p) (function))
19740 output_asm_insn ("jmp\t%P0", xops);
19741 else
19742 {
19743 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19744 tmp = gen_rtx_CONST (Pmode, tmp);
19745 tmp = gen_rtx_MEM (QImode, tmp);
19746 xops[0] = tmp;
19747 output_asm_insn ("jmp\t%A0", xops);
19748 }
19749 }
19750 else
19751 {
19752 if (!flag_pic || (*targetm.binds_local_p) (function))
19753 output_asm_insn ("jmp\t%P0", xops);
19754 else
19755 #if TARGET_MACHO
19756 if (TARGET_MACHO)
19757 {
19758 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19759 tmp = (gen_rtx_SYMBOL_REF
19760 (Pmode,
19761 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19762 tmp = gen_rtx_MEM (QImode, tmp);
19763 xops[0] = tmp;
19764 output_asm_insn ("jmp\t%0", xops);
19765 }
19766 else
19767 #endif /* TARGET_MACHO */
19768 {
19769 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19770 output_set_got (tmp, NULL_RTX);
19771
19772 xops[1] = tmp;
19773 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19774 output_asm_insn ("jmp\t{*}%1", xops);
19775 }
19776 }
19777 }
19778
19779 static void
19780 x86_file_start (void)
19781 {
19782 default_file_start ();
19783 #if TARGET_MACHO
19784 darwin_file_start ();
19785 #endif
19786 if (X86_FILE_START_VERSION_DIRECTIVE)
19787 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19788 if (X86_FILE_START_FLTUSED)
19789 fputs ("\t.global\t__fltused\n", asm_out_file);
19790 if (ix86_asm_dialect == ASM_INTEL)
19791 fputs ("\t.intel_syntax\n", asm_out_file);
19792 }
19793
19794 int
19795 x86_field_alignment (tree field, int computed)
19796 {
19797 enum machine_mode mode;
19798 tree type = TREE_TYPE (field);
19799
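/* Outside of 64-bit mode and -malign-double, the traditional i386 ABI aligns
double, long long and similar fields to only 4 bytes within structures. */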
19800 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19801 return computed;
19802 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19803 ? get_inner_array_type (type) : type);
19804 if (mode == DFmode || mode == DCmode
19805 || GET_MODE_CLASS (mode) == MODE_INT
19806 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19807 return MIN (32, computed);
19808 return computed;
19809 }
19810
19811 /* Output assembler code to FILE to increment profiler label # LABELNO
19812 for profiling a function entry. */
19813 void
19814 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19815 {
19816 if (TARGET_64BIT)
19817 if (flag_pic)
19818 {
19819 #ifndef NO_PROFILE_COUNTERS
19820 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19821 #endif
19822 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19823 }
19824 else
19825 {
19826 #ifndef NO_PROFILE_COUNTERS
19827 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19828 #endif
19829 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19830 }
19831 else if (flag_pic)
19832 {
19833 #ifndef NO_PROFILE_COUNTERS
19834 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19835 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19836 #endif
19837 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19838 }
19839 else
19840 {
19841 #ifndef NO_PROFILE_COUNTERS
19842 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19843 PROFILE_COUNT_REGISTER);
19844 #endif
19845 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19846 }
19847 }
19848
19849 /* We don't have exact information about the insn sizes, but we may assume
19850 quite safely that we are informed about all 1 byte insns and memory
19851 address sizes. This is enough to eliminate unnecessary padding in
19852 99% of cases. */
19853
19854 static int
19855 min_insn_size (rtx insn)
19856 {
19857 int l = 0;
19858
19859 if (!INSN_P (insn) || !active_insn_p (insn))
19860 return 0;
19861
19862 /* Discard the alignments we've emitted ourselves, and jump tables. */
19863 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19864 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19865 return 0;
19866 if (JUMP_P (insn)
19867 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19868 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19869 return 0;
19870
19871 /* Important case - calls are always 5 bytes.
19872 It is common to have many calls in a row. */
19873 if (CALL_P (insn)
19874 && symbolic_reference_mentioned_p (PATTERN (insn))
19875 && !SIBLING_CALL_P (insn))
19876 return 5;
19877 if (get_attr_length (insn) <= 1)
19878 return 1;
19879
19880 /* For normal instructions we may rely on the sizes of addresses
19881 and the presence of a symbol to require 4 bytes of encoding.
19882 This is not the case for jumps where references are PC relative. */
19883 if (!JUMP_P (insn))
19884 {
19885 l = get_attr_length_address (insn);
19886 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19887 l = 4;
19888 }
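/* Estimate one opcode byte plus the address bytes computed above; with no
better information, assume the insn occupies at least 2 bytes. */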
19889 if (l)
19890 return 1+l;
19891 else
19892 return 2;
19893 }
19894
19895 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19896 window. */
19897
19898 static void
19899 ix86_avoid_jump_misspredicts (void)
19900 {
19901 rtx insn, start = get_insns ();
19902 int nbytes = 0, njumps = 0;
19903 int isjump = 0;
19904
19905 /* Look for all minimal intervals of instructions containing 4 jumps.
19906 The intervals are bounded by START and INSN. NBYTES is the total
19907 size of instructions in the interval including INSN and not including
19908 START. When NBYTES is smaller than 16 bytes, it is possible
19909 that the end of START and INSN ends up in the same 16byte page.
19910
19911 The smallest offset in the page INSN can start is the case where START
19912 ends on the offset 0. Offset of INSN is then NBYTES - sizeof (INSN).
19913 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19914 */
19915 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19916 {
19917
19918 nbytes += min_insn_size (insn);
19919 if (dump_file)
19920 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19921 INSN_UID (insn), min_insn_size (insn));
19922 if ((JUMP_P (insn)
19923 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19924 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19925 || CALL_P (insn))
19926 njumps++;
19927 else
19928 continue;
19929
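/* Shrink the window from the start until it again contains at most three
jumps or calls, remembering whether the insn that just left the window
was itself a jump or call. */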
19930 while (njumps > 3)
19931 {
19932 start = NEXT_INSN (start);
19933 if ((JUMP_P (start)
19934 && GET_CODE (PATTERN (start)) != ADDR_VEC
19935 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19936 || CALL_P (start))
19937 njumps--, isjump = 1;
19938 else
19939 isjump = 0;
19940 nbytes -= min_insn_size (start);
19941 }
19942 gcc_assert (njumps >= 0);
19943 if (dump_file)
19944 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19945 INSN_UID (start), INSN_UID (insn), nbytes);
19946
19947 if (njumps == 3 && isjump && nbytes < 16)
19948 {
19949 int padsize = 15 - nbytes + min_insn_size (insn);
19950
19951 if (dump_file)
19952 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19953 INSN_UID (insn), padsize);
19954 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19955 }
19956 }
19957 }
19958
19959 /* AMD Athlon works faster
19960 when RET is not the destination of a conditional jump or directly preceded
19961 by another jump instruction. We avoid the penalty by inserting a NOP just
19962 before the RET instructions in such cases. */
19963 static void
19964 ix86_pad_returns (void)
19965 {
19966 edge e;
19967 edge_iterator ei;
19968
19969 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19970 {
19971 basic_block bb = e->src;
19972 rtx ret = BB_END (bb);
19973 rtx prev;
19974 bool replace = false;
19975
19976 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19977 || !maybe_hot_bb_p (bb))
19978 continue;
19979 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19980 if (active_insn_p (prev) || LABEL_P (prev))
19981 break;
19982 if (prev && LABEL_P (prev))
19983 {
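/* The RET is preceded by a label; if any predecessor reaches that label by
a jump rather than by falling through, the RET can be a branch target and
should be replaced by the padded return below. */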
19984 edge e;
19985 edge_iterator ei;
19986
19987 FOR_EACH_EDGE (e, ei, bb->preds)
19988 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19989 && !(e->flags & EDGE_FALLTHRU))
19990 replace = true;
19991 }
19992 if (!replace)
19993 {
19994 prev = prev_active_insn (ret);
19995 if (prev
19996 && ((JUMP_P (prev) && any_condjump_p (prev))
19997 || CALL_P (prev)))
19998 replace = true;
19999 /* Empty functions get a branch mispredict even when the jump destination
20000 is not visible to us. */
20001 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
20002 replace = true;
20003 }
20004 if (replace)
20005 {
20006 emit_insn_before (gen_return_internal_long (), ret);
20007 delete_insn (ret);
20008 }
20009 }
20010 }
20011
20012 /* Implement machine specific optimizations. We implement padding of returns
20013 for K8 CPUs and a pass to avoid 4 jumps in a single 16 byte window. */
20014 static void
20015 ix86_reorg (void)
20016 {
20017 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
20018 ix86_pad_returns ();
20019 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
20020 ix86_avoid_jump_misspredicts ();
20021 }
20022
20023 /* Return nonzero when a QImode register that must be represented via a REX
20024 prefix is used. */
20025 bool
20026 x86_extended_QIreg_mentioned_p (rtx insn)
20027 {
20028 int i;
20029 extract_insn_cached (insn);
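/* Only %al, %dl, %cl and %bl (hard registers 0-3) have QImode variants that
can be encoded without a REX prefix; any other register operand forces one. */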
20030 for (i = 0; i < recog_data.n_operands; i++)
20031 if (REG_P (recog_data.operand[i])
20032 && REGNO (recog_data.operand[i]) >= 4)
20033 return true;
20034 return false;
20035 }
20036
20037 /* Return nonzero when P points to a register encoded via a REX prefix.
20038 Called via for_each_rtx. */
20039 static int
20040 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
20041 {
20042 unsigned int regno;
20043 if (!REG_P (*p))
20044 return 0;
20045 regno = REGNO (*p);
20046 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
20047 }
20048
20049 /* Return true when INSN mentions register that must be encoded using REX
20050 prefix. */
20051 bool
20052 x86_extended_reg_mentioned_p (rtx insn)
20053 {
20054 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
20055 }
20056
20057 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
20058 optabs would emit if we didn't have TFmode patterns. */
20059
20060 void
20061 x86_emit_floatuns (rtx operands[2])
20062 {
20063 rtx neglab, donelab, i0, i1, f0, in, out;
20064 enum machine_mode mode, inmode;
20065
20066 inmode = GET_MODE (operands[1]);
20067 gcc_assert (inmode == SImode || inmode == DImode);
20068
20069 out = operands[0];
20070 in = force_reg (inmode, operands[1]);
20071 mode = GET_MODE (out);
20072 neglab = gen_label_rtx ();
20073 donelab = gen_label_rtx ();
20074 f0 = gen_reg_rtx (mode);
20075
20076 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
20077
20078 expand_float (out, in, 0);
20079
20080 emit_jump_insn (gen_jump (donelab));
20081 emit_barrier ();
20082
20083 emit_label (neglab);
20084
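/* The input has its high bit set, so a signed conversion would see it as
negative. Halve it while folding the low bit back in (so the rounding is
preserved), convert the halved value, and then double the result. */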
20085 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
20086 1, OPTAB_DIRECT);
20087 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
20088 1, OPTAB_DIRECT);
20089 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
20090
20091 expand_float (f0, i0, 0);
20092
20093 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
20094
20095 emit_label (donelab);
20096 }
20097 \f
20098 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20099 with all elements equal to VAL. Return true if successful. */
20100
20101 static bool
20102 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
20103 rtx target, rtx val)
20104 {
20105 enum machine_mode smode, wsmode, wvmode;
20106 rtx x;
20107
20108 switch (mode)
20109 {
20110 case V2SImode:
20111 case V2SFmode:
20112 if (!mmx_ok)
20113 return false;
20114 /* FALLTHRU */
20115
20116 case V2DFmode:
20117 case V2DImode:
20118 case V4SFmode:
20119 case V4SImode:
20120 val = force_reg (GET_MODE_INNER (mode), val);
20121 x = gen_rtx_VEC_DUPLICATE (mode, val);
20122 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20123 return true;
20124
20125 case V4HImode:
20126 if (!mmx_ok)
20127 return false;
20128 if (TARGET_SSE || TARGET_3DNOW_A)
20129 {
20130 val = gen_lowpart (SImode, val);
20131 x = gen_rtx_TRUNCATE (HImode, val);
20132 x = gen_rtx_VEC_DUPLICATE (mode, x);
20133 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20134 return true;
20135 }
20136 else
20137 {
20138 smode = HImode;
20139 wsmode = SImode;
20140 wvmode = V2SImode;
20141 goto widen;
20142 }
20143
20144 case V8QImode:
20145 if (!mmx_ok)
20146 return false;
20147 smode = QImode;
20148 wsmode = HImode;
20149 wvmode = V4HImode;
20150 goto widen;
20151 case V8HImode:
20152 if (TARGET_SSE2)
20153 {
20154 rtx tmp1, tmp2;
20155 /* Extend HImode to SImode using a paradoxical SUBREG. */
20156 tmp1 = gen_reg_rtx (SImode);
20157 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20158 /* Insert the SImode value as low element of V4SImode vector. */
20159 tmp2 = gen_reg_rtx (V4SImode);
20160 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20161 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20162 CONST0_RTX (V4SImode),
20163 const1_rtx);
20164 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20165 /* Cast the V4SImode vector back to a V8HImode vector. */
20166 tmp1 = gen_reg_rtx (V8HImode);
20167 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
20168 /* Duplicate the low short through the whole low SImode word. */
20169 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
20170 /* Cast the V8HImode vector back to a V4SImode vector. */
20171 tmp2 = gen_reg_rtx (V4SImode);
20172 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20173 /* Replicate the low element of the V4SImode vector. */
20174 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20175 /* Cast the V4SImode vector back to V8HImode, and store in target. */
20176 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
20177 return true;
20178 }
20179 smode = HImode;
20180 wsmode = SImode;
20181 wvmode = V4SImode;
20182 goto widen;
20183 case V16QImode:
20184 if (TARGET_SSE2)
20185 {
20186 rtx tmp1, tmp2;
20187 /* Extend QImode to SImode using a paradoxical SUBREG. */
20188 tmp1 = gen_reg_rtx (SImode);
20189 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20190 /* Insert the SImode value as low element of V4SImode vector. */
20191 tmp2 = gen_reg_rtx (V4SImode);
20192 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20193 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20194 CONST0_RTX (V4SImode),
20195 const1_rtx);
20196 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20197 /* Cast the V4SImode vector back to a V16QImode vector. */
20198 tmp1 = gen_reg_rtx (V16QImode);
20199 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20200 /* Duplicate the low byte through the whole low SImode word. */
20201 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20202 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20203 /* Cast the V16QImode vector back to a V4SImode vector. */
20204 tmp2 = gen_reg_rtx (V4SImode);
20205 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20206 /* Replicate the low element of the V4SImode vector. */
20207 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20208 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20209 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20210 return true;
20211 }
20212 smode = QImode;
20213 wsmode = HImode;
20214 wvmode = V8HImode;
20215 goto widen;
20216 widen:
20217 /* Replicate the value once into the next wider mode and recurse. */
20218 val = convert_modes (wsmode, smode, val, true);
20219 x = expand_simple_binop (wsmode, ASHIFT, val,
20220 GEN_INT (GET_MODE_BITSIZE (smode)),
20221 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20222 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20223
20224 x = gen_reg_rtx (wvmode);
20225 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20226 gcc_unreachable ();
20227 emit_move_insn (target, gen_lowpart (mode, x));
20228 return true;
20229
20230 default:
20231 return false;
20232 }
20233 }
20234
20235 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20236 whose ONE_VAR element is VAR, and other elements are zero. Return true
20237 if successful. */
20238
20239 static bool
20240 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20241 rtx target, rtx var, int one_var)
20242 {
20243 enum machine_mode vsimode;
20244 rtx new_target;
20245 rtx x, tmp;
20246
20247 switch (mode)
20248 {
20249 case V2SFmode:
20250 case V2SImode:
20251 if (!mmx_ok)
20252 return false;
20253 /* FALLTHRU */
20254
20255 case V2DFmode:
20256 case V2DImode:
20257 if (one_var != 0)
20258 return false;
20259 var = force_reg (GET_MODE_INNER (mode), var);
20260 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20261 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20262 return true;
20263
20264 case V4SFmode:
20265 case V4SImode:
20266 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20267 new_target = gen_reg_rtx (mode);
20268 else
20269 new_target = target;
20270 var = force_reg (GET_MODE_INNER (mode), var);
20271 x = gen_rtx_VEC_DUPLICATE (mode, var);
20272 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20273 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20274 if (one_var != 0)
20275 {
20276 /* We need to shuffle the value to the correct position, so
20277 create a new pseudo to store the intermediate result. */
20278
20279 /* With SSE2, we can use the integer shuffle insns. */
20280 if (mode != V4SFmode && TARGET_SSE2)
20281 {
20282 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20283 GEN_INT (1),
20284 GEN_INT (one_var == 1 ? 0 : 1),
20285 GEN_INT (one_var == 2 ? 0 : 1),
20286 GEN_INT (one_var == 3 ? 0 : 1)));
20287 if (target != new_target)
20288 emit_move_insn (target, new_target);
20289 return true;
20290 }
20291
20292 /* Otherwise convert the intermediate result to V4SFmode and
20293 use the SSE1 shuffle instructions. */
20294 if (mode != V4SFmode)
20295 {
20296 tmp = gen_reg_rtx (V4SFmode);
20297 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20298 }
20299 else
20300 tmp = new_target;
20301
20302 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20303 GEN_INT (1),
20304 GEN_INT (one_var == 1 ? 0 : 1),
20305 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20306 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20307
20308 if (mode != V4SFmode)
20309 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20310 else if (tmp != target)
20311 emit_move_insn (target, tmp);
20312 }
20313 else if (target != new_target)
20314 emit_move_insn (target, new_target);
20315 return true;
20316
20317 case V8HImode:
20318 case V16QImode:
20319 vsimode = V4SImode;
20320 goto widen;
20321 case V4HImode:
20322 case V8QImode:
20323 if (!mmx_ok)
20324 return false;
20325 vsimode = V2SImode;
20326 goto widen;
20327 widen:
20328 if (one_var != 0)
20329 return false;
20330
20331 /* Zero extend the variable element to SImode and recurse. */
20332 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20333
20334 x = gen_reg_rtx (vsimode);
20335 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20336 var, one_var))
20337 gcc_unreachable ();
20338
20339 emit_move_insn (target, gen_lowpart (mode, x));
20340 return true;
20341
20342 default:
20343 return false;
20344 }
20345 }
20346
20347 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20348 consisting of the values in VALS. It is known that all elements
20349 except ONE_VAR are constants. Return true if successful. */
20350
20351 static bool
20352 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20353 rtx target, rtx vals, int one_var)
20354 {
20355 rtx var = XVECEXP (vals, 0, one_var);
20356 enum machine_mode wmode;
20357 rtx const_vec, x;
20358
20359 const_vec = copy_rtx (vals);
20360 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20361 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20362
20363 switch (mode)
20364 {
20365 case V2DFmode:
20366 case V2DImode:
20367 case V2SFmode:
20368 case V2SImode:
20369 /* For the two element vectors, it's just as easy to use
20370 the general case. */
20371 return false;
20372
20373 case V4SFmode:
20374 case V4SImode:
20375 case V8HImode:
20376 case V4HImode:
20377 break;
20378
20379 case V16QImode:
20380 wmode = V8HImode;
20381 goto widen;
20382 case V8QImode:
20383 wmode = V4HImode;
20384 goto widen;
20385 widen:
20386 /* There's no way to set one QImode entry easily. Combine
20387 the variable value with its adjacent constant value, and
20388 promote to an HImode set. */
20389 x = XVECEXP (vals, 0, one_var ^ 1);
20390 if (one_var & 1)
20391 {
20392 var = convert_modes (HImode, QImode, var, true);
20393 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20394 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20395 x = GEN_INT (INTVAL (x) & 0xff);
20396 }
20397 else
20398 {
20399 var = convert_modes (HImode, QImode, var, true);
20400 x = gen_int_mode (INTVAL (x) << 8, HImode);
20401 }
20402 if (x != const0_rtx)
20403 var = expand_simple_binop (HImode, IOR, var, x, var,
20404 1, OPTAB_LIB_WIDEN);
20405
20406 x = gen_reg_rtx (wmode);
20407 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20408 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20409
20410 emit_move_insn (target, gen_lowpart (mode, x));
20411 return true;
20412
20413 default:
20414 return false;
20415 }
20416
20417 emit_move_insn (target, const_vec);
20418 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20419 return true;
20420 }
20421
20422 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20423 all values variable, and none identical. */
20424
20425 static void
20426 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20427 rtx target, rtx vals)
20428 {
20429 enum machine_mode half_mode = GET_MODE_INNER (mode);
20430 rtx op0 = NULL, op1 = NULL;
20431 bool use_vec_concat = false;
20432
20433 switch (mode)
20434 {
20435 case V2SFmode:
20436 case V2SImode:
20437 if (!mmx_ok && !TARGET_SSE)
20438 break;
20439 /* FALLTHRU */
20440
20441 case V2DFmode:
20442 case V2DImode:
20443 /* For the two element vectors, we always implement VEC_CONCAT. */
20444 op0 = XVECEXP (vals, 0, 0);
20445 op1 = XVECEXP (vals, 0, 1);
20446 use_vec_concat = true;
20447 break;
20448
20449 case V4SFmode:
20450 half_mode = V2SFmode;
20451 goto half;
20452 case V4SImode:
20453 half_mode = V2SImode;
20454 goto half;
20455 half:
20456 {
20457 rtvec v;
20458
20459 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20460 Recurse to load the two halves. */
20461
20462 op0 = gen_reg_rtx (half_mode);
20463 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20464 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20465
20466 op1 = gen_reg_rtx (half_mode);
20467 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20468 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20469
20470 use_vec_concat = true;
20471 }
20472 break;
20473
20474 case V8HImode:
20475 case V16QImode:
20476 case V4HImode:
20477 case V8QImode:
20478 break;
20479
20480 default:
20481 gcc_unreachable ();
20482 }
20483
20484 if (use_vec_concat)
20485 {
20486 if (!register_operand (op0, half_mode))
20487 op0 = force_reg (half_mode, op0);
20488 if (!register_operand (op1, half_mode))
20489 op1 = force_reg (half_mode, op1);
20490
20491 emit_insn (gen_rtx_SET (VOIDmode, target,
20492 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20493 }
20494 else
20495 {
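/* No VEC_CONCAT path: pack the elements into word-sized integers by
shifting and ORing, then assemble the words into the vector. */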
20496 int i, j, n_elts, n_words, n_elt_per_word;
20497 enum machine_mode inner_mode;
20498 rtx words[4], shift;
20499
20500 inner_mode = GET_MODE_INNER (mode);
20501 n_elts = GET_MODE_NUNITS (mode);
20502 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20503 n_elt_per_word = n_elts / n_words;
20504 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20505
20506 for (i = 0; i < n_words; ++i)
20507 {
20508 rtx word = NULL_RTX;
20509
20510 for (j = 0; j < n_elt_per_word; ++j)
20511 {
20512 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20513 elt = convert_modes (word_mode, inner_mode, elt, true);
20514
20515 if (j == 0)
20516 word = elt;
20517 else
20518 {
20519 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20520 word, 1, OPTAB_LIB_WIDEN);
20521 word = expand_simple_binop (word_mode, IOR, word, elt,
20522 word, 1, OPTAB_LIB_WIDEN);
20523 }
20524 }
20525
20526 words[i] = word;
20527 }
20528
20529 if (n_words == 1)
20530 emit_move_insn (target, gen_lowpart (mode, words[0]));
20531 else if (n_words == 2)
20532 {
20533 rtx tmp = gen_reg_rtx (mode);
20534 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20535 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20536 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20537 emit_move_insn (target, tmp);
20538 }
20539 else if (n_words == 4)
20540 {
20541 rtx tmp = gen_reg_rtx (V4SImode);
20542 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20543 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20544 emit_move_insn (target, gen_lowpart (mode, tmp));
20545 }
20546 else
20547 gcc_unreachable ();
20548 }
20549 }
20550
20551 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20552 instructions unless MMX_OK is true. */
20553
20554 void
20555 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20556 {
20557 enum machine_mode mode = GET_MODE (target);
20558 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20559 int n_elts = GET_MODE_NUNITS (mode);
20560 int n_var = 0, one_var = -1;
20561 bool all_same = true, all_const_zero = true;
20562 int i;
20563 rtx x;
20564
20565 for (i = 0; i < n_elts; ++i)
20566 {
20567 x = XVECEXP (vals, 0, i);
20568 if (!CONSTANT_P (x))
20569 n_var++, one_var = i;
20570 else if (x != CONST0_RTX (inner_mode))
20571 all_const_zero = false;
20572 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20573 all_same = false;
20574 }
20575
20576 /* Constants are best loaded from the constant pool. */
20577 if (n_var == 0)
20578 {
20579 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20580 return;
20581 }
20582
20583 /* If all values are identical, broadcast the value. */
20584 if (all_same
20585 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20586 XVECEXP (vals, 0, 0)))
20587 return;
20588
20589 /* Values where only one field is non-constant are best loaded from
20590 the pool and overwritten via move later. */
20591 if (n_var == 1)
20592 {
20593 if (all_const_zero
20594 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20595 XVECEXP (vals, 0, one_var),
20596 one_var))
20597 return;
20598
20599 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20600 return;
20601 }
20602
20603 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20604 }
20605
20606 void
20607 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20608 {
20609 enum machine_mode mode = GET_MODE (target);
20610 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20611 bool use_vec_merge = false;
20612 rtx tmp;
20613
20614 switch (mode)
20615 {
20616 case V2SFmode:
20617 case V2SImode:
20618 if (mmx_ok)
20619 {
20620 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20621 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20622 if (elt == 0)
20623 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20624 else
20625 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20626 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20627 return;
20628 }
20629 break;
20630
20631 case V2DFmode:
20632 case V2DImode:
20633 {
20634 rtx op0, op1;
20635
20636 /* For the two element vectors, we implement a VEC_CONCAT with
20637 the extraction of the other element. */
20638
20639 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20640 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20641
20642 if (elt == 0)
20643 op0 = val, op1 = tmp;
20644 else
20645 op0 = tmp, op1 = val;
20646
20647 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20648 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20649 }
20650 return;
20651
20652 case V4SFmode:
20653 switch (elt)
20654 {
20655 case 0:
20656 use_vec_merge = true;
20657 break;
20658
20659 case 1:
20660 /* tmp = target = A B C D */
20661 tmp = copy_to_reg (target);
20662 /* target = A A B B */
20663 emit_insn (gen_sse_unpcklps (target, target, target));
20664 /* target = X A B B */
20665 ix86_expand_vector_set (false, target, val, 0);
20666 /* target = A X C D */
20667 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20668 GEN_INT (1), GEN_INT (0),
20669 GEN_INT (2+4), GEN_INT (3+4)));
20670 return;
20671
20672 case 2:
20673 /* tmp = target = A B C D */
20674 tmp = copy_to_reg (target);
20675 /* tmp = X B C D */
20676 ix86_expand_vector_set (false, tmp, val, 0);
20677 /* target = A B X D */
20678 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20679 GEN_INT (0), GEN_INT (1),
20680 GEN_INT (0+4), GEN_INT (3+4)));
20681 return;
20682
20683 case 3:
20684 /* tmp = target = A B C D */
20685 tmp = copy_to_reg (target);
20686 /* tmp = X B C D */
20687 ix86_expand_vector_set (false, tmp, val, 0);
20688 /* target = A B C X */
20689 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20690 GEN_INT (0), GEN_INT (1),
20691 GEN_INT (2+4), GEN_INT (0+4)));
20692 return;
20693
20694 default:
20695 gcc_unreachable ();
20696 }
20697 break;
20698
20699 case V4SImode:
20700 /* Element 0 handled by vec_merge below. */
20701 if (elt == 0)
20702 {
20703 use_vec_merge = true;
20704 break;
20705 }
20706
20707 if (TARGET_SSE2)
20708 {
20709 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20710 store into element 0, then shuffle them back. */
20711
20712 rtx order[4];
20713
20714 order[0] = GEN_INT (elt);
20715 order[1] = const1_rtx;
20716 order[2] = const2_rtx;
20717 order[3] = GEN_INT (3);
20718 order[elt] = const0_rtx;
20719
20720 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20721 order[1], order[2], order[3]));
20722
20723 ix86_expand_vector_set (false, target, val, 0);
20724
20725 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20726 order[1], order[2], order[3]));
20727 }
20728 else
20729 {
20730 /* For SSE1, we have to reuse the V4SF code. */
20731 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20732 gen_lowpart (SFmode, val), elt);
20733 }
20734 return;
20735
20736 case V8HImode:
20737 use_vec_merge = TARGET_SSE2;
20738 break;
20739 case V4HImode:
20740 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20741 break;
20742
20743 case V16QImode:
20744 case V8QImode:
20745 default:
20746 break;
20747 }
20748
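/* A sketch of the two strategies below: the vec_merge path emits, e.g.
   for a V8HImode target and ELT == 3,
     (set target (vec_merge (vec_duplicate:V8HI val) target (const_int 8)))
   so that only lane 3 is taken from the duplicated VAL.  The fallback
   spills the vector to a stack slot, stores the single element through
   an adjusted address, and reloads the whole vector.  */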
20749 if (use_vec_merge)
20750 {
20751 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20752 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20753 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20754 }
20755 else
20756 {
20757 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20758
20759 emit_move_insn (mem, target);
20760
20761 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20762 emit_move_insn (tmp, val);
20763
20764 emit_move_insn (target, mem);
20765 }
20766 }
20767
20768 void
20769 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20770 {
20771 enum machine_mode mode = GET_MODE (vec);
20772 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20773 bool use_vec_extr = false;
20774 rtx tmp;
20775
20776 switch (mode)
20777 {
20778 case V2SImode:
20779 case V2SFmode:
20780 if (!mmx_ok)
20781 break;
20782 /* FALLTHRU */
20783
20784 case V2DFmode:
20785 case V2DImode:
20786 use_vec_extr = true;
20787 break;
20788
20789 case V4SFmode:
20790 switch (elt)
20791 {
20792 case 0:
20793 tmp = vec;
20794 break;
20795
20796 case 1:
20797 case 3:
20798 tmp = gen_reg_rtx (mode);
20799 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20800 GEN_INT (elt), GEN_INT (elt),
20801 GEN_INT (elt+4), GEN_INT (elt+4)));
20802 break;
20803
20804 case 2:
20805 tmp = gen_reg_rtx (mode);
20806 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20807 break;
20808
20809 default:
20810 gcc_unreachable ();
20811 }
20812 vec = tmp;
20813 use_vec_extr = true;
20814 elt = 0;
20815 break;
20816
20817 case V4SImode:
20818 if (TARGET_SSE2)
20819 {
20820 switch (elt)
20821 {
20822 case 0:
20823 tmp = vec;
20824 break;
20825
20826 case 1:
20827 case 3:
20828 tmp = gen_reg_rtx (mode);
20829 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20830 GEN_INT (elt), GEN_INT (elt),
20831 GEN_INT (elt), GEN_INT (elt)));
20832 break;
20833
20834 case 2:
20835 tmp = gen_reg_rtx (mode);
20836 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20837 break;
20838
20839 default:
20840 gcc_unreachable ();
20841 }
20842 vec = tmp;
20843 use_vec_extr = true;
20844 elt = 0;
20845 }
20846 else
20847 {
20848 /* For SSE1, we have to reuse the V4SF code. */
20849 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20850 gen_lowpart (V4SFmode, vec), elt);
20851 return;
20852 }
20853 break;
20854
20855 case V8HImode:
20856 use_vec_extr = TARGET_SSE2;
20857 break;
20858 case V4HImode:
20859 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20860 break;
20861
20862 case V16QImode:
20863 case V8QImode:
20864 /* ??? Could extract the appropriate HImode element and shift. */
20865 default:
20866 break;
20867 }
20868
20869 if (use_vec_extr)
20870 {
20871 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20872 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20873
20874 /* Let the rtl optimizers know about the zero extension performed. */
20875 if (inner_mode == HImode)
20876 {
20877 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20878 target = gen_lowpart (SImode, target);
20879 }
20880
20881 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20882 }
20883 else
20884 {
20885 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20886
20887 emit_move_insn (mem, vec);
20888
20889 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20890 emit_move_insn (target, tmp);
20891 }
20892 }
20893
20894 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20895 pattern to reduce; DEST is the destination; IN is the input vector. */
20896
20897 void
20898 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20899 {
20900 rtx tmp1, tmp2, tmp3;
20901
20902 tmp1 = gen_reg_rtx (V4SFmode);
20903 tmp2 = gen_reg_rtx (V4SFmode);
20904 tmp3 = gen_reg_rtx (V4SFmode);
20905
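/* Dataflow sketch, writing @ for FN, with IN = { A, B, C, D }
   (element 0 listed first):
     tmp1 = movhlps (in, in)           -> { C, D, C, D }
     tmp2 = FN (tmp1, in)              -> { A@C, B@D, ... }
     tmp3 = broadcast of tmp2 elt 1    -> { B@D, B@D, B@D, B@D }
     dest = FN (tmp2, tmp3)            -> element 0 is (A@C)@(B@D),
   i.e. the reduction of all four elements, assuming FN is commutative
   and associative as min/max/plus are.  */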
20906 emit_insn (gen_sse_movhlps (tmp1, in, in));
20907 emit_insn (fn (tmp2, tmp1, in));
20908
20909 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20910 GEN_INT (1), GEN_INT (1),
20911 GEN_INT (1+4), GEN_INT (1+4)));
20912 emit_insn (fn (dest, tmp2, tmp3));
20913 }
20914 \f
20915 /* Target hook for scalar_mode_supported_p. */
20916 static bool
20917 ix86_scalar_mode_supported_p (enum machine_mode mode)
20918 {
20919 if (DECIMAL_FLOAT_MODE_P (mode))
20920 return true;
20921 else
20922 return default_scalar_mode_supported_p (mode);
20923 }
20924
20925 /* Implements target hook vector_mode_supported_p. */
20926 static bool
20927 ix86_vector_mode_supported_p (enum machine_mode mode)
20928 {
20929 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20930 return true;
20931 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20932 return true;
20933 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20934 return true;
20935 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20936 return true;
20937 return false;
20938 }
20939
20940 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20941
20942 We do this in the new i386 backend to maintain source compatibility
20943 with the old cc0-based compiler. */
20944
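/* In effect, a user asm such as
     asm ("fnstcw %0" : "=m" (cw));
   behaves as if it had also listed "flags" and "fpsr" in its clobber
   list, which is what code written for the old backend assumes.  */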
20945 static tree
20946 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20947 tree inputs ATTRIBUTE_UNUSED,
20948 tree clobbers)
20949 {
20950 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20951 clobbers);
20952 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20953 clobbers);
20954 return clobbers;
20955 }
20956
20957 /* Return true if this goes in large data/bss. */
20958
20959 static bool
20960 ix86_in_large_data_p (tree exp)
20961 {
20962 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
20963 return false;
20964
20965 /* Functions are never large data. */
20966 if (TREE_CODE (exp) == FUNCTION_DECL)
20967 return false;
20968
20969 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
20970 {
20971 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
20972 if (strcmp (section, ".ldata") == 0
20973 || strcmp (section, ".lbss") == 0)
20974 return true;
20975 return false;
20976 }
20977 else
20978 {
20979 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
20980
20981 /* If this is an incomplete type with size 0, we don't know whether it
20982 will fit below the threshold when completed, so treat it as large data. */
20983 if (!size || size > ix86_section_threshold)
20984 return true;
20985 }
20986
20987 return false;
20988 }
20989 static void
20990 ix86_encode_section_info (tree decl, rtx rtl, int first)
20991 {
20992 default_encode_section_info (decl, rtl, first);
20993
20994 if (TREE_CODE (decl) == VAR_DECL
20995 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20996 && ix86_in_large_data_p (decl))
20997 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20998 }
20999
21000 /* Worker function for REVERSE_CONDITION. */
21001
21002 enum rtx_code
21003 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
21004 {
21005 return (mode != CCFPmode && mode != CCFPUmode
21006 ? reverse_condition (code)
21007 : reverse_condition_maybe_unordered (code));
21008 }
21009
21010 /* Output code to perform an x87 FP register move, from OPERANDS[1]
21011 to OPERANDS[0]. */
21012
21013 const char *
21014 output_387_reg_move (rtx insn, rtx *operands)
21015 {
21016 if (REG_P (operands[1])
21017 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
21018 {
21019 if (REGNO (operands[0]) == FIRST_STACK_REG)
21020 return output_387_ffreep (operands, 0);
21021 return "fstp\t%y0";
21022 }
21023 if (STACK_TOP_P (operands[0]))
21024 return "fld%z1\t%y1";
21025 return "fst\t%y0";
21026 }
21027
21028 /* Output code to perform a conditional jump to LABEL if the C2 flag in
21029 the FP status register is set. */
21030
21031 void
21032 ix86_emit_fp_unordered_jump (rtx label)
21033 {
21034 rtx reg = gen_reg_rtx (HImode);
21035 rtx temp;
21036
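  /* Roughly, the two paths below assemble to either
         fnstsw  %ax
         sahf
         jp      label          ; C2 lands in PF after SAHF
     or, when SAHF is not usable,
         fnstsw  %ax
         testb   $4, %ah        ; bit 2 of AH is C2
         jne     label  */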
21037 emit_insn (gen_x86_fnstsw_1 (reg));
21038
21039 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
21040 {
21041 emit_insn (gen_x86_sahf_1 (reg));
21042
21043 temp = gen_rtx_REG (CCmode, FLAGS_REG);
21044 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
21045 }
21046 else
21047 {
21048 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
21049
21050 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
21051 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
21052 }
21053
21054 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
21055 gen_rtx_LABEL_REF (VOIDmode, label),
21056 pc_rtx);
21057 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
21058
21059 emit_jump_insn (temp);
21060 predict_jump (REG_BR_PROB_BASE * 10 / 100);
21061 }
21062
21063 /* Output code to perform a log1p XFmode calculation. */
21064
21065 void ix86_emit_i387_log1p (rtx op0, rtx op1)
21066 {
21067 rtx label1 = gen_label_rtx ();
21068 rtx label2 = gen_label_rtx ();
21069
21070 rtx tmp = gen_reg_rtx (XFmode);
21071 rtx tmp2 = gen_reg_rtx (XFmode);
21072
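  /* The comparison threshold below is approximately 1 - sqrt(2)/2.
     fyl2xp1 is only specified for arguments smaller than that in
     magnitude, so for small |op1| we compute
         log1p (x) = ln (2) * log2 (1 + x)   via fyl2xp1,
     and otherwise fall back to fyl2x on 1 + x, which computes the same
     value without the argument restriction.  */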
21073 emit_insn (gen_absxf2 (tmp, op1));
21074 emit_insn (gen_cmpxf (tmp,
21075 CONST_DOUBLE_FROM_REAL_VALUE (
21076 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
21077 XFmode)));
21078 emit_jump_insn (gen_bge (label1));
21079
21080 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21081 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
21082 emit_jump (label2);
21083
21084 emit_label (label1);
21085 emit_move_insn (tmp, CONST1_RTX (XFmode));
21086 emit_insn (gen_addxf3 (tmp, op1, tmp));
21087 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
21088 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
21089
21090 emit_label (label2);
21091 }
21092
21093 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
21094
21095 static void
21096 i386_solaris_elf_named_section (const char *name, unsigned int flags,
21097 tree decl)
21098 {
21099 /* With Binutils 2.15, the "@unwind" marker must be specified on
21100 every occurrence of the ".eh_frame" section, not just the first
21101 one. */
21102 if (TARGET_64BIT
21103 && strcmp (name, ".eh_frame") == 0)
21104 {
21105 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
21106 flags & SECTION_WRITE ? "aw" : "a");
21107 return;
21108 }
21109 default_elf_asm_named_section (name, flags, decl);
21110 }
21111
21112 /* Return the mangling of TYPE if it is an extended fundamental type. */
21113
21114 static const char *
21115 ix86_mangle_fundamental_type (tree type)
21116 {
21117 switch (TYPE_MODE (type))
21118 {
21119 case TFmode:
21120 /* __float128 is "g". */
21121 return "g";
21122 case XFmode:
21123 /* "long double" or __float80 is "e". */
21124 return "e";
21125 default:
21126 return NULL;
21127 }
21128 }
21129
21130 /* For 32-bit code we can save PIC register setup by using
21131 the __stack_chk_fail_local hidden function instead of calling
21132 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
21133 register, so it is better to call __stack_chk_fail directly. */
21134
21135 static tree
21136 ix86_stack_protect_fail (void)
21137 {
21138 return TARGET_64BIT
21139 ? default_external_stack_protect_fail ()
21140 : default_hidden_stack_protect_fail ();
21141 }
21142
21143 /* Select a format to encode pointers in exception handling data. CODE
21144 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
21145 true if the symbol may be affected by dynamic relocations.
21146
21147 ??? All x86 object file formats are capable of representing this.
21148 After all, the relocation needed is the same as for the call insn.
21149 Whether or not a particular assembler allows us to enter such, I
21150 guess we'll have to see. */
21151 int
21152 asm_preferred_eh_data_format (int code, int global)
21153 {
21154 if (flag_pic)
21155 {
21156 int type = DW_EH_PE_sdata8;
21157 if (!TARGET_64BIT
21158 || ix86_cmodel == CM_SMALL_PIC
21159 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
21160 type = DW_EH_PE_sdata4;
21161 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
21162 }
21163 if (ix86_cmodel == CM_SMALL
21164 || (ix86_cmodel == CM_MEDIUM && code))
21165 return DW_EH_PE_udata4;
21166 return DW_EH_PE_absptr;
21167 }
21168 \f
21169 /* Expand copysign from SIGN to the positive value ABS_VALUE
21170 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
21171 the sign-bit. */
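/* In other words, the expansion below computes
     RESULT = ABS_VALUE | (SIGN & signbit_mask)
   so only the sign bit of SIGN is copied onto ABS_VALUE, which is
   assumed to already be non-negative.  */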
21172 static void
21173 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
21174 {
21175 enum machine_mode mode = GET_MODE (sign);
21176 rtx sgn = gen_reg_rtx (mode);
21177 if (mask == NULL_RTX)
21178 {
21179 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
21180 if (!VECTOR_MODE_P (mode))
21181 {
21182 /* We need to generate a scalar mode mask in this case. */
21183 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21184 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21185 mask = gen_reg_rtx (mode);
21186 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21187 }
21188 }
21189 else
21190 mask = gen_rtx_NOT (mode, mask);
21191 emit_insn (gen_rtx_SET (VOIDmode, sgn,
21192 gen_rtx_AND (mode, mask, sign)));
21193 emit_insn (gen_rtx_SET (VOIDmode, result,
21194 gen_rtx_IOR (mode, abs_value, sgn)));
21195 }
21196
21197 /* Expand fabs (OP0) and return a new rtx that holds the result. The
21198 mask for masking out the sign-bit is stored in *SMASK, if that is
21199 non-null. */
21200 static rtx
21201 ix86_expand_sse_fabs (rtx op0, rtx *smask)
21202 {
21203 enum machine_mode mode = GET_MODE (op0);
21204 rtx xa, mask;
21205
21206 xa = gen_reg_rtx (mode);
21207 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
21208 if (!VECTOR_MODE_P (mode))
21209 {
21210 /* We need to generate a scalar mode mask in this case. */
21211 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
21212 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
21213 mask = gen_reg_rtx (mode);
21214 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21215 }
21216 emit_insn (gen_rtx_SET (VOIDmode, xa,
21217 gen_rtx_AND (mode, op0, mask)));
21218
21219 if (smask)
21220 *smask = mask;
21221
21222 return xa;
21223 }
21224
21225 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21226 swapping the operands if SWAP_OPERANDS is true. The expanded
21227 code is a forward jump to a newly created label in case the
21228 comparison is true. The generated label rtx is returned. */
21229 static rtx
21230 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21231 bool swap_operands)
21232 {
21233 rtx label, tmp;
21234
21235 if (swap_operands)
21236 {
21237 tmp = op0;
21238 op0 = op1;
21239 op1 = tmp;
21240 }
21241
21242 label = gen_label_rtx ();
21243 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21244 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21245 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21246 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21247 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21248 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21249 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21250 JUMP_LABEL (tmp) = label;
21251
21252 return label;
21253 }
21254
21255 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21256 using comparison code CODE. Operands are swapped for the comparison if
21257 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21258 static rtx
21259 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21260 bool swap_operands)
21261 {
21262 enum machine_mode mode = GET_MODE (op0);
21263 rtx mask = gen_reg_rtx (mode);
21264
21265 if (swap_operands)
21266 {
21267 rtx tmp = op0;
21268 op0 = op1;
21269 op1 = tmp;
21270 }
21271
21272 if (mode == DFmode)
21273 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21274 gen_rtx_fmt_ee (code, mode, op0, op1)));
21275 else
21276 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21277 gen_rtx_fmt_ee (code, mode, op0, op1)));
21278
21279 return mask;
21280 }
21281
21282 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21283 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21284 static rtx
21285 ix86_gen_TWO52 (enum machine_mode mode)
21286 {
21287 REAL_VALUE_TYPE TWO52r;
21288 rtx TWO52;
21289
21290 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21291 TWO52 = const_double_from_real_value (TWO52r, mode);
21292 TWO52 = force_reg (mode, TWO52);
21293
21294 return TWO52;
21295 }
21296
21297 /* Expand SSE sequence for computing lround from OP1 storing
21298 into OP0. */
21299 void
21300 ix86_expand_lround (rtx op0, rtx op1)
21301 {
21302 /* C code for the stuff we're doing below:
21303 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21304 return (long)tmp;
21305 */
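  /* nextafter (0.5, 0.0) rather than a plain 0.5 keeps arguments just
     below one half from rounding up: e.g. for the largest double
     smaller than 0.5, adding exactly 0.5 would round to 1.0 under
     round-to-nearest-even and give lround == 1 instead of 0.  */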
21306 enum machine_mode mode = GET_MODE (op1);
21307 const struct real_format *fmt;
21308 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21309 rtx adj;
21310
21311 /* load nextafter (0.5, 0.0) */
21312 fmt = REAL_MODE_FORMAT (mode);
21313 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21314 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21315
21316 /* adj = copysign (0.5, op1) */
21317 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21318 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21319
21320 /* adj = op1 + adj */
21321 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21322
21323 /* op0 = (imode)adj */
21324 expand_fix (op0, adj, 0);
21325 }
21326
21327 /* Expand SSE2 sequence for computing lfloor or lceil from OPERAND1 storing
21328 into OPERAND0. */
21329 void
21330 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21331 {
21332 /* C code for the stuff we're doing below (for do_floor):
21333 xi = (long)op1;
21334 xi -= (double)xi > op1 ? 1 : 0;
21335 return xi;
21336 */
21337 enum machine_mode fmode = GET_MODE (op1);
21338 enum machine_mode imode = GET_MODE (op0);
21339 rtx ireg, freg, label, tmp;
21340
21341 /* reg = (long)op1 */
21342 ireg = gen_reg_rtx (imode);
21343 expand_fix (ireg, op1, 0);
21344
21345 /* freg = (double)reg */
21346 freg = gen_reg_rtx (fmode);
21347 expand_float (freg, ireg, 0);
21348
21349 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21350 label = ix86_expand_sse_compare_and_jump (UNLE,
21351 freg, op1, !do_floor);
21352 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21353 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21354 emit_move_insn (ireg, tmp);
21355
21356 emit_label (label);
21357 LABEL_NUSES (label) = 1;
21358
21359 emit_move_insn (op0, ireg);
21360 }
21361
21362 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21363 result in OPERAND0. */
21364 void
21365 ix86_expand_rint (rtx operand0, rtx operand1)
21366 {
21367 /* C code for the stuff we're doing below:
21368 xa = fabs (operand1);
21369 if (!isless (xa, 2**52))
21370 return operand1;
21371 xa = xa + 2**52 - 2**52;
21372 return copysign (xa, operand1);
21373 */
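  /* The "+ 2**52 - 2**52" trick: for 0 <= xa < 2**52 the sum
     xa + 2**52 has no fraction bits, so the addition itself rounds xa
     to an integer in the current rounding mode and the subtraction
     recovers it.  E.g. xa = 3.7 gives 2**52 + 4, then 4.0.  */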
21374 enum machine_mode mode = GET_MODE (operand0);
21375 rtx res, xa, label, TWO52, mask;
21376
21377 res = gen_reg_rtx (mode);
21378 emit_move_insn (res, operand1);
21379
21380 /* xa = abs (operand1) */
21381 xa = ix86_expand_sse_fabs (res, &mask);
21382
21383 /* if (!isless (xa, TWO52)) goto label; */
21384 TWO52 = ix86_gen_TWO52 (mode);
21385 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21386
21387 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21388 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21389
21390 ix86_sse_copysign_to_positive (res, xa, res, mask);
21391
21392 emit_label (label);
21393 LABEL_NUSES (label) = 1;
21394
21395 emit_move_insn (operand0, res);
21396 }
21397
21398 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21399 into OPERAND0. */
21400 void
21401 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21402 {
21403 /* C code for the stuff we expand below.
21404 double xa = fabs (x), x2;
21405 if (!isless (xa, TWO52))
21406 return x;
21407 xa = xa + TWO52 - TWO52;
21408 x2 = copysign (xa, x);
21409 Compensate. Floor:
21410 if (x2 > x)
21411 x2 -= 1;
21412 Compensate. Ceil:
21413 if (x2 < x)
21414 x2 -= -1;
21415 return x2;
21416 */
21417 enum machine_mode mode = GET_MODE (operand0);
21418 rtx xa, TWO52, tmp, label, one, res, mask;
21419
21420 TWO52 = ix86_gen_TWO52 (mode);
21421
21422 /* Temporary for holding the result, initialized to the input
21423 operand to ease control flow. */
21424 res = gen_reg_rtx (mode);
21425 emit_move_insn (res, operand1);
21426
21427 /* xa = abs (operand1) */
21428 xa = ix86_expand_sse_fabs (res, &mask);
21429
21430 /* if (!isless (xa, TWO52)) goto label; */
21431 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21432
21433 /* xa = xa + TWO52 - TWO52; */
21434 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21435 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21436
21437 /* xa = copysign (xa, operand1) */
21438 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21439
21440 /* generate 1.0 or -1.0 */
21441 one = force_reg (mode,
21442 const_double_from_real_value (do_floor
21443 ? dconst1 : dconstm1, mode));
21444
21445 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21446 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21447 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21448 gen_rtx_AND (mode, one, tmp)));
21449 /* We always need to subtract here to preserve signed zero. */
21450 tmp = expand_simple_binop (mode, MINUS,
21451 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21452 emit_move_insn (res, tmp);
21453
21454 emit_label (label);
21455 LABEL_NUSES (label) = 1;
21456
21457 emit_move_insn (operand0, res);
21458 }
21459
21460 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21461 into OPERAND0. */
21462 void
21463 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21464 {
21465 /* C code for the stuff we expand below.
21466 double xa = fabs (x), x2;
21467 if (!isless (xa, TWO52))
21468 return x;
21469 x2 = (double)(long)x;
21470 Compensate. Floor:
21471 if (x2 > x)
21472 x2 -= 1;
21473 Compensate. Ceil:
21474 if (x2 < x)
21475 x2 += 1;
21476 if (HONOR_SIGNED_ZEROS (mode))
21477 return copysign (x2, x);
21478 return x2;
21479 */
21480 enum machine_mode mode = GET_MODE (operand0);
21481 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21482
21483 TWO52 = ix86_gen_TWO52 (mode);
21484
21485 /* Temporary for holding the result, initialized to the input
21486 operand to ease control flow. */
21487 res = gen_reg_rtx (mode);
21488 emit_move_insn (res, operand1);
21489
21490 /* xa = abs (operand1) */
21491 xa = ix86_expand_sse_fabs (res, &mask);
21492
21493 /* if (!isless (xa, TWO52)) goto label; */
21494 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21495
21496 /* xa = (double)(long)x */
21497 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21498 expand_fix (xi, res, 0);
21499 expand_float (xa, xi, 0);
21500
21501 /* generate 1.0 */
21502 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21503
21504 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
21505 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21506 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21507 gen_rtx_AND (mode, one, tmp)));
21508 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21509 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21510 emit_move_insn (res, tmp);
21511
21512 if (HONOR_SIGNED_ZEROS (mode))
21513 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21514
21515 emit_label (label);
21516 LABEL_NUSES (label) = 1;
21517
21518 emit_move_insn (operand0, res);
21519 }
21520
21521 /* Expand SSE sequence for computing round from OPERAND1 storing
21522 into OPERAND0. Sequence that works without relying on DImode truncation
21523 via cvttsd2siq, which is only available on 64-bit targets. */
21524 void
21525 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21526 {
21527 /* C code for the stuff we expand below.
21528 double xa = fabs (x), xa2, x2;
21529 if (!isless (xa, TWO52))
21530 return x;
21531 Using the absolute value and copying back sign makes
21532 -0.0 -> -0.0 correct.
21533 xa2 = xa + TWO52 - TWO52;
21534 Compensate.
21535 dxa = xa2 - xa;
21536 if (dxa <= -0.5)
21537 xa2 += 1;
21538 else if (dxa > 0.5)
21539 xa2 -= 1;
21540 x2 = copysign (xa2, x);
21541 return x2;
21542 */
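  /* Worked example: x = 2.5.  xa + TWO52 - TWO52 rounds to even, so
     xa2 = 2.0 and dxa = -0.5; the dxa <= -0.5 compensation then bumps
     xa2 to 3.0, matching round's halfway-away-from-zero rule.  For
     x = 2.4, dxa = -0.4 and no compensation is applied.  */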
21543 enum machine_mode mode = GET_MODE (operand0);
21544 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21545
21546 TWO52 = ix86_gen_TWO52 (mode);
21547
21548 /* Temporary for holding the result, initialized to the input
21549 operand to ease control flow. */
21550 res = gen_reg_rtx (mode);
21551 emit_move_insn (res, operand1);
21552
21553 /* xa = abs (operand1) */
21554 xa = ix86_expand_sse_fabs (res, &mask);
21555
21556 /* if (!isless (xa, TWO52)) goto label; */
21557 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21558
21559 /* xa2 = xa + TWO52 - TWO52; */
21560 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21561 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21562
21563 /* dxa = xa2 - xa; */
21564 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21565
21566 /* generate 0.5, 1.0 and -0.5 */
21567 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21568 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21569 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21570 0, OPTAB_DIRECT);
21571
21572 /* Compensate. */
21573 tmp = gen_reg_rtx (mode);
21574 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21575 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21576 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21577 gen_rtx_AND (mode, one, tmp)));
21578 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21579 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21580 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21581 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21582 gen_rtx_AND (mode, one, tmp)));
21583 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21584
21585 /* res = copysign (xa2, operand1) */
21586 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21587
21588 emit_label (label);
21589 LABEL_NUSES (label) = 1;
21590
21591 emit_move_insn (operand0, res);
21592 }
21593
21594 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21595 into OPERAND0. */
21596 void
21597 ix86_expand_trunc (rtx operand0, rtx operand1)
21598 {
21599 /* C code for SSE variant we expand below.
21600 double xa = fabs (x), x2;
21601 if (!isless (xa, TWO52))
21602 return x;
21603 x2 = (double)(long)x;
21604 if (HONOR_SIGNED_ZEROS (mode))
21605 return copysign (x2, x);
21606 return x2;
21607 */
21608 enum machine_mode mode = GET_MODE (operand0);
21609 rtx xa, xi, TWO52, label, res, mask;
21610
21611 TWO52 = ix86_gen_TWO52 (mode);
21612
21613 /* Temporary for holding the result, initialized to the input
21614 operand to ease control flow. */
21615 res = gen_reg_rtx (mode);
21616 emit_move_insn (res, operand1);
21617
21618 /* xa = abs (operand1) */
21619 xa = ix86_expand_sse_fabs (res, &mask);
21620
21621 /* if (!isless (xa, TWO52)) goto label; */
21622 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21623
21624 /* x = (double)(long)x */
21625 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21626 expand_fix (xi, res, 0);
21627 expand_float (res, xi, 0);
21628
21629 if (HONOR_SIGNED_ZEROS (mode))
21630 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21631
21632 emit_label (label);
21633 LABEL_NUSES (label) = 1;
21634
21635 emit_move_insn (operand0, res);
21636 }
21637
21638 /* Expand SSE sequence for computing trunc from OPERAND1 storing into OPERAND0.
21639 Works without relying on DImode truncation via cvttsd2siq, which is only available on 64-bit targets. */
21640 void
21641 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21642 {
21643 enum machine_mode mode = GET_MODE (operand0);
21644 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21645
21646 /* C code for SSE variant we expand below.
21647 double xa = fabs (x), xa2, x2;
21648 if (!isless (xa, TWO52))
21649 return x;
21650 xa2 = xa + TWO52 - TWO52;
21651 Compensate:
21652 if (xa2 > xa)
21653 xa2 -= 1.0;
21654 x2 = copysign (xa2, x);
21655 return x2;
21656 */
21657
21658 TWO52 = ix86_gen_TWO52 (mode);
21659
21660 /* Temporary for holding the result, initialized to the input
21661 operand to ease control flow. */
21662 res = gen_reg_rtx (mode);
21663 emit_move_insn (res, operand1);
21664
21665 /* xa = abs (operand1) */
21666 xa = ix86_expand_sse_fabs (res, &smask);
21667
21668 /* if (!isless (xa, TWO52)) goto label; */
21669 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21670
21671 /* res = xa + TWO52 - TWO52; */
21672 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21673 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21674 emit_move_insn (res, tmp);
21675
21676 /* generate 1.0 */
21677 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21678
21679 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21680 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21681 emit_insn (gen_rtx_SET (VOIDmode, mask,
21682 gen_rtx_AND (mode, mask, one)));
21683 tmp = expand_simple_binop (mode, MINUS,
21684 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21685 emit_move_insn (res, tmp);
21686
21687 /* res = copysign (res, operand1) */
21688 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21689
21690 emit_label (label);
21691 LABEL_NUSES (label) = 1;
21692
21693 emit_move_insn (operand0, res);
21694 }
21695
21696 /* Expand SSE sequence for computing round from OPERAND1 storing
21697 into OPERAND0. */
21698 void
21699 ix86_expand_round (rtx operand0, rtx operand1)
21700 {
21701 /* C code for the stuff we're doing below:
21702 double xa = fabs (x);
21703 if (!isless (xa, TWO52))
21704 return x;
21705 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21706 return copysign (xa, x);
21707 */
21708 enum machine_mode mode = GET_MODE (operand0);
21709 rtx res, TWO52, xa, label, xi, half, mask;
21710 const struct real_format *fmt;
21711 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21712
21713 /* Temporary for holding the result, initialized to the input
21714 operand to ease control flow. */
21715 res = gen_reg_rtx (mode);
21716 emit_move_insn (res, operand1);
21717
21718 TWO52 = ix86_gen_TWO52 (mode);
21719 xa = ix86_expand_sse_fabs (res, &mask);
21720 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21721
21722 /* load nextafter (0.5, 0.0) */
21723 fmt = REAL_MODE_FORMAT (mode);
21724 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21725 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21726
21727 /* xa = xa + 0.5 */
21728 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21729 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21730
21731 /* xa = (double)(int64_t)xa */
21732 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21733 expand_fix (xi, xa, 0);
21734 expand_float (xa, xi, 0);
21735
21736 /* res = copysign (xa, operand1) */
21737 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21738
21739 emit_label (label);
21740 LABEL_NUSES (label) = 1;
21741
21742 emit_move_insn (operand0, res);
21743 }
21744
21745 #include "gt-i386.h"