1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 2, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING. If not, write to
19 the Free Software Foundation, 51 Franklin Street, Fifth Floor,
20 Boston, MA 02110-1301, USA. */
21
22 #include "config.h"
23 #include "system.h"
24 #include "coretypes.h"
25 #include "tm.h"
26 #include "rtl.h"
27 #include "tree.h"
28 #include "tm_p.h"
29 #include "regs.h"
30 #include "hard-reg-set.h"
31 #include "real.h"
32 #include "insn-config.h"
33 #include "conditions.h"
34 #include "output.h"
35 #include "insn-codes.h"
36 #include "insn-attr.h"
37 #include "flags.h"
38 #include "except.h"
39 #include "function.h"
40 #include "recog.h"
41 #include "expr.h"
42 #include "optabs.h"
43 #include "toplev.h"
44 #include "basic-block.h"
45 #include "ggc.h"
46 #include "target.h"
47 #include "target-def.h"
48 #include "langhooks.h"
49 #include "cgraph.h"
50 #include "tree-gimple.h"
51 #include "dwarf2.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
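/* For example, MODE_INDEX (SImode) is 2 and selects the SImode entry of the
   five-element {QI, HI, SI, DI, other} multiply and divide cost arrays in
   the tables below; modes wider than DImode fall into the "other" slot.  */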
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
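/* Under those assumptions the two scales agree on the cost of an add:
   a 2-byte add is COSTS_N_BYTES (2) == 4 == COSTS_N_INSNS (1), so the byte
   counts used by size_cost are directly comparable with the insn-based
   costs used by the other tables.  */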
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
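/* Every cost table below ends with two string-operation fields (block copy
   and block set).  Each field holds a pair of stringop_algs descriptors; a
   descriptor names the algorithm to use when the size is unknown, followed
   by {max-size, algorithm} entries ending in a {-1, alg} catch-all.
   DUMMY_STRINGOP_ALGS fills whichever member of a pair a given tuning never
   consults - it simply falls back to a libcall.  */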
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of prefetch block */
116 0, /* number of parallel prefetches */
117 2, /* Branch cost */
118 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
119 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
120 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
121 COSTS_N_BYTES (2), /* cost of FABS instruction. */
122 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
123 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
124 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
125 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
128 };
129
130 /* Processor costs (relative to an add) */
131 static const
132 struct processor_costs i386_cost = { /* 386 specific costs */
133 COSTS_N_INSNS (1), /* cost of an add instruction */
134 COSTS_N_INSNS (1), /* cost of a lea instruction */
135 COSTS_N_INSNS (3), /* variable shift costs */
136 COSTS_N_INSNS (2), /* constant shift costs */
137 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
138 COSTS_N_INSNS (6), /* HI */
139 COSTS_N_INSNS (6), /* SI */
140 COSTS_N_INSNS (6), /* DI */
141 COSTS_N_INSNS (6)}, /* other */
142 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
143 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
144 COSTS_N_INSNS (23), /* HI */
145 COSTS_N_INSNS (23), /* SI */
146 COSTS_N_INSNS (23), /* DI */
147 COSTS_N_INSNS (23)}, /* other */
148 COSTS_N_INSNS (3), /* cost of movsx */
149 COSTS_N_INSNS (2), /* cost of movzx */
150 15, /* "large" insn */
151 3, /* MOVE_RATIO */
152 4, /* cost for loading QImode using movzbl */
153 {2, 4, 2}, /* cost of loading integer registers
154 in QImode, HImode and SImode.
155 Relative to reg-reg move (2). */
156 {2, 4, 2}, /* cost of storing integer registers */
157 2, /* cost of reg,reg fld/fst */
158 {8, 8, 8}, /* cost of loading fp registers
159 in SFmode, DFmode and XFmode */
160 {8, 8, 8}, /* cost of storing fp registers
161 in SFmode, DFmode and XFmode */
162 2, /* cost of moving MMX register */
163 {4, 8}, /* cost of loading MMX registers
164 in SImode and DImode */
165 {4, 8}, /* cost of storing MMX registers
166 in SImode and DImode */
167 2, /* cost of moving SSE register */
168 {4, 8, 16}, /* cost of loading SSE registers
169 in SImode, DImode and TImode */
170 {4, 8, 16}, /* cost of storing SSE registers
171 in SImode, DImode and TImode */
172 3, /* MMX or SSE register to integer */
173 0, /* size of prefetch block */
174 0, /* number of parallel prefetches */
175 1, /* Branch cost */
176 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
177 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
178 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
179 COSTS_N_INSNS (22), /* cost of FABS instruction. */
180 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
181 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
182 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
183 DUMMY_STRINGOP_ALGS},
184 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
185 DUMMY_STRINGOP_ALGS},
186 };
187
188 static const
189 struct processor_costs i486_cost = { /* 486 specific costs */
190 COSTS_N_INSNS (1), /* cost of an add instruction */
191 COSTS_N_INSNS (1), /* cost of a lea instruction */
192 COSTS_N_INSNS (3), /* variable shift costs */
193 COSTS_N_INSNS (2), /* constant shift costs */
194 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
195 COSTS_N_INSNS (12), /* HI */
196 COSTS_N_INSNS (12), /* SI */
197 COSTS_N_INSNS (12), /* DI */
198 COSTS_N_INSNS (12)}, /* other */
199 1, /* cost of multiply per each bit set */
200 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
201 COSTS_N_INSNS (40), /* HI */
202 COSTS_N_INSNS (40), /* SI */
203 COSTS_N_INSNS (40), /* DI */
204 COSTS_N_INSNS (40)}, /* other */
205 COSTS_N_INSNS (3), /* cost of movsx */
206 COSTS_N_INSNS (2), /* cost of movzx */
207 15, /* "large" insn */
208 3, /* MOVE_RATIO */
209 4, /* cost for loading QImode using movzbl */
210 {2, 4, 2}, /* cost of loading integer registers
211 in QImode, HImode and SImode.
212 Relative to reg-reg move (2). */
213 {2, 4, 2}, /* cost of storing integer registers */
214 2, /* cost of reg,reg fld/fst */
215 {8, 8, 8}, /* cost of loading fp registers
216 in SFmode, DFmode and XFmode */
217 {8, 8, 8}, /* cost of storing fp registers
218 in SFmode, DFmode and XFmode */
219 2, /* cost of moving MMX register */
220 {4, 8}, /* cost of loading MMX registers
221 in SImode and DImode */
222 {4, 8}, /* cost of storing MMX registers
223 in SImode and DImode */
224 2, /* cost of moving SSE register */
225 {4, 8, 16}, /* cost of loading SSE registers
226 in SImode, DImode and TImode */
227 {4, 8, 16}, /* cost of storing SSE registers
228 in SImode, DImode and TImode */
229 3, /* MMX or SSE register to integer */
230 0, /* size of prefetch block */
231 0, /* number of parallel prefetches */
232 1, /* Branch cost */
233 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
234 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
235 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
236 COSTS_N_INSNS (3), /* cost of FABS instruction. */
237 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
238 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
239 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
240 DUMMY_STRINGOP_ALGS},
241 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
242 DUMMY_STRINGOP_ALGS}
243 };
244
245 static const
246 struct processor_costs pentium_cost = {
247 COSTS_N_INSNS (1), /* cost of an add instruction */
248 COSTS_N_INSNS (1), /* cost of a lea instruction */
249 COSTS_N_INSNS (4), /* variable shift costs */
250 COSTS_N_INSNS (1), /* constant shift costs */
251 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
252 COSTS_N_INSNS (11), /* HI */
253 COSTS_N_INSNS (11), /* SI */
254 COSTS_N_INSNS (11), /* DI */
255 COSTS_N_INSNS (11)}, /* other */
256 0, /* cost of multiply per each bit set */
257 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
258 COSTS_N_INSNS (25), /* HI */
259 COSTS_N_INSNS (25), /* SI */
260 COSTS_N_INSNS (25), /* DI */
261 COSTS_N_INSNS (25)}, /* other */
262 COSTS_N_INSNS (3), /* cost of movsx */
263 COSTS_N_INSNS (2), /* cost of movzx */
264 8, /* "large" insn */
265 6, /* MOVE_RATIO */
266 6, /* cost for loading QImode using movzbl */
267 {2, 4, 2}, /* cost of loading integer registers
268 in QImode, HImode and SImode.
269 Relative to reg-reg move (2). */
270 {2, 4, 2}, /* cost of storing integer registers */
271 2, /* cost of reg,reg fld/fst */
272 {2, 2, 6}, /* cost of loading fp registers
273 in SFmode, DFmode and XFmode */
274 {4, 4, 6}, /* cost of storing fp registers
275 in SFmode, DFmode and XFmode */
276 8, /* cost of moving MMX register */
277 {8, 8}, /* cost of loading MMX registers
278 in SImode and DImode */
279 {8, 8}, /* cost of storing MMX registers
280 in SImode and DImode */
281 2, /* cost of moving SSE register */
282 {4, 8, 16}, /* cost of loading SSE registers
283 in SImode, DImode and TImode */
284 {4, 8, 16}, /* cost of storing SSE registers
285 in SImode, DImode and TImode */
286 3, /* MMX or SSE register to integer */
287 0, /* size of prefetch block */
288 0, /* number of parallel prefetches */
289 2, /* Branch cost */
290 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
291 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
292 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
293 COSTS_N_INSNS (1), /* cost of FABS instruction. */
294 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
295 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
296 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
297 DUMMY_STRINGOP_ALGS},
298 {{libcall, {{-1, rep_prefix_4_byte}}},
299 DUMMY_STRINGOP_ALGS}
300 };
301
302 static const
303 struct processor_costs pentiumpro_cost = {
304 COSTS_N_INSNS (1), /* cost of an add instruction */
305 COSTS_N_INSNS (1), /* cost of a lea instruction */
306 COSTS_N_INSNS (1), /* variable shift costs */
307 COSTS_N_INSNS (1), /* constant shift costs */
308 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
309 COSTS_N_INSNS (4), /* HI */
310 COSTS_N_INSNS (4), /* SI */
311 COSTS_N_INSNS (4), /* DI */
312 COSTS_N_INSNS (4)}, /* other */
313 0, /* cost of multiply per each bit set */
314 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
315 COSTS_N_INSNS (17), /* HI */
316 COSTS_N_INSNS (17), /* SI */
317 COSTS_N_INSNS (17), /* DI */
318 COSTS_N_INSNS (17)}, /* other */
319 COSTS_N_INSNS (1), /* cost of movsx */
320 COSTS_N_INSNS (1), /* cost of movzx */
321 8, /* "large" insn */
322 6, /* MOVE_RATIO */
323 2, /* cost for loading QImode using movzbl */
324 {4, 4, 4}, /* cost of loading integer registers
325 in QImode, HImode and SImode.
326 Relative to reg-reg move (2). */
327 {2, 2, 2}, /* cost of storing integer registers */
328 2, /* cost of reg,reg fld/fst */
329 {2, 2, 6}, /* cost of loading fp registers
330 in SFmode, DFmode and XFmode */
331 {4, 4, 6}, /* cost of storing fp registers
332 in SFmode, DFmode and XFmode */
333 2, /* cost of moving MMX register */
334 {2, 2}, /* cost of loading MMX registers
335 in SImode and DImode */
336 {2, 2}, /* cost of storing MMX registers
337 in SImode and DImode */
338 2, /* cost of moving SSE register */
339 {2, 2, 8}, /* cost of loading SSE registers
340 in SImode, DImode and TImode */
341 {2, 2, 8}, /* cost of storing SSE registers
342 in SImode, DImode and TImode */
343 3, /* MMX or SSE register to integer */
344 32, /* size of prefetch block */
345 6, /* number of parallel prefetches */
346 2, /* Branch cost */
347 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
348 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
349 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
350 COSTS_N_INSNS (2), /* cost of FABS instruction. */
351 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
352 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
353 /* PentiumPro has optimized rep instructions for blocks aligned to 8 bytes (we
354 ensure the alignment). For small blocks an inline loop is still a noticeable
355 win; for bigger blocks either rep movsl or rep movsb is the way to go. Rep
356 movsb apparently has a more expensive startup time in the CPU, but after 4K
357 the difference is down in the noise. */
358 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
359 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
360 DUMMY_STRINGOP_ALGS},
361 {{rep_prefix_4_byte, {{1024, unrolled_loop},
362 {8192, rep_prefix_4_byte}, {-1, libcall}}},
363 DUMMY_STRINGOP_ALGS}
364 };
365
366 static const
367 struct processor_costs geode_cost = {
368 COSTS_N_INSNS (1), /* cost of an add instruction */
369 COSTS_N_INSNS (1), /* cost of a lea instruction */
370 COSTS_N_INSNS (2), /* variable shift costs */
371 COSTS_N_INSNS (1), /* constant shift costs */
372 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
373 COSTS_N_INSNS (4), /* HI */
374 COSTS_N_INSNS (7), /* SI */
375 COSTS_N_INSNS (7), /* DI */
376 COSTS_N_INSNS (7)}, /* other */
377 0, /* cost of multiply per each bit set */
378 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
379 COSTS_N_INSNS (23), /* HI */
380 COSTS_N_INSNS (39), /* SI */
381 COSTS_N_INSNS (39), /* DI */
382 COSTS_N_INSNS (39)}, /* other */
383 COSTS_N_INSNS (1), /* cost of movsx */
384 COSTS_N_INSNS (1), /* cost of movzx */
385 8, /* "large" insn */
386 4, /* MOVE_RATIO */
387 1, /* cost for loading QImode using movzbl */
388 {1, 1, 1}, /* cost of loading integer registers
389 in QImode, HImode and SImode.
390 Relative to reg-reg move (2). */
391 {1, 1, 1}, /* cost of storing integer registers */
392 1, /* cost of reg,reg fld/fst */
393 {1, 1, 1}, /* cost of loading fp registers
394 in SFmode, DFmode and XFmode */
395 {4, 6, 6}, /* cost of storing fp registers
396 in SFmode, DFmode and XFmode */
397
398 1, /* cost of moving MMX register */
399 {1, 1}, /* cost of loading MMX registers
400 in SImode and DImode */
401 {1, 1}, /* cost of storing MMX registers
402 in SImode and DImode */
403 1, /* cost of moving SSE register */
404 {1, 1, 1}, /* cost of loading SSE registers
405 in SImode, DImode and TImode */
406 {1, 1, 1}, /* cost of storing SSE registers
407 in SImode, DImode and TImode */
408 1, /* MMX or SSE register to integer */
409 32, /* size of prefetch block */
410 1, /* number of parallel prefetches */
411 1, /* Branch cost */
412 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
413 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
414 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
415 COSTS_N_INSNS (1), /* cost of FABS instruction. */
416 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
417 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
418 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
419 DUMMY_STRINGOP_ALGS},
420 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
421 DUMMY_STRINGOP_ALGS}
422 };
423
424 static const
425 struct processor_costs k6_cost = {
426 COSTS_N_INSNS (1), /* cost of an add instruction */
427 COSTS_N_INSNS (2), /* cost of a lea instruction */
428 COSTS_N_INSNS (1), /* variable shift costs */
429 COSTS_N_INSNS (1), /* constant shift costs */
430 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
431 COSTS_N_INSNS (3), /* HI */
432 COSTS_N_INSNS (3), /* SI */
433 COSTS_N_INSNS (3), /* DI */
434 COSTS_N_INSNS (3)}, /* other */
435 0, /* cost of multiply per each bit set */
436 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
437 COSTS_N_INSNS (18), /* HI */
438 COSTS_N_INSNS (18), /* SI */
439 COSTS_N_INSNS (18), /* DI */
440 COSTS_N_INSNS (18)}, /* other */
441 COSTS_N_INSNS (2), /* cost of movsx */
442 COSTS_N_INSNS (2), /* cost of movzx */
443 8, /* "large" insn */
444 4, /* MOVE_RATIO */
445 3, /* cost for loading QImode using movzbl */
446 {4, 5, 4}, /* cost of loading integer registers
447 in QImode, HImode and SImode.
448 Relative to reg-reg move (2). */
449 {2, 3, 2}, /* cost of storing integer registers */
450 4, /* cost of reg,reg fld/fst */
451 {6, 6, 6}, /* cost of loading fp registers
452 in SFmode, DFmode and XFmode */
453 {4, 4, 4}, /* cost of storing fp registers
454 in SFmode, DFmode and XFmode */
455 2, /* cost of moving MMX register */
456 {2, 2}, /* cost of loading MMX registers
457 in SImode and DImode */
458 {2, 2}, /* cost of storing MMX registers
459 in SImode and DImode */
460 2, /* cost of moving SSE register */
461 {2, 2, 8}, /* cost of loading SSE registers
462 in SImode, DImode and TImode */
463 {2, 2, 8}, /* cost of storing SSE registers
464 in SImode, DImode and TImode */
465 6, /* MMX or SSE register to integer */
466 32, /* size of prefetch block */
467 1, /* number of parallel prefetches */
468 1, /* Branch cost */
469 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
470 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
471 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
472 COSTS_N_INSNS (2), /* cost of FABS instruction. */
473 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
474 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
475 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
476 DUMMY_STRINGOP_ALGS},
477 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
478 DUMMY_STRINGOP_ALGS}
479 };
480
481 static const
482 struct processor_costs athlon_cost = {
483 COSTS_N_INSNS (1), /* cost of an add instruction */
484 COSTS_N_INSNS (2), /* cost of a lea instruction */
485 COSTS_N_INSNS (1), /* variable shift costs */
486 COSTS_N_INSNS (1), /* constant shift costs */
487 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
488 COSTS_N_INSNS (5), /* HI */
489 COSTS_N_INSNS (5), /* SI */
490 COSTS_N_INSNS (5), /* DI */
491 COSTS_N_INSNS (5)}, /* other */
492 0, /* cost of multiply per each bit set */
493 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
494 COSTS_N_INSNS (26), /* HI */
495 COSTS_N_INSNS (42), /* SI */
496 COSTS_N_INSNS (74), /* DI */
497 COSTS_N_INSNS (74)}, /* other */
498 COSTS_N_INSNS (1), /* cost of movsx */
499 COSTS_N_INSNS (1), /* cost of movzx */
500 8, /* "large" insn */
501 9, /* MOVE_RATIO */
502 4, /* cost for loading QImode using movzbl */
503 {3, 4, 3}, /* cost of loading integer registers
504 in QImode, HImode and SImode.
505 Relative to reg-reg move (2). */
506 {3, 4, 3}, /* cost of storing integer registers */
507 4, /* cost of reg,reg fld/fst */
508 {4, 4, 12}, /* cost of loading fp registers
509 in SFmode, DFmode and XFmode */
510 {6, 6, 8}, /* cost of storing fp registers
511 in SFmode, DFmode and XFmode */
512 2, /* cost of moving MMX register */
513 {4, 4}, /* cost of loading MMX registers
514 in SImode and DImode */
515 {4, 4}, /* cost of storing MMX registers
516 in SImode and DImode */
517 2, /* cost of moving SSE register */
518 {4, 4, 6}, /* cost of loading SSE registers
519 in SImode, DImode and TImode */
520 {4, 4, 5}, /* cost of storing SSE registers
521 in SImode, DImode and TImode */
522 5, /* MMX or SSE register to integer */
523 64, /* size of prefetch block */
524 6, /* number of parallel prefetches */
525 5, /* Branch cost */
526 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
527 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
528 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
529 COSTS_N_INSNS (2), /* cost of FABS instruction. */
530 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
531 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
532 /* For some reason, Athlon deals better with REP prefix (relative to loops)
533 compared to K8. Alignment becomes important after 8 bytes for memcpy and
534 128 bytes for memset. */
535 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
536 DUMMY_STRINGOP_ALGS},
537 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
538 DUMMY_STRINGOP_ALGS}
539 };
540
541 static const
542 struct processor_costs k8_cost = {
543 COSTS_N_INSNS (1), /* cost of an add instruction */
544 COSTS_N_INSNS (2), /* cost of a lea instruction */
545 COSTS_N_INSNS (1), /* variable shift costs */
546 COSTS_N_INSNS (1), /* constant shift costs */
547 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
548 COSTS_N_INSNS (4), /* HI */
549 COSTS_N_INSNS (3), /* SI */
550 COSTS_N_INSNS (4), /* DI */
551 COSTS_N_INSNS (5)}, /* other */
552 0, /* cost of multiply per each bit set */
553 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
554 COSTS_N_INSNS (26), /* HI */
555 COSTS_N_INSNS (42), /* SI */
556 COSTS_N_INSNS (74), /* DI */
557 COSTS_N_INSNS (74)}, /* other */
558 COSTS_N_INSNS (1), /* cost of movsx */
559 COSTS_N_INSNS (1), /* cost of movzx */
560 8, /* "large" insn */
561 9, /* MOVE_RATIO */
562 4, /* cost for loading QImode using movzbl */
563 {3, 4, 3}, /* cost of loading integer registers
564 in QImode, HImode and SImode.
565 Relative to reg-reg move (2). */
566 {3, 4, 3}, /* cost of storing integer registers */
567 4, /* cost of reg,reg fld/fst */
568 {4, 4, 12}, /* cost of loading fp registers
569 in SFmode, DFmode and XFmode */
570 {6, 6, 8}, /* cost of storing fp registers
571 in SFmode, DFmode and XFmode */
572 2, /* cost of moving MMX register */
573 {3, 3}, /* cost of loading MMX registers
574 in SImode and DImode */
575 {4, 4}, /* cost of storing MMX registers
576 in SImode and DImode */
577 2, /* cost of moving SSE register */
578 {4, 3, 6}, /* cost of loading SSE registers
579 in SImode, DImode and TImode */
580 {4, 4, 5}, /* cost of storing SSE registers
581 in SImode, DImode and TImode */
582 5, /* MMX or SSE register to integer */
583 64, /* size of prefetch block */
584 /* New AMD processors never drop prefetches; if they cannot be performed
585 immediately, they are queued. We set the number of simultaneous prefetches
586 to a large constant to reflect this (it is probably not a good idea to leave
587 the number of prefetches completely unlimited, as their execution also takes
588 some time). */
589 100, /* number of parallel prefetches */
590 5, /* Branch cost */
591 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
592 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
593 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
594 COSTS_N_INSNS (2), /* cost of FABS instruction. */
595 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
596 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
597 /* K8 has an optimized REP instruction for medium-sized blocks, but for very
598 small blocks it is better to use a loop. For large blocks, a libcall can do
599 nontemporal accesses and beat inline code considerably. */
600 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
601 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
602 {{libcall, {{8, loop}, {24, unrolled_loop},
603 {2048, rep_prefix_4_byte}, {-1, libcall}}},
604 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
605 };
606
607 struct processor_costs amdfam10_cost = {
608 COSTS_N_INSNS (1), /* cost of an add instruction */
609 COSTS_N_INSNS (2), /* cost of a lea instruction */
610 COSTS_N_INSNS (1), /* variable shift costs */
611 COSTS_N_INSNS (1), /* constant shift costs */
612 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
613 COSTS_N_INSNS (4), /* HI */
614 COSTS_N_INSNS (3), /* SI */
615 COSTS_N_INSNS (4), /* DI */
616 COSTS_N_INSNS (5)}, /* other */
617 0, /* cost of multiply per each bit set */
618 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
619 COSTS_N_INSNS (35), /* HI */
620 COSTS_N_INSNS (51), /* SI */
621 COSTS_N_INSNS (83), /* DI */
622 COSTS_N_INSNS (83)}, /* other */
623 COSTS_N_INSNS (1), /* cost of movsx */
624 COSTS_N_INSNS (1), /* cost of movzx */
625 8, /* "large" insn */
626 9, /* MOVE_RATIO */
627 4, /* cost for loading QImode using movzbl */
628 {3, 4, 3}, /* cost of loading integer registers
629 in QImode, HImode and SImode.
630 Relative to reg-reg move (2). */
631 {3, 4, 3}, /* cost of storing integer registers */
632 4, /* cost of reg,reg fld/fst */
633 {4, 4, 12}, /* cost of loading fp registers
634 in SFmode, DFmode and XFmode */
635 {6, 6, 8}, /* cost of storing fp registers
636 in SFmode, DFmode and XFmode */
637 2, /* cost of moving MMX register */
638 {3, 3}, /* cost of loading MMX registers
639 in SImode and DImode */
640 {4, 4}, /* cost of storing MMX registers
641 in SImode and DImode */
642 2, /* cost of moving SSE register */
643 {4, 4, 3}, /* cost of loading SSE registers
644 in SImode, DImode and TImode */
645 {4, 4, 5}, /* cost of storing SSE registers
646 in SImode, DImode and TImode */
647 3, /* MMX or SSE register to integer */
648 /* On K8
649 MOVD reg64, xmmreg Double FSTORE 4
650 MOVD reg32, xmmreg Double FSTORE 4
651 On AMDFAM10
652 MOVD reg64, xmmreg Double FADD 3
653 1/1 1/1
654 MOVD reg32, xmmreg Double FADD 3
655 1/1 1/1 */
656 64, /* size of prefetch block */
657 /* New AMD processors never drop prefetches; if they cannot be performed
658 immediately, they are queued. We set the number of simultaneous prefetches
659 to a large constant to reflect this (it is probably not a good idea to leave
660 the number of prefetches completely unlimited, as their execution also takes
661 some time). */
662 100, /* number of parallel prefetches */
663 5, /* Branch cost */
664 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
665 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
666 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
667 COSTS_N_INSNS (2), /* cost of FABS instruction. */
668 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
669 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
670
671 /* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but for
672 very small blocks it is better to use a loop. For large blocks, a libcall
673 can do nontemporal accesses and beat inline code considerably. */
674 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
675 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
676 {{libcall, {{8, loop}, {24, unrolled_loop},
677 {2048, rep_prefix_4_byte}, {-1, libcall}}},
678 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
679 };
680
681 static const
682 struct processor_costs pentium4_cost = {
683 COSTS_N_INSNS (1), /* cost of an add instruction */
684 COSTS_N_INSNS (3), /* cost of a lea instruction */
685 COSTS_N_INSNS (4), /* variable shift costs */
686 COSTS_N_INSNS (4), /* constant shift costs */
687 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
688 COSTS_N_INSNS (15), /* HI */
689 COSTS_N_INSNS (15), /* SI */
690 COSTS_N_INSNS (15), /* DI */
691 COSTS_N_INSNS (15)}, /* other */
692 0, /* cost of multiply per each bit set */
693 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
694 COSTS_N_INSNS (56), /* HI */
695 COSTS_N_INSNS (56), /* SI */
696 COSTS_N_INSNS (56), /* DI */
697 COSTS_N_INSNS (56)}, /* other */
698 COSTS_N_INSNS (1), /* cost of movsx */
699 COSTS_N_INSNS (1), /* cost of movzx */
700 16, /* "large" insn */
701 6, /* MOVE_RATIO */
702 2, /* cost for loading QImode using movzbl */
703 {4, 5, 4}, /* cost of loading integer registers
704 in QImode, HImode and SImode.
705 Relative to reg-reg move (2). */
706 {2, 3, 2}, /* cost of storing integer registers */
707 2, /* cost of reg,reg fld/fst */
708 {2, 2, 6}, /* cost of loading fp registers
709 in SFmode, DFmode and XFmode */
710 {4, 4, 6}, /* cost of storing fp registers
711 in SFmode, DFmode and XFmode */
712 2, /* cost of moving MMX register */
713 {2, 2}, /* cost of loading MMX registers
714 in SImode and DImode */
715 {2, 2}, /* cost of storing MMX registers
716 in SImode and DImode */
717 12, /* cost of moving SSE register */
718 {12, 12, 12}, /* cost of loading SSE registers
719 in SImode, DImode and TImode */
720 {2, 2, 8}, /* cost of storing SSE registers
721 in SImode, DImode and TImode */
722 10, /* MMX or SSE register to integer */
723 64, /* size of prefetch block */
724 6, /* number of parallel prefetches */
725 2, /* Branch cost */
726 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
727 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
728 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
729 COSTS_N_INSNS (2), /* cost of FABS instruction. */
730 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
731 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
732 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
733 DUMMY_STRINGOP_ALGS},
734 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
735 {-1, libcall}}},
736 DUMMY_STRINGOP_ALGS},
737 };
738
739 static const
740 struct processor_costs nocona_cost = {
741 COSTS_N_INSNS (1), /* cost of an add instruction */
742 COSTS_N_INSNS (1), /* cost of a lea instruction */
743 COSTS_N_INSNS (1), /* variable shift costs */
744 COSTS_N_INSNS (1), /* constant shift costs */
745 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
746 COSTS_N_INSNS (10), /* HI */
747 COSTS_N_INSNS (10), /* SI */
748 COSTS_N_INSNS (10), /* DI */
749 COSTS_N_INSNS (10)}, /* other */
750 0, /* cost of multiply per each bit set */
751 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
752 COSTS_N_INSNS (66), /* HI */
753 COSTS_N_INSNS (66), /* SI */
754 COSTS_N_INSNS (66), /* DI */
755 COSTS_N_INSNS (66)}, /* other */
756 COSTS_N_INSNS (1), /* cost of movsx */
757 COSTS_N_INSNS (1), /* cost of movzx */
758 16, /* "large" insn */
759 17, /* MOVE_RATIO */
760 4, /* cost for loading QImode using movzbl */
761 {4, 4, 4}, /* cost of loading integer registers
762 in QImode, HImode and SImode.
763 Relative to reg-reg move (2). */
764 {4, 4, 4}, /* cost of storing integer registers */
765 3, /* cost of reg,reg fld/fst */
766 {12, 12, 12}, /* cost of loading fp registers
767 in SFmode, DFmode and XFmode */
768 {4, 4, 4}, /* cost of storing fp registers
769 in SFmode, DFmode and XFmode */
770 6, /* cost of moving MMX register */
771 {12, 12}, /* cost of loading MMX registers
772 in SImode and DImode */
773 {12, 12}, /* cost of storing MMX registers
774 in SImode and DImode */
775 6, /* cost of moving SSE register */
776 {12, 12, 12}, /* cost of loading SSE registers
777 in SImode, DImode and TImode */
778 {12, 12, 12}, /* cost of storing SSE registers
779 in SImode, DImode and TImode */
780 8, /* MMX or SSE register to integer */
781 128, /* size of prefetch block */
782 8, /* number of parallel prefetches */
783 1, /* Branch cost */
784 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
785 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
786 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
787 COSTS_N_INSNS (3), /* cost of FABS instruction. */
788 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
789 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
790 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
791 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
792 {100000, unrolled_loop}, {-1, libcall}}}},
793 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
794 {-1, libcall}}},
795 {libcall, {{24, loop}, {64, unrolled_loop},
796 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
797 };
798
799 static const
800 struct processor_costs core2_cost = {
801 COSTS_N_INSNS (1), /* cost of an add instruction */
802 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
803 COSTS_N_INSNS (1), /* variable shift costs */
804 COSTS_N_INSNS (1), /* constant shift costs */
805 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
806 COSTS_N_INSNS (3), /* HI */
807 COSTS_N_INSNS (3), /* SI */
808 COSTS_N_INSNS (3), /* DI */
809 COSTS_N_INSNS (3)}, /* other */
810 0, /* cost of multiply per each bit set */
811 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
812 COSTS_N_INSNS (22), /* HI */
813 COSTS_N_INSNS (22), /* SI */
814 COSTS_N_INSNS (22), /* DI */
815 COSTS_N_INSNS (22)}, /* other */
816 COSTS_N_INSNS (1), /* cost of movsx */
817 COSTS_N_INSNS (1), /* cost of movzx */
818 8, /* "large" insn */
819 16, /* MOVE_RATIO */
820 2, /* cost for loading QImode using movzbl */
821 {6, 6, 6}, /* cost of loading integer registers
822 in QImode, HImode and SImode.
823 Relative to reg-reg move (2). */
824 {4, 4, 4}, /* cost of storing integer registers */
825 2, /* cost of reg,reg fld/fst */
826 {6, 6, 6}, /* cost of loading fp registers
827 in SFmode, DFmode and XFmode */
830 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
829 2, /* cost of moving MMX register */
830 {6, 6}, /* cost of loading MMX registers
831 in SImode and DImode */
832 {4, 4}, /* cost of storing MMX registers
833 in SImode and DImode */
834 2, /* cost of moving SSE register */
835 {6, 6, 6}, /* cost of loading SSE registers
836 in SImode, DImode and TImode */
837 {4, 4, 4}, /* cost of storing SSE registers
838 in SImode, DImode and TImode */
839 2, /* MMX or SSE register to integer */
840 128, /* size of prefetch block */
841 8, /* number of parallel prefetches */
842 3, /* Branch cost */
843 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
844 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
845 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
846 COSTS_N_INSNS (1), /* cost of FABS instruction. */
847 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
848 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
849 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
850 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
851 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
852 {{libcall, {{8, loop}, {15, unrolled_loop},
853 {2048, rep_prefix_4_byte}, {-1, libcall}}},
854 {libcall, {{24, loop}, {32, unrolled_loop},
855 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
856 };
857
858 /* Generic64 should produce code tuned for Nocona and K8. */
859 static const
860 struct processor_costs generic64_cost = {
861 COSTS_N_INSNS (1), /* cost of an add instruction */
862 /* On all chips taken into consideration, lea is 2 cycles or more. With
863 this cost, however, our current implementation of synth_mult results in
864 the use of unnecessary temporary registers, causing regressions on several
865 SPECfp benchmarks. */
866 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
867 COSTS_N_INSNS (1), /* variable shift costs */
868 COSTS_N_INSNS (1), /* constant shift costs */
869 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
870 COSTS_N_INSNS (4), /* HI */
871 COSTS_N_INSNS (3), /* SI */
872 COSTS_N_INSNS (4), /* DI */
873 COSTS_N_INSNS (2)}, /* other */
874 0, /* cost of multiply per each bit set */
875 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
876 COSTS_N_INSNS (26), /* HI */
877 COSTS_N_INSNS (42), /* SI */
878 COSTS_N_INSNS (74), /* DI */
879 COSTS_N_INSNS (74)}, /* other */
880 COSTS_N_INSNS (1), /* cost of movsx */
881 COSTS_N_INSNS (1), /* cost of movzx */
882 8, /* "large" insn */
883 17, /* MOVE_RATIO */
884 4, /* cost for loading QImode using movzbl */
885 {4, 4, 4}, /* cost of loading integer registers
886 in QImode, HImode and SImode.
887 Relative to reg-reg move (2). */
888 {4, 4, 4}, /* cost of storing integer registers */
889 4, /* cost of reg,reg fld/fst */
890 {12, 12, 12}, /* cost of loading fp registers
891 in SFmode, DFmode and XFmode */
892 {6, 6, 8}, /* cost of storing fp registers
893 in SFmode, DFmode and XFmode */
894 2, /* cost of moving MMX register */
895 {8, 8}, /* cost of loading MMX registers
896 in SImode and DImode */
897 {8, 8}, /* cost of storing MMX registers
898 in SImode and DImode */
899 2, /* cost of moving SSE register */
900 {8, 8, 8}, /* cost of loading SSE registers
901 in SImode, DImode and TImode */
902 {8, 8, 8}, /* cost of storing SSE registers
903 in SImode, DImode and TImode */
904 5, /* MMX or SSE register to integer */
905 64, /* size of prefetch block */
906 6, /* number of parallel prefetches */
907 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
908 value is increased to the perhaps more appropriate value of 5. */
909 3, /* Branch cost */
910 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
911 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
912 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
913 COSTS_N_INSNS (8), /* cost of FABS instruction. */
914 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
915 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
916 {DUMMY_STRINGOP_ALGS,
917 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
918 {DUMMY_STRINGOP_ALGS,
919 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
920 };
921
922 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
923 static const
924 struct processor_costs generic32_cost = {
925 COSTS_N_INSNS (1), /* cost of an add instruction */
926 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
927 COSTS_N_INSNS (1), /* variable shift costs */
928 COSTS_N_INSNS (1), /* constant shift costs */
929 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
930 COSTS_N_INSNS (4), /* HI */
931 COSTS_N_INSNS (3), /* SI */
932 COSTS_N_INSNS (4), /* DI */
933 COSTS_N_INSNS (2)}, /* other */
934 0, /* cost of multiply per each bit set */
935 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
936 COSTS_N_INSNS (26), /* HI */
937 COSTS_N_INSNS (42), /* SI */
938 COSTS_N_INSNS (74), /* DI */
939 COSTS_N_INSNS (74)}, /* other */
940 COSTS_N_INSNS (1), /* cost of movsx */
941 COSTS_N_INSNS (1), /* cost of movzx */
942 8, /* "large" insn */
943 17, /* MOVE_RATIO */
944 4, /* cost for loading QImode using movzbl */
945 {4, 4, 4}, /* cost of loading integer registers
946 in QImode, HImode and SImode.
947 Relative to reg-reg move (2). */
948 {4, 4, 4}, /* cost of storing integer registers */
949 4, /* cost of reg,reg fld/fst */
950 {12, 12, 12}, /* cost of loading fp registers
951 in SFmode, DFmode and XFmode */
952 {6, 6, 8}, /* cost of storing fp registers
953 in SFmode, DFmode and XFmode */
954 2, /* cost of moving MMX register */
955 {8, 8}, /* cost of loading MMX registers
956 in SImode and DImode */
957 {8, 8}, /* cost of storing MMX registers
958 in SImode and DImode */
959 2, /* cost of moving SSE register */
960 {8, 8, 8}, /* cost of loading SSE registers
961 in SImode, DImode and TImode */
962 {8, 8, 8}, /* cost of storing SSE registers
963 in SImode, DImode and TImode */
964 5, /* MMX or SSE register to integer */
965 64, /* size of prefetch block */
966 6, /* number of parallel prefetches */
967 3, /* Branch cost */
968 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
969 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
970 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
971 COSTS_N_INSNS (8), /* cost of FABS instruction. */
972 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
973 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
974 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
975 DUMMY_STRINGOP_ALGS},
976 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
977 DUMMY_STRINGOP_ALGS},
978 };
979
980 const struct processor_costs *ix86_cost = &pentium_cost;
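/* The default above is only a placeholder; option processing (override_options)
   later re-points ix86_cost at the cost table for the CPU selected by -mtune,
   or at size_cost when optimizing for size.  */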
981
982 /* Processor feature/optimization bitmasks. */
983 #define m_386 (1<<PROCESSOR_I386)
984 #define m_486 (1<<PROCESSOR_I486)
985 #define m_PENT (1<<PROCESSOR_PENTIUM)
986 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
987 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
988 #define m_NOCONA (1<<PROCESSOR_NOCONA)
989 #define m_CORE2 (1<<PROCESSOR_CORE2)
990
991 #define m_GEODE (1<<PROCESSOR_GEODE)
992 #define m_K6 (1<<PROCESSOR_K6)
993 #define m_K6_GEODE (m_K6 | m_GEODE)
994 #define m_K8 (1<<PROCESSOR_K8)
995 #define m_ATHLON (1<<PROCESSOR_ATHLON)
996 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
997 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
998 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
999
1000 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1001 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1002
1003 /* Generic instruction choice should be a common subset of the supported CPUs
1004 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1005 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
1006
1007 /* Feature tests against the various tunings. */
1008 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1009 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1010 negatively, so enabling it for Generic64 seems like a good code size
1011 tradeoff. We can't enable it for 32bit generic because it does not
1012 work well with PPro-based chips. */
1013 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1014
1015 /* X86_TUNE_PUSH_MEMORY */
1016 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1017 | m_NOCONA | m_CORE2 | m_GENERIC,
1018
1019 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1020 m_486 | m_PENT,
1021
1022 /* X86_TUNE_USE_BIT_TEST */
1023 m_386,
1024
1025 /* X86_TUNE_UNROLL_STRLEN */
1026 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1027
1028 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1029 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1030 | m_NOCONA | m_CORE2 | m_GENERIC,
1031
1032 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in the P4 based
1033 on simulation results, but after the P4 was made, no performance benefit
1034 was observed with branch hints; they also increase the code size.
1035 As a result, icc never generates branch hints. */
1036 0,
1037
1038 /* X86_TUNE_DOUBLE_WITH_ADD */
1039 ~m_386,
1040
1041 /* X86_TUNE_USE_SAHF */
1042 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1043 | m_NOCONA | m_CORE2 | m_GENERIC,
1044
1045 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1046 partial dependencies. */
1047 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1048 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1049
1050 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1051 register stalls on the Generic32 compilation setting as well. However,
1052 in the current implementation partial register stalls are not eliminated
1053 very well - they can be introduced via subregs synthesized by combine
1054 and can happen in caller/callee saving sequences. Because this option
1055 pays back little on PPro-based chips and conflicts with the partial
1056 register dependencies used by Athlon/P4-based chips, it is better to
1057 leave it off for generic32 for now. */
1058 m_PPRO,
1059
1060 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1061 m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_USE_HIMODE_FIOP */
1064 m_386 | m_486 | m_K6_GEODE,
1065
1066 /* X86_TUNE_USE_SIMODE_FIOP */
1067 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1068
1069 /* X86_TUNE_USE_MOV0 */
1070 m_K6,
1071
1072 /* X86_TUNE_USE_CLTD */
1073 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1074
1075 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1076 m_PENT4,
1077
1078 /* X86_TUNE_SPLIT_LONG_MOVES */
1079 m_PPRO,
1080
1081 /* X86_TUNE_READ_MODIFY_WRITE */
1082 ~m_PENT,
1083
1084 /* X86_TUNE_READ_MODIFY */
1085 ~(m_PENT | m_PPRO),
1086
1087 /* X86_TUNE_PROMOTE_QIMODE */
1088 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1089 | m_GENERIC /* | m_PENT4 ? */,
1090
1091 /* X86_TUNE_FAST_PREFIX */
1092 ~(m_PENT | m_486 | m_386),
1093
1094 /* X86_TUNE_SINGLE_STRINGOP */
1095 m_386 | m_PENT4 | m_NOCONA,
1096
1097 /* X86_TUNE_QIMODE_MATH */
1098 ~0,
1099
1100 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1101 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1102 might be considered for Generic32 if our scheme for avoiding partial
1103 stalls were more effective. */
1104 ~m_PPRO,
1105
1106 /* X86_TUNE_PROMOTE_QI_REGS */
1107 0,
1108
1109 /* X86_TUNE_PROMOTE_HI_REGS */
1110 m_PPRO,
1111
1112 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1113 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1114
1115 /* X86_TUNE_ADD_ESP_8 */
1116 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1117 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1118
1119 /* X86_TUNE_SUB_ESP_4 */
1120 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1121
1122 /* X86_TUNE_SUB_ESP_8 */
1123 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1124 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1125
1126 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1127 for DFmode copies */
1128 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1129 | m_GENERIC | m_GEODE),
1130
1131 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1132 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1133
1134 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1135 conflict here between PPro/Pentium4-based chips that treat 128bit
1136 SSE registers as single units and K8-based chips that divide SSE
1137 registers into two 64bit halves. This knob promotes all store destinations
1138 to be 128bit to allow register renaming on 128bit SSE units, but usually
1139 results in one extra microop on 64bit SSE units. Experimental results
1140 show that disabling this option on P4 brings over a 20% SPECfp regression,
1141 while enabling it on K8 brings roughly a 2.4% regression that can be partly
1142 masked by careful scheduling of moves. */
1143 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1144
1145 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1146 m_AMDFAM10,
1147
1148 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1149 are resolved on SSE register parts instead of whole registers, so we may
1150 maintain just lower part of scalar values in proper format leaving the
1151 upper part undefined. */
1152 m_ATHLON_K8,
1153
1154 /* X86_TUNE_SSE_TYPELESS_STORES */
1155 m_ATHLON_K8_AMDFAM10,
1156
1157 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1158 m_PPRO | m_PENT4 | m_NOCONA,
1159
1160 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1161 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1162
1163 /* X86_TUNE_PROLOGUE_USING_MOVE */
1164 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1165
1166 /* X86_TUNE_EPILOGUE_USING_MOVE */
1167 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1168
1169 /* X86_TUNE_SHIFT1 */
1170 ~m_486,
1171
1172 /* X86_TUNE_USE_FFREEP */
1173 m_ATHLON_K8_AMDFAM10,
1174
1175 /* X86_TUNE_INTER_UNIT_MOVES */
1176 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1177
1178 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1179 than 4 branch instructions in the 16 byte window. */
1180 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1181
1182 /* X86_TUNE_SCHEDULE */
1183 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1184
1185 /* X86_TUNE_USE_BT */
1186 m_ATHLON_K8_AMDFAM10,
1187
1188 /* X86_TUNE_USE_INCDEC */
1189 ~(m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC),
1190
1191 /* X86_TUNE_PAD_RETURNS */
1192 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1193
1194 /* X86_TUNE_EXT_80387_CONSTANTS */
1195 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_SHORTEN_X87_SSE */
1198 ~m_K8,
1199
1200 /* X86_TUNE_AVOID_VECTOR_DECODE */
1201 m_K8 | m_GENERIC64,
1202
1203 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1204 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1205 ~(m_386 | m_486),
1206
1207 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of 32-bit constant and memory is
1208 vector path on AMD machines. */
1209 m_K8 | m_GENERIC64 | m_AMDFAM10,
1210
1211 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of 8-bit constant is vector path on AMD
1212 machines. */
1213 m_K8 | m_GENERIC64 | m_AMDFAM10,
1214
1215 /* X86_TUNE_MOVE_M1_VIA_OR: On pentiums, it is faster to load -1 via OR
1216 than a MOV. */
1217 m_PENT,
1218
1219 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1220 but one byte longer. */
1221 m_PENT,
1222
1223 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with memory
1224 operand that cannot be represented using a modRM byte. The XOR
1225 replacement is long decoded, so this split helps here as well. */
1226 m_K6,
1227 };
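/* Each entry above is a mask of processor bits; a tuning applies to the
   current compilation when the bit for the processor selected by -mtune
   (1 << ix86_tune) is set in its entry.  */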
1228
1229 /* Feature tests against the various architecture variations. */
1230 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1231 /* X86_ARCH_CMOVE */
1232 m_PPRO | m_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA,
1233
1234 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1235 ~m_386,
1236
1237 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1238 ~(m_386 | m_486),
1239
1240 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1241 ~m_386,
1242
1243 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1244 ~m_386,
1245 };
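/* Likewise, an architecture feature is available when the bit for the ISA
   selected by -march (1 << ix86_arch) is set in its entry.  */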
1246
1247 static const unsigned int x86_accumulate_outgoing_args
1248 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1249
1250 static const unsigned int x86_arch_always_fancy_math_387
1251 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1252 | m_NOCONA | m_CORE2 | m_GENERIC;
1253
1254 static enum stringop_alg stringop_alg = no_stringop;
1255
1256 /* In case the average insn count for a single function invocation is
1257 lower than this constant, emit fast (but longer) prologue and
1258 epilogue code. */
1259 #define FAST_PROLOGUE_INSN_COUNT 20
1260
1261 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1262 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1263 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1264 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1265
1266 /* Array of the smallest class containing reg number REGNO, indexed by
1267 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1268
1269 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1270 {
1271 /* ax, dx, cx, bx */
1272 AREG, DREG, CREG, BREG,
1273 /* si, di, bp, sp */
1274 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1275 /* FP registers */
1276 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1277 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1278 /* arg pointer */
1279 NON_Q_REGS,
1280 /* flags, fpsr, fpcr, frame */
1281 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1282 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1283 SSE_REGS, SSE_REGS,
1284 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1285 MMX_REGS, MMX_REGS,
1286 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1287 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1288 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1289 SSE_REGS, SSE_REGS,
1290 };
1291
1292 /* The "default" register map used in 32bit mode. */
1293
1294 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1295 {
1296 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1297 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1298 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1299 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1300 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1301 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1302 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1303 };
1304
1305 static int const x86_64_int_parameter_registers[6] =
1306 {
1307 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1308 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1309 };
1310
1311 static int const x86_64_int_return_registers[4] =
1312 {
1313 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1314 };
1315
1316 /* The "default" register map used in 64bit mode. */
1317 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1318 {
1319 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1320 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1321 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1322 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1323 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1324 8,9,10,11,12,13,14,15, /* extended integer registers */
1325 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1326 };
1327
1328 /* Define the register numbers to be used in Dwarf debugging information.
1329 The SVR4 reference port C compiler uses the following register numbers
1330 in its Dwarf output code:
1331 0 for %eax (gcc regno = 0)
1332 1 for %ecx (gcc regno = 2)
1333 2 for %edx (gcc regno = 1)
1334 3 for %ebx (gcc regno = 3)
1335 4 for %esp (gcc regno = 7)
1336 5 for %ebp (gcc regno = 6)
1337 6 for %esi (gcc regno = 4)
1338 7 for %edi (gcc regno = 5)
1339 The following three DWARF register numbers are never generated by
1340 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1341 believes these numbers have these meanings.
1342 8 for %eip (no gcc equivalent)
1343 9 for %eflags (gcc regno = 17)
1344 10 for %trapno (no gcc equivalent)
1345 It is not at all clear how we should number the FP stack registers
1346 for the x86 architecture. If the version of SDB on x86/svr4 were
1347 a bit less brain dead with respect to floating-point then we would
1348 have a precedent to follow with respect to DWARF register numbers
1349 for x86 FP registers, but the SDB on x86/svr4 is so completely
1350 broken with respect to FP registers that it is hardly worth thinking
1351 of it as something to strive for compatibility with.
1352 The version of x86/svr4 SDB I have at the moment does (partially)
1353 seem to believe that DWARF register number 11 is associated with
1354 the x86 register %st(0), but that's about all. Higher DWARF
1355 register numbers don't seem to be associated with anything in
1356 particular, and even for DWARF regno 11, SDB only seems to under-
1357 stand that it should say that a variable lives in %st(0) (when
1358 asked via an `=' command) if we said it was in DWARF regno 11,
1359 but SDB still prints garbage when asked for the value of the
1360 variable in question (via a `/' command).
1361 (Also note that the labels SDB prints for various FP stack regs
1362 when doing an `x' command are all wrong.)
1363 Note that these problems generally don't affect the native SVR4
1364 C compiler because it doesn't allow the use of -O with -g and
1365 because when it is *not* optimizing, it allocates a memory
1366 location for each floating-point variable, and the memory
1367 location is what gets described in the DWARF AT_location
1368 attribute for the variable in question.
1369 Regardless of the severe mental illness of the x86/svr4 SDB, we
1370 do something sensible here and we use the following DWARF
1371 register numbers. Note that these are all stack-top-relative
1372 numbers.
1373 11 for %st(0) (gcc regno = 8)
1374 12 for %st(1) (gcc regno = 9)
1375 13 for %st(2) (gcc regno = 10)
1376 14 for %st(3) (gcc regno = 11)
1377 15 for %st(4) (gcc regno = 12)
1378 16 for %st(5) (gcc regno = 13)
1379 17 for %st(6) (gcc regno = 14)
1380 18 for %st(7) (gcc regno = 15)
1381 */
1382 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1383 {
1384 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1385 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1386 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1387 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1388 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1389 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1390 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1391 };
1392
1393 /* Test and compare insns in i386.md store the information needed to
1394 generate branch and scc insns here. */
1395
1396 rtx ix86_compare_op0 = NULL_RTX;
1397 rtx ix86_compare_op1 = NULL_RTX;
1398 rtx ix86_compare_emitted = NULL_RTX;
1399
1400 /* Size of the register save area. */
1401 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1402
1403 /* Define the structure for the machine field in struct function. */
1404
1405 struct stack_local_entry GTY(())
1406 {
1407 unsigned short mode;
1408 unsigned short n;
1409 rtx rtl;
1410 struct stack_local_entry *next;
1411 };
1412
1413 /* Structure describing stack frame layout.
1414 Stack grows downward:
1415
1416 [arguments]
1417 <- ARG_POINTER
1418 saved pc
1419
1420 saved frame pointer if frame_pointer_needed
1421 <- HARD_FRAME_POINTER
1422 [saved regs]
1423
1424 [padding1] \
1425 )
1426 [va_arg registers] (
1427 > to_allocate <- FRAME_POINTER
1428 [frame] (
1429 )
1430 [padding2] /
1431 */
1432 struct ix86_frame
1433 {
1434 int nregs;
1435 int padding1;
1436 int va_arg_size;
1437 HOST_WIDE_INT frame;
1438 int padding2;
1439 int outgoing_arguments_size;
1440 int red_zone_size;
1441
1442 HOST_WIDE_INT to_allocate;
1443 /* The offsets relative to ARG_POINTER. */
1444 HOST_WIDE_INT frame_pointer_offset;
1445 HOST_WIDE_INT hard_frame_pointer_offset;
1446 HOST_WIDE_INT stack_pointer_offset;
1447
1448 /* When save_regs_using_mov is set, emit prologue using
1449 move instead of push instructions. */
1450 bool save_regs_using_mov;
1451 };
1452
1453 /* Code model option. */
1454 enum cmodel ix86_cmodel;
1455 /* Asm dialect. */
1456 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1457 /* TLS dialects. */
1458 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1459
1460 /* Which unit we are generating floating point math for. */
1461 enum fpmath_unit ix86_fpmath;
1462
1463 /* Which cpu are we scheduling for. */
1464 enum processor_type ix86_tune;
1465
1466 /* Which instruction set architecture to use. */
1467 enum processor_type ix86_arch;
1468
1469 /* true if sse prefetch instruction is not NOOP. */
1470 int x86_prefetch_sse;
1471
1472 /* ix86_regparm_string as a number */
1473 static int ix86_regparm;
1474
1475 /* -mstackrealign option */
1476 extern int ix86_force_align_arg_pointer;
1477 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1478
1479 /* Preferred alignment for stack boundary in bits. */
1480 unsigned int ix86_preferred_stack_boundary;
1481
1482 /* Values 1-5: see jump.c */
1483 int ix86_branch_cost;
1484
1485 /* Variables which are this size or smaller are put in the data/bss
1486 or ldata/lbss sections. */
1487
1488 int ix86_section_threshold = 65536;
1489
1490 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1491 char internal_label_prefix[16];
1492 int internal_label_prefix_len;
1493
1494 /* Register class used for passing a given 64-bit part of the argument.
1495 These represent classes as documented by the psABI, with the exception
1496 of the SSESF and SSEDF classes, which are basically the SSE class; gcc
1497 just uses an SFmode or DFmode move instead of DImode to avoid reformatting penalties.
1498
1499 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1500 whenever possible (the upper half then contains only padding). */
1501 enum x86_64_reg_class
1502 {
1503 X86_64_NO_CLASS,
1504 X86_64_INTEGER_CLASS,
1505 X86_64_INTEGERSI_CLASS,
1506 X86_64_SSE_CLASS,
1507 X86_64_SSESF_CLASS,
1508 X86_64_SSEDF_CLASS,
1509 X86_64_SSEUP_CLASS,
1510 X86_64_X87_CLASS,
1511 X86_64_X87UP_CLASS,
1512 X86_64_COMPLEX_X87_CLASS,
1513 X86_64_MEMORY_CLASS
1514 };
1515 static const char * const x86_64_reg_class_name[] =
1516 {
1517 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1518 "sseup", "x87", "x87up", "cplx87", "no"
1519 };
1520
1521 #define MAX_CLASSES 4
1522
1523 /* Table of constants used by fldpi, fldln2, etc.... */
1524 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1525 static bool ext_80387_constants_init = 0;
1526
1527 \f
1528 static struct machine_function * ix86_init_machine_status (void);
1529 static rtx ix86_function_value (tree, tree, bool);
1530 static int ix86_function_regparm (tree, tree);
1531 static void ix86_compute_frame_layout (struct ix86_frame *);
1532 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1533 rtx, rtx, int);
1534
1535 \f
1536 /* The svr4 ABI for the i386 says that records and unions are returned
1537 in memory. */
1538 #ifndef DEFAULT_PCC_STRUCT_RETURN
1539 #define DEFAULT_PCC_STRUCT_RETURN 1
1540 #endif
1541
1542 /* Implement TARGET_HANDLE_OPTION. */
1543
1544 static bool
1545 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1546 {
1547 switch (code)
1548 {
1549 case OPT_m3dnow:
1550 if (!value)
1551 {
1552 target_flags &= ~MASK_3DNOW_A;
1553 target_flags_explicit |= MASK_3DNOW_A;
1554 }
1555 return true;
1556
1557 case OPT_mmmx:
1558 if (!value)
1559 {
1560 target_flags &= ~(MASK_3DNOW | MASK_3DNOW_A);
1561 target_flags_explicit |= MASK_3DNOW | MASK_3DNOW_A;
1562 }
1563 return true;
1564
1565 case OPT_msse:
1566 if (!value)
1567 {
1568 target_flags &= ~(MASK_SSE2 | MASK_SSE3 | MASK_SSE4A);
1569 target_flags_explicit |= MASK_SSE2 | MASK_SSE3 | MASK_SSE4A;
1570 }
1571 return true;
1572
1573 case OPT_msse2:
1574 if (!value)
1575 {
1576 target_flags &= ~(MASK_SSE3 | MASK_SSE4A);
1577 target_flags_explicit |= MASK_SSE3 | MASK_SSE4A;
1578 }
1579 return true;
1580
1581 case OPT_msse3:
1582 if (!value)
1583 {
1584 target_flags &= ~MASK_SSE4A;
1585 target_flags_explicit |= MASK_SSE4A;
1586 }
1587 return true;
1588
1589 default:
1590 return true;
1591 }
1592 }
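
/* A rough illustration of the cascade implemented above (example options;
   this is not an exhaustive list of effects):

     -mno-sse   also clears MASK_SSE2, MASK_SSE3 and MASK_SSE4A
     -mno-sse2  also clears MASK_SSE3 and MASK_SSE4A
     -mno-mmx   also clears MASK_3DNOW and MASK_3DNOW_A

   The cleared bits are recorded in target_flags_explicit as well, so
   override_options will not silently turn them back on later.  */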
1593
1594 /* Sometimes certain combinations of command options do not make
1595 sense on a particular target machine. You can define a macro
1596 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1597 defined, is executed once just after all the command options have
1598 been parsed.
1599
1600 Don't use this macro to turn on various extra optimizations for
1601 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1602
1603 void
1604 override_options (void)
1605 {
1606 int i;
1607 int ix86_tune_defaulted = 0;
1608 unsigned int ix86_arch_mask, ix86_tune_mask;
1609
1610 /* Comes from final.c -- no real reason to change it. */
1611 #define MAX_CODE_ALIGN 16
1612
1613 static struct ptt
1614 {
1615 const struct processor_costs *cost; /* Processor costs */
1616 const int target_enable; /* Target flags to enable. */
1617 const int target_disable; /* Target flags to disable. */
1618 const int align_loop; /* Default alignments. */
1619 const int align_loop_max_skip;
1620 const int align_jump;
1621 const int align_jump_max_skip;
1622 const int align_func;
1623 }
1624 const processor_target_table[PROCESSOR_max] =
1625 {
1626 {&i386_cost, 0, 0, 4, 3, 4, 3, 4},
1627 {&i486_cost, 0, 0, 16, 15, 16, 15, 16},
1628 {&pentium_cost, 0, 0, 16, 7, 16, 7, 16},
1629 {&pentiumpro_cost, 0, 0, 16, 15, 16, 7, 16},
1630 {&geode_cost, 0, 0, 0, 0, 0, 0, 0},
1631 {&k6_cost, 0, 0, 32, 7, 32, 7, 32},
1632 {&athlon_cost, 0, 0, 16, 7, 16, 7, 16},
1633 {&pentium4_cost, 0, 0, 0, 0, 0, 0, 0},
1634 {&k8_cost, 0, 0, 16, 7, 16, 7, 16},
1635 {&nocona_cost, 0, 0, 0, 0, 0, 0, 0},
1636 {&core2_cost, 0, 0, 16, 7, 16, 7, 16},
1637 {&generic32_cost, 0, 0, 16, 7, 16, 7, 16},
1638 {&generic64_cost, 0, 0, 16, 7, 16, 7, 16},
1639 {&amdfam10_cost, 0, 0, 32, 7, 32, 7, 32}
1640 };
1641
1642 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1643 static struct pta
1644 {
1645 const char *const name; /* processor name or nickname. */
1646 const enum processor_type processor;
1647 const enum pta_flags
1648 {
1649 PTA_SSE = 1 << 0,
1650 PTA_SSE2 = 1 << 1,
1651 PTA_SSE3 = 1 << 2,
1652 PTA_MMX = 1 << 3,
1653 PTA_PREFETCH_SSE = 1 << 4,
1654 PTA_3DNOW = 1 << 5,
1655 PTA_3DNOW_A = 1 << 6,
1656 PTA_64BIT = 1 << 7,
1657 PTA_SSSE3 = 1 << 8,
1658 PTA_CX16 = 1 << 9,
1659 PTA_POPCNT = 1 << 10,
1660 PTA_ABM = 1 << 11,
1661 PTA_SSE4A = 1 << 12,
1662 PTA_NO_SAHF = 1 << 13
1663 } flags;
1664 }
1665 const processor_alias_table[] =
1666 {
1667 {"i386", PROCESSOR_I386, 0},
1668 {"i486", PROCESSOR_I486, 0},
1669 {"i586", PROCESSOR_PENTIUM, 0},
1670 {"pentium", PROCESSOR_PENTIUM, 0},
1671 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1672 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1673 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1674 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1675 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_PREFETCH_SSE | PTA_SSE},
1676 {"i686", PROCESSOR_PENTIUMPRO, 0},
1677 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1678 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1679 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1680 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE},
1681 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_PREFETCH_SSE | PTA_SSE2},
1682 {"pentium4", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1683 | PTA_MMX | PTA_PREFETCH_SSE},
1684 {"pentium4m", PROCESSOR_PENTIUM4, PTA_SSE | PTA_SSE2
1685 | PTA_MMX | PTA_PREFETCH_SSE},
1686 {"prescott", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3
1687 | PTA_MMX | PTA_PREFETCH_SSE},
1688 {"nocona", PROCESSOR_NOCONA, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_64BIT
1689 | PTA_MMX | PTA_PREFETCH_SSE
1690 | PTA_CX16 | PTA_NO_SAHF},
1691 {"core2", PROCESSOR_CORE2, PTA_SSE | PTA_SSE2 | PTA_SSE3 | PTA_SSSE3
1692 | PTA_64BIT | PTA_MMX
1693 | PTA_PREFETCH_SSE | PTA_CX16},
1694 {"geode", PROCESSOR_GEODE, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1695 | PTA_3DNOW_A},
1696 {"k6", PROCESSOR_K6, PTA_MMX},
1697 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1698 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1699 {"athlon", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1700 | PTA_3DNOW_A},
1701 {"athlon-tbird", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE
1702 | PTA_3DNOW | PTA_3DNOW_A},
1703 {"athlon-4", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1704 | PTA_3DNOW_A | PTA_SSE},
1705 {"athlon-xp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1706 | PTA_3DNOW_A | PTA_SSE},
1707 {"athlon-mp", PROCESSOR_ATHLON, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1708 | PTA_3DNOW_A | PTA_SSE},
1709 {"x86-64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_64BIT
1710 | PTA_SSE | PTA_SSE2 | PTA_NO_SAHF},
1711 {"k8", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW | PTA_64BIT
1712 | PTA_3DNOW_A | PTA_SSE | PTA_SSE2
1713 | PTA_NO_SAHF},
1714 {"opteron", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1715 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1716 | PTA_SSE2 | PTA_NO_SAHF},
1717 {"athlon64", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1718 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1719 | PTA_SSE2 | PTA_NO_SAHF},
1720 {"athlon-fx", PROCESSOR_K8, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1721 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1722 | PTA_SSE2 | PTA_NO_SAHF},
1723 {"amdfam10", PROCESSOR_AMDFAM10, PTA_MMX | PTA_PREFETCH_SSE | PTA_3DNOW
1724 | PTA_64BIT | PTA_3DNOW_A | PTA_SSE
1725 | PTA_SSE2 | PTA_SSE3 | PTA_POPCNT
1726 | PTA_ABM | PTA_SSE4A | PTA_CX16},
1727 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1728 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1729 };
1730
1731 int const pta_size = ARRAY_SIZE (processor_alias_table);
1732
1733 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1734 SUBTARGET_OVERRIDE_OPTIONS;
1735 #endif
1736
1737 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1738 SUBSUBTARGET_OVERRIDE_OPTIONS;
1739 #endif
1740
1741 /* -fPIC is the default for x86_64 Darwin (Mach-O). */
1742 if (TARGET_MACHO && TARGET_64BIT)
1743 flag_pic = 2;
1744
1745 /* Set the default values for switches whose default depends on TARGET_64BIT
1746 in case they weren't overwritten by command line options. */
1747 if (TARGET_64BIT)
1748 {
1749 /* Mach-O doesn't support omitting the frame pointer for now. */
1750 if (flag_omit_frame_pointer == 2)
1751 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1752 if (flag_asynchronous_unwind_tables == 2)
1753 flag_asynchronous_unwind_tables = 1;
1754 if (flag_pcc_struct_return == 2)
1755 flag_pcc_struct_return = 0;
1756 }
1757 else
1758 {
1759 if (flag_omit_frame_pointer == 2)
1760 flag_omit_frame_pointer = 0;
1761 if (flag_asynchronous_unwind_tables == 2)
1762 flag_asynchronous_unwind_tables = 0;
1763 if (flag_pcc_struct_return == 2)
1764 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1765 }
1766
1767 /* Need to check -mtune=generic first. */
1768 if (ix86_tune_string)
1769 {
1770 if (!strcmp (ix86_tune_string, "generic")
1771 || !strcmp (ix86_tune_string, "i686")
1772 /* As special support for cross compilers we read -mtune=native
1773 as -mtune=generic. With native compilers we won't see the
1774 -mtune=native, as it was changed by the driver. */
1775 || !strcmp (ix86_tune_string, "native"))
1776 {
1777 if (TARGET_64BIT)
1778 ix86_tune_string = "generic64";
1779 else
1780 ix86_tune_string = "generic32";
1781 }
1782 else if (!strncmp (ix86_tune_string, "generic", 7))
1783 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1784 }
1785 else
1786 {
1787 if (ix86_arch_string)
1788 ix86_tune_string = ix86_arch_string;
1789 if (!ix86_tune_string)
1790 {
1791 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1792 ix86_tune_defaulted = 1;
1793 }
1794
1795 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1796 need to use a sensible tune option. */
1797 if (!strcmp (ix86_tune_string, "generic")
1798 || !strcmp (ix86_tune_string, "x86-64")
1799 || !strcmp (ix86_tune_string, "i686"))
1800 {
1801 if (TARGET_64BIT)
1802 ix86_tune_string = "generic64";
1803 else
1804 ix86_tune_string = "generic32";
1805 }
1806 }
1807 if (ix86_stringop_string)
1808 {
1809 if (!strcmp (ix86_stringop_string, "rep_byte"))
1810 stringop_alg = rep_prefix_1_byte;
1811 else if (!strcmp (ix86_stringop_string, "libcall"))
1812 stringop_alg = libcall;
1813 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1814 stringop_alg = rep_prefix_4_byte;
1815 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1816 stringop_alg = rep_prefix_8_byte;
1817 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1818 stringop_alg = loop_1_byte;
1819 else if (!strcmp (ix86_stringop_string, "loop"))
1820 stringop_alg = loop;
1821 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1822 stringop_alg = unrolled_loop;
1823 else
1824 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1825 }
1826 if (!strcmp (ix86_tune_string, "x86-64"))
1827 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1828 "-mtune=generic instead as appropriate.");
1829
1830 if (!ix86_arch_string)
1831 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1832 if (!strcmp (ix86_arch_string, "generic"))
1833 error ("generic CPU can be used only for -mtune= switch");
1834 if (!strncmp (ix86_arch_string, "generic", 7))
1835 error ("bad value (%s) for -march= switch", ix86_arch_string);
1836
1837 if (ix86_cmodel_string != 0)
1838 {
1839 if (!strcmp (ix86_cmodel_string, "small"))
1840 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1841 else if (!strcmp (ix86_cmodel_string, "medium"))
1842 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
1843 else if (!strcmp (ix86_cmodel_string, "large"))
1844 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
1845 else if (flag_pic)
1846 error ("code model %s does not support PIC mode", ix86_cmodel_string);
1847 else if (!strcmp (ix86_cmodel_string, "32"))
1848 ix86_cmodel = CM_32;
1849 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
1850 ix86_cmodel = CM_KERNEL;
1851 else
1852 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
1853 }
1854 else
1855 {
1856 ix86_cmodel = CM_32;
1857 if (TARGET_64BIT)
1858 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
1859 }
1860 if (ix86_asm_string != 0)
1861 {
1862 if (! TARGET_MACHO
1863 && !strcmp (ix86_asm_string, "intel"))
1864 ix86_asm_dialect = ASM_INTEL;
1865 else if (!strcmp (ix86_asm_string, "att"))
1866 ix86_asm_dialect = ASM_ATT;
1867 else
1868 error ("bad value (%s) for -masm= switch", ix86_asm_string);
1869 }
1870 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
1871 error ("code model %qs not supported in the %s bit mode",
1872 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
1873 if ((TARGET_64BIT != 0) != ((target_flags & MASK_64BIT) != 0))
1874 sorry ("%i-bit mode not compiled in",
1875 (target_flags & MASK_64BIT) ? 64 : 32);
1876
1877 for (i = 0; i < pta_size; i++)
1878 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
1879 {
1880 ix86_arch = processor_alias_table[i].processor;
1881 /* Default cpu tuning to the architecture. */
1882 ix86_tune = ix86_arch;
1883 if (processor_alias_table[i].flags & PTA_MMX
1884 && !(target_flags_explicit & MASK_MMX))
1885 target_flags |= MASK_MMX;
1886 if (processor_alias_table[i].flags & PTA_3DNOW
1887 && !(target_flags_explicit & MASK_3DNOW))
1888 target_flags |= MASK_3DNOW;
1889 if (processor_alias_table[i].flags & PTA_3DNOW_A
1890 && !(target_flags_explicit & MASK_3DNOW_A))
1891 target_flags |= MASK_3DNOW_A;
1892 if (processor_alias_table[i].flags & PTA_SSE
1893 && !(target_flags_explicit & MASK_SSE))
1894 target_flags |= MASK_SSE;
1895 if (processor_alias_table[i].flags & PTA_SSE2
1896 && !(target_flags_explicit & MASK_SSE2))
1897 target_flags |= MASK_SSE2;
1898 if (processor_alias_table[i].flags & PTA_SSE3
1899 && !(target_flags_explicit & MASK_SSE3))
1900 target_flags |= MASK_SSE3;
1901 if (processor_alias_table[i].flags & PTA_SSSE3
1902 && !(target_flags_explicit & MASK_SSSE3))
1903 target_flags |= MASK_SSSE3;
1904 if (processor_alias_table[i].flags & PTA_PREFETCH_SSE)
1905 x86_prefetch_sse = true;
1906 if (processor_alias_table[i].flags & PTA_CX16)
1907 x86_cmpxchg16b = true;
1908 if (processor_alias_table[i].flags & PTA_POPCNT
1909 && !(target_flags_explicit & MASK_POPCNT))
1910 target_flags |= MASK_POPCNT;
1911 if (processor_alias_table[i].flags & PTA_ABM
1912 && !(target_flags_explicit & MASK_ABM))
1913 target_flags |= MASK_ABM;
1914 if (processor_alias_table[i].flags & PTA_SSE4A
1915 && !(target_flags_explicit & MASK_SSE4A))
1916 target_flags |= MASK_SSE4A;
1917 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
1918 x86_sahf = true;
1919 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1920 error ("CPU you selected does not support x86-64 "
1921 "instruction set");
1922 break;
1923 }
1924
1925 if (i == pta_size)
1926 error ("bad value (%s) for -march= switch", ix86_arch_string);
1927
1928 ix86_arch_mask = 1u << ix86_arch;
1929 for (i = 0; i < X86_ARCH_LAST; ++i)
1930 ix86_arch_features[i] &= ix86_arch_mask;
1931
1932 for (i = 0; i < pta_size; i++)
1933 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
1934 {
1935 ix86_tune = processor_alias_table[i].processor;
1936 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
1937 {
1938 if (ix86_tune_defaulted)
1939 {
1940 ix86_tune_string = "x86-64";
1941 for (i = 0; i < pta_size; i++)
1942 if (! strcmp (ix86_tune_string,
1943 processor_alias_table[i].name))
1944 break;
1945 ix86_tune = processor_alias_table[i].processor;
1946 }
1947 else
1948 error ("CPU you selected does not support x86-64 "
1949 "instruction set");
1950 }
1951 /* Intel CPUs have always interpreted SSE prefetch instructions as
1952 NOPs; so, we can enable SSE prefetch instructions even when
1953 -mtune (rather than -march) points us to a processor that has them.
1954 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
1955 higher processors. */
1956 if (TARGET_CMOVE && (processor_alias_table[i].flags & PTA_PREFETCH_SSE))
1957 x86_prefetch_sse = true;
1958 break;
1959 }
1960 if (i == pta_size)
1961 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1962
1963 ix86_tune_mask = 1u << ix86_tune;
1964 for (i = 0; i < X86_TUNE_LAST; ++i)
1965 ix86_tune_features[i] &= ix86_tune_mask;
1966
1967 if (optimize_size)
1968 ix86_cost = &size_cost;
1969 else
1970 ix86_cost = processor_target_table[ix86_tune].cost;
1971 target_flags |= processor_target_table[ix86_tune].target_enable;
1972 target_flags &= ~processor_target_table[ix86_tune].target_disable;
1973
1974 /* Arrange to set up i386_stack_locals for all functions. */
1975 init_machine_status = ix86_init_machine_status;
1976
1977 /* Validate -mregparm= value. */
1978 if (ix86_regparm_string)
1979 {
1980 i = atoi (ix86_regparm_string);
1981 if (i < 0 || i > REGPARM_MAX)
1982 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
1983 else
1984 ix86_regparm = i;
1985 }
1986 else
1987 if (TARGET_64BIT)
1988 ix86_regparm = REGPARM_MAX;
1989
1990 /* If the user has provided any of the -malign-* options,
1991 warn and use that value only if -falign-* is not set.
1992 Remove this code in GCC 3.2 or later. */
1993 if (ix86_align_loops_string)
1994 {
1995 warning (0, "-malign-loops is obsolete, use -falign-loops");
1996 if (align_loops == 0)
1997 {
1998 i = atoi (ix86_align_loops_string);
1999 if (i < 0 || i > MAX_CODE_ALIGN)
2000 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2001 else
2002 align_loops = 1 << i;
2003 }
2004 }
2005
2006 if (ix86_align_jumps_string)
2007 {
2008 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2009 if (align_jumps == 0)
2010 {
2011 i = atoi (ix86_align_jumps_string);
2012 if (i < 0 || i > MAX_CODE_ALIGN)
2013 error ("-malign-jumps=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2014 else
2015 align_jumps = 1 << i;
2016 }
2017 }
2018
2019 if (ix86_align_funcs_string)
2020 {
2021 warning (0, "-malign-functions is obsolete, use -falign-functions");
2022 if (align_functions == 0)
2023 {
2024 i = atoi (ix86_align_funcs_string);
2025 if (i < 0 || i > MAX_CODE_ALIGN)
2026 error ("-malign-functions=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2027 else
2028 align_functions = 1 << i;
2029 }
2030 }
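
/* Worked example of the -malign-* handling above (values illustrative):
   the obsolete -malign-* options take a power-of-two exponent, while the
   generic -falign-* options take the alignment in bytes directly, so

     -malign-loops=4   =>  align_loops = 1 << 4 = 16
     -falign-loops=16  =>  align_loops = 16

   request the same 16-byte loop alignment.  */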
2031
2032 /* Default align_* from the processor table. */
2033 if (align_loops == 0)
2034 {
2035 align_loops = processor_target_table[ix86_tune].align_loop;
2036 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2037 }
2038 if (align_jumps == 0)
2039 {
2040 align_jumps = processor_target_table[ix86_tune].align_jump;
2041 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2042 }
2043 if (align_functions == 0)
2044 {
2045 align_functions = processor_target_table[ix86_tune].align_func;
2046 }
2047
2048 /* Validate -mbranch-cost= value, or provide default. */
2049 ix86_branch_cost = ix86_cost->branch_cost;
2050 if (ix86_branch_cost_string)
2051 {
2052 i = atoi (ix86_branch_cost_string);
2053 if (i < 0 || i > 5)
2054 error ("-mbranch-cost=%d is not between 0 and 5", i);
2055 else
2056 ix86_branch_cost = i;
2057 }
2058 if (ix86_section_threshold_string)
2059 {
2060 i = atoi (ix86_section_threshold_string);
2061 if (i < 0)
2062 error ("-mlarge-data-threshold=%d is negative", i);
2063 else
2064 ix86_section_threshold = i;
2065 }
2066
2067 if (ix86_tls_dialect_string)
2068 {
2069 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2070 ix86_tls_dialect = TLS_DIALECT_GNU;
2071 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2072 ix86_tls_dialect = TLS_DIALECT_GNU2;
2073 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2074 ix86_tls_dialect = TLS_DIALECT_SUN;
2075 else
2076 error ("bad value (%s) for -mtls-dialect= switch",
2077 ix86_tls_dialect_string);
2078 }
2079
2080 /* Keep nonleaf frame pointers. */
2081 if (flag_omit_frame_pointer)
2082 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2083 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2084 flag_omit_frame_pointer = 1;
2085
2086 /* If we're doing fast math, we don't care about comparison order
2087 wrt NaNs. This lets us use a shorter comparison sequence. */
2088 if (flag_finite_math_only)
2089 target_flags &= ~MASK_IEEE_FP;
2090
2091 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2092 since the insns won't need emulation. */
2093 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2094 target_flags &= ~MASK_NO_FANCY_MATH_387;
2095
2096 /* Likewise, if the target doesn't have a 387, or we've specified
2097 software floating point, don't use 387 inline intrinsics. */
2098 if (!TARGET_80387)
2099 target_flags |= MASK_NO_FANCY_MATH_387;
2100
2101 /* Turn on SSE3 builtins for -mssse3. */
2102 if (TARGET_SSSE3)
2103 target_flags |= MASK_SSE3;
2104
2105 /* Turn on SSE3 builtins for -msse4a. */
2106 if (TARGET_SSE4A)
2107 target_flags |= MASK_SSE3;
2108
2109 /* Turn on SSE2 builtins for -msse3. */
2110 if (TARGET_SSE3)
2111 target_flags |= MASK_SSE2;
2112
2113 /* Turn on SSE builtins for -msse2. */
2114 if (TARGET_SSE2)
2115 target_flags |= MASK_SSE;
2116
2117 /* Turn on MMX builtins for -msse. */
2118 if (TARGET_SSE)
2119 {
2120 target_flags |= MASK_MMX & ~target_flags_explicit;
2121 x86_prefetch_sse = true;
2122 }
2123
2124 /* Turn on MMX builtins for 3Dnow. */
2125 if (TARGET_3DNOW)
2126 target_flags |= MASK_MMX;
2127
2128 /* Turn on POPCNT builtins for -mabm. */
2129 if (TARGET_ABM)
2130 target_flags |= MASK_POPCNT;
2131
2132 if (TARGET_64BIT)
2133 {
2134 if (TARGET_ALIGN_DOUBLE)
2135 error ("-malign-double makes no sense in the 64bit mode");
2136 if (TARGET_RTD)
2137 error ("-mrtd calling convention not supported in the 64bit mode");
2138
2139 /* Enable by default the SSE and MMX builtins. Do allow the user to
2140 explicitly disable any of these. In particular, disabling SSE and
2141 MMX for kernel code is extremely useful. */
2142 target_flags
2143 |= ((MASK_SSE2 | MASK_SSE | MASK_MMX | MASK_128BIT_LONG_DOUBLE)
2144 & ~target_flags_explicit);
2145 }
2146 else
2147 {
2148 /* The i386 ABI does not specify a red zone. It still makes sense to use
2149 one when the programmer takes care to keep the stack from being destroyed. */
2150 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2151 target_flags |= MASK_NO_RED_ZONE;
2152 }
2153
2154 /* Validate -mpreferred-stack-boundary= value, or provide default.
2155 The default of 128 bits is for Pentium III's SSE __m128. We can't
2156 change it because of optimize_size. Otherwise, we can't mix object
2157 files compiled with -Os and -On. */
2158 ix86_preferred_stack_boundary = 128;
2159 if (ix86_preferred_stack_boundary_string)
2160 {
2161 i = atoi (ix86_preferred_stack_boundary_string);
2162 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2163 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2164 TARGET_64BIT ? 4 : 2);
2165 else
2166 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2167 }
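
/* Worked example (illustrative): the option value is a power-of-two
   exponent in bytes, converted to bits here, so

     -mpreferred-stack-boundary=4  =>  (1 << 4) * BITS_PER_UNIT = 128 bits

   which matches the 128-bit (16-byte) default chosen for SSE __m128.  */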
2168
2169 /* Accept -msseregparm only if at least SSE support is enabled. */
2170 if (TARGET_SSEREGPARM
2171 && ! TARGET_SSE)
2172 error ("-msseregparm used without SSE enabled");
2173
2174 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2175 if (ix86_fpmath_string != 0)
2176 {
2177 if (! strcmp (ix86_fpmath_string, "387"))
2178 ix86_fpmath = FPMATH_387;
2179 else if (! strcmp (ix86_fpmath_string, "sse"))
2180 {
2181 if (!TARGET_SSE)
2182 {
2183 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2184 ix86_fpmath = FPMATH_387;
2185 }
2186 else
2187 ix86_fpmath = FPMATH_SSE;
2188 }
2189 else if (! strcmp (ix86_fpmath_string, "387,sse")
2190 || ! strcmp (ix86_fpmath_string, "sse,387"))
2191 {
2192 if (!TARGET_SSE)
2193 {
2194 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2195 ix86_fpmath = FPMATH_387;
2196 }
2197 else if (!TARGET_80387)
2198 {
2199 warning (0, "387 instruction set disabled, using SSE arithmetics");
2200 ix86_fpmath = FPMATH_SSE;
2201 }
2202 else
2203 ix86_fpmath = FPMATH_SSE | FPMATH_387;
2204 }
2205 else
2206 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2207 }
2208
2209 /* If the i387 is disabled, then do not return values in it. */
2210 if (!TARGET_80387)
2211 target_flags &= ~MASK_FLOAT_RETURNS;
2212
2213 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2214 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2215 && !optimize_size)
2216 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2217
2218 /* ??? Unwind info is not correct around the CFG unless either a frame
2219 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2220 unwind info generation to be aware of the CFG and propagating states
2221 around edges. */
2222 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2223 || flag_exceptions || flag_non_call_exceptions)
2224 && flag_omit_frame_pointer
2225 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2226 {
2227 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2228 warning (0, "unwind tables currently require either a frame pointer "
2229 "or -maccumulate-outgoing-args for correctness");
2230 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2231 }
2232
2233 /* For sane SSE instruction set generation we need the fcomi instruction.
2234 It is safe to enable all CMOVE instructions. */
2235 if (TARGET_SSE)
2236 TARGET_CMOVE = 1;
2237
2238 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2239 {
2240 char *p;
2241 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2242 p = strchr (internal_label_prefix, 'X');
2243 internal_label_prefix_len = p - internal_label_prefix;
2244 *p = '\0';
2245 }
2246
2247 /* When a scheduling description is not available, disable the scheduler
2248 pass so it won't slow down compilation and make x87 code slower. */
2249 if (!TARGET_SCHEDULE)
2250 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2251
2252 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2253 set_param_value ("simultaneous-prefetches",
2254 ix86_cost->simultaneous_prefetches);
2255 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2256 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2257 }
2258 \f
2259 /* Return true if this goes in large data/bss. */
2260
2261 static bool
2262 ix86_in_large_data_p (tree exp)
2263 {
2264 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2265 return false;
2266
2267 /* Functions are never large data. */
2268 if (TREE_CODE (exp) == FUNCTION_DECL)
2269 return false;
2270
2271 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2272 {
2273 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2274 if (strcmp (section, ".ldata") == 0
2275 || strcmp (section, ".lbss") == 0)
2276 return true;
2277 return false;
2278 }
2279 else
2280 {
2281 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2282
2283 /* If this is an incomplete type with size 0, then we can't put it
2284 in data because it might be too big when completed. */
2285 if (!size || size > ix86_section_threshold)
2286 return true;
2287 }
2288
2289 return false;
2290 }
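
/* A sketch of the predicate above in action (hypothetical user code; the
   threshold shown is the ix86_section_threshold default, adjustable with
   -mlarge-data-threshold=):

     // compiled with -mcmodel=medium on x86-64
     static char small_buf[1024];      // <= 65536 bytes: ordinary .bss
     static char large_buf[1 << 20];   // >  65536 bytes: large data (.lbss)

   Objects explicitly placed in a ".ldata" or ".lbss" section are treated
   as large data regardless of size.  */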
2291
2292 /* Switch to the appropriate section for output of DECL.
2293 DECL is either a `VAR_DECL' node or a constant of some sort.
2294 RELOC indicates whether forming the initial value of DECL requires
2295 link-time relocations. */
2296
2297 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2298 ATTRIBUTE_UNUSED;
2299
2300 static section *
2301 x86_64_elf_select_section (tree decl, int reloc,
2302 unsigned HOST_WIDE_INT align)
2303 {
2304 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2305 && ix86_in_large_data_p (decl))
2306 {
2307 const char *sname = NULL;
2308 unsigned int flags = SECTION_WRITE;
2309 switch (categorize_decl_for_section (decl, reloc))
2310 {
2311 case SECCAT_DATA:
2312 sname = ".ldata";
2313 break;
2314 case SECCAT_DATA_REL:
2315 sname = ".ldata.rel";
2316 break;
2317 case SECCAT_DATA_REL_LOCAL:
2318 sname = ".ldata.rel.local";
2319 break;
2320 case SECCAT_DATA_REL_RO:
2321 sname = ".ldata.rel.ro";
2322 break;
2323 case SECCAT_DATA_REL_RO_LOCAL:
2324 sname = ".ldata.rel.ro.local";
2325 break;
2326 case SECCAT_BSS:
2327 sname = ".lbss";
2328 flags |= SECTION_BSS;
2329 break;
2330 case SECCAT_RODATA:
2331 case SECCAT_RODATA_MERGE_STR:
2332 case SECCAT_RODATA_MERGE_STR_INIT:
2333 case SECCAT_RODATA_MERGE_CONST:
2334 sname = ".lrodata";
2335 flags = 0;
2336 break;
2337 case SECCAT_SRODATA:
2338 case SECCAT_SDATA:
2339 case SECCAT_SBSS:
2340 gcc_unreachable ();
2341 case SECCAT_TEXT:
2342 case SECCAT_TDATA:
2343 case SECCAT_TBSS:
2344 /* We don't split these for medium model. Place them into
2345 default sections and hope for the best. */
2346 break;
2347 }
2348 if (sname)
2349 {
2350 /* We might get called with string constants, but get_named_section
2351 doesn't like them as they are not DECLs. Also, we need to set
2352 flags in that case. */
2353 if (!DECL_P (decl))
2354 return get_section (sname, flags, NULL);
2355 return get_named_section (decl, sname, reloc);
2356 }
2357 }
2358 return default_elf_select_section (decl, reloc, align);
2359 }
2360
2361 /* Build up a unique section name, expressed as a
2362 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2363 RELOC indicates whether the initial value of EXP requires
2364 link-time relocations. */
2365
2366 static void ATTRIBUTE_UNUSED
2367 x86_64_elf_unique_section (tree decl, int reloc)
2368 {
2369 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2370 && ix86_in_large_data_p (decl))
2371 {
2372 const char *prefix = NULL;
2373 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2374 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2375
2376 switch (categorize_decl_for_section (decl, reloc))
2377 {
2378 case SECCAT_DATA:
2379 case SECCAT_DATA_REL:
2380 case SECCAT_DATA_REL_LOCAL:
2381 case SECCAT_DATA_REL_RO:
2382 case SECCAT_DATA_REL_RO_LOCAL:
2383 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2384 break;
2385 case SECCAT_BSS:
2386 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2387 break;
2388 case SECCAT_RODATA:
2389 case SECCAT_RODATA_MERGE_STR:
2390 case SECCAT_RODATA_MERGE_STR_INIT:
2391 case SECCAT_RODATA_MERGE_CONST:
2392 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2393 break;
2394 case SECCAT_SRODATA:
2395 case SECCAT_SDATA:
2396 case SECCAT_SBSS:
2397 gcc_unreachable ();
2398 case SECCAT_TEXT:
2399 case SECCAT_TDATA:
2400 case SECCAT_TBSS:
2401 /* We don't split these for medium model. Place them into
2402 default sections and hope for the best. */
2403 break;
2404 }
2405 if (prefix)
2406 {
2407 const char *name;
2408 size_t nlen, plen;
2409 char *string;
2410 plen = strlen (prefix);
2411
2412 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2413 name = targetm.strip_name_encoding (name);
2414 nlen = strlen (name);
2415
2416 string = alloca (nlen + plen + 1);
2417 memcpy (string, prefix, plen);
2418 memcpy (string + plen, name, nlen + 1);
2419
2420 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2421 return;
2422 }
2423 }
2424 default_unique_section (decl, reloc);
2425 }
2426
2427 #ifdef COMMON_ASM_OP
2428 /* This says how to output assembler code to declare an
2429 uninitialized external linkage data object.
2430
2431 For the medium model on x86-64 we need to use the .largecomm pseudo-op
2432 for large objects. */
2433 void
2434 x86_elf_aligned_common (FILE *file,
2435 const char *name, unsigned HOST_WIDE_INT size,
2436 int align)
2437 {
2438 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2439 && size > (unsigned int)ix86_section_threshold)
2440 fprintf (file, ".largecomm\t");
2441 else
2442 fprintf (file, "%s", COMMON_ASM_OP);
2443 assemble_name (file, name);
2444 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2445 size, align / BITS_PER_UNIT);
2446 }
2447 #endif
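
/* Sketch of the output produced by the function above (names and sizes are
   illustrative; COMMON_ASM_OP is typically ".comm" on ELF targets):

     .largecomm	big_buf,100000,32     medium model, size above the threshold
     .comm	small_buf,1024,4      everything else
   */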
2448
2449 /* Utility function for targets to use in implementing
2450 ASM_OUTPUT_ALIGNED_BSS. */
2451
2452 void
2453 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2454 const char *name, unsigned HOST_WIDE_INT size,
2455 int align)
2456 {
2457 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2458 && size > (unsigned int)ix86_section_threshold)
2459 switch_to_section (get_named_section (decl, ".lbss", 0));
2460 else
2461 switch_to_section (bss_section);
2462 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2463 #ifdef ASM_DECLARE_OBJECT_NAME
2464 last_assemble_variable_decl = decl;
2465 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2466 #else
2467 /* The standard thing is to just output a label for the object. */
2468 ASM_OUTPUT_LABEL (file, name);
2469 #endif /* ASM_DECLARE_OBJECT_NAME */
2470 ASM_OUTPUT_SKIP (file, size ? size : 1);
2471 }
2472 \f
2473 void
2474 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2475 {
2476 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2477 make the problem with not enough registers even worse. */
2478 #ifdef INSN_SCHEDULING
2479 if (level > 1)
2480 flag_schedule_insns = 0;
2481 #endif
2482
2483 if (TARGET_MACHO)
2484 /* The Darwin libraries never set errno, so we might as well
2485 avoid calling them when that's the only reason we would. */
2486 flag_errno_math = 0;
2487
2488 /* The default values of these switches depend on TARGET_64BIT,
2489 which is not known at this point. Mark these values with 2 and
2490 let the user override them. If there is no command line option
2491 specifying them, we will set the defaults in override_options. */
2492 if (optimize >= 1)
2493 flag_omit_frame_pointer = 2;
2494 flag_pcc_struct_return = 2;
2495 flag_asynchronous_unwind_tables = 2;
2496 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2497 SUBTARGET_OPTIMIZATION_OPTIONS;
2498 #endif
2499 }
2500 \f
2501 /* Decide whether we can make a sibling call to a function. DECL is the
2502 declaration of the function being targeted by the call and EXP is the
2503 CALL_EXPR representing the call. */
2504
2505 static bool
2506 ix86_function_ok_for_sibcall (tree decl, tree exp)
2507 {
2508 tree func;
2509 rtx a, b;
2510
2511 /* If we are generating position-independent code, we cannot sibcall
2512 optimize any indirect call, or a direct call to a global function,
2513 as the PLT requires %ebx be live. */
2514 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2515 return false;
2516
2517 if (decl)
2518 func = decl;
2519 else
2520 {
2521 func = TREE_TYPE (CALL_EXPR_FN (exp));
2522 if (POINTER_TYPE_P (func))
2523 func = TREE_TYPE (func);
2524 }
2525
2526 /* Check that the return value locations are the same. For example,
2527 if we are returning floats on the 80387 register stack, we cannot
2528 make a sibcall from a function that doesn't return a float to a
2529 function that does or, conversely, from a function that does return
2530 a float to a function that doesn't; the necessary stack adjustment
2531 would not be executed. This is also the place we notice
2532 differences in the return value ABI. Note that it is ok for one
2533 of the functions to have void return type as long as the return
2534 value of the other is passed in a register. */
2535 a = ix86_function_value (TREE_TYPE (exp), func, false);
2536 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2537 cfun->decl, false);
2538 if (STACK_REG_P (a) || STACK_REG_P (b))
2539 {
2540 if (!rtx_equal_p (a, b))
2541 return false;
2542 }
2543 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2544 ;
2545 else if (!rtx_equal_p (a, b))
2546 return false;
2547
2548 /* If this call is indirect, we'll need to be able to use a call-clobbered
2549 register for the address of the target function. Make sure that all
2550 such registers are not used for passing parameters. */
2551 if (!decl && !TARGET_64BIT)
2552 {
2553 tree type;
2554
2555 /* We're looking at the CALL_EXPR, we need the type of the function. */
2556 type = CALL_EXPR_FN (exp); /* pointer expression */
2557 type = TREE_TYPE (type); /* pointer type */
2558 type = TREE_TYPE (type); /* function type */
2559
2560 if (ix86_function_regparm (type, NULL) >= 3)
2561 {
2562 /* ??? Need to count the actual number of registers to be used,
2563 not the possible number of registers. Fix later. */
2564 return false;
2565 }
2566 }
2567
2568 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
2569 /* Dllimport'd functions are also called indirectly. */
2570 if (decl && DECL_DLLIMPORT_P (decl)
2571 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2572 return false;
2573 #endif
2574
2575 /* If we forced alignment of the stack, then sibcalling would unalign the
2576 stack, which may break the called function. */
2577 if (cfun->machine->force_align_arg_pointer)
2578 return false;
2579
2580 /* Otherwise okay. That also includes certain types of indirect calls. */
2581 return true;
2582 }
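
/* An illustrative case for the PIC restriction above (hypothetical code):

     extern int helper (int);

     int
     wrapper (int x)
     {
       return helper (x);   // tail call candidate
     }

   With -m32 -fpic -O2 the call to helper goes through the PLT and needs
   %ebx live as the GOT pointer, so ix86_function_ok_for_sibcall returns
   false and an ordinary call/ret sequence is emitted instead of a jmp.  */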
2583
2584 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2585 calling convention attributes;
2586 arguments as in struct attribute_spec.handler. */
2587
2588 static tree
2589 ix86_handle_cconv_attribute (tree *node, tree name,
2590 tree args,
2591 int flags ATTRIBUTE_UNUSED,
2592 bool *no_add_attrs)
2593 {
2594 if (TREE_CODE (*node) != FUNCTION_TYPE
2595 && TREE_CODE (*node) != METHOD_TYPE
2596 && TREE_CODE (*node) != FIELD_DECL
2597 && TREE_CODE (*node) != TYPE_DECL)
2598 {
2599 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2600 IDENTIFIER_POINTER (name));
2601 *no_add_attrs = true;
2602 return NULL_TREE;
2603 }
2604
2605 /* Can combine regparm with all attributes but fastcall. */
2606 if (is_attribute_p ("regparm", name))
2607 {
2608 tree cst;
2609
2610 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2611 {
2612 error ("fastcall and regparm attributes are not compatible");
2613 }
2614
2615 cst = TREE_VALUE (args);
2616 if (TREE_CODE (cst) != INTEGER_CST)
2617 {
2618 warning (OPT_Wattributes,
2619 "%qs attribute requires an integer constant argument",
2620 IDENTIFIER_POINTER (name));
2621 *no_add_attrs = true;
2622 }
2623 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2624 {
2625 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2626 IDENTIFIER_POINTER (name), REGPARM_MAX);
2627 *no_add_attrs = true;
2628 }
2629
2630 if (!TARGET_64BIT
2631 && lookup_attribute (ix86_force_align_arg_pointer_string,
2632 TYPE_ATTRIBUTES (*node))
2633 && compare_tree_int (cst, REGPARM_MAX-1))
2634 {
2635 error ("%s functions limited to %d register parameters",
2636 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2637 }
2638
2639 return NULL_TREE;
2640 }
2641
2642 if (TARGET_64BIT)
2643 {
2644 warning (OPT_Wattributes, "%qs attribute ignored",
2645 IDENTIFIER_POINTER (name));
2646 *no_add_attrs = true;
2647 return NULL_TREE;
2648 }
2649
2650 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2651 if (is_attribute_p ("fastcall", name))
2652 {
2653 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2654 {
2655 error ("fastcall and cdecl attributes are not compatible");
2656 }
2657 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2658 {
2659 error ("fastcall and stdcall attributes are not compatible");
2660 }
2661 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2662 {
2663 error ("fastcall and regparm attributes are not compatible");
2664 }
2665 }
2666
2667 /* Can combine stdcall with fastcall (redundant), regparm and
2668 sseregparm. */
2669 else if (is_attribute_p ("stdcall", name))
2670 {
2671 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2672 {
2673 error ("stdcall and cdecl attributes are not compatible");
2674 }
2675 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2676 {
2677 error ("stdcall and fastcall attributes are not compatible");
2678 }
2679 }
2680
2681 /* Can combine cdecl with regparm and sseregparm. */
2682 else if (is_attribute_p ("cdecl", name))
2683 {
2684 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2685 {
2686 error ("stdcall and cdecl attributes are not compatible");
2687 }
2688 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2689 {
2690 error ("fastcall and cdecl attributes are not compatible");
2691 }
2692 }
2693
2694 /* Can combine sseregparm with all attributes. */
2695
2696 return NULL_TREE;
2697 }
2698
2699 /* Return 0 if the attributes for two types are incompatible, 1 if they
2700 are compatible, and 2 if they are nearly compatible (which causes a
2701 warning to be generated). */
2702
2703 static int
2704 ix86_comp_type_attributes (tree type1, tree type2)
2705 {
2706 /* Check for mismatch of non-default calling convention. */
2707 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2708
2709 if (TREE_CODE (type1) != FUNCTION_TYPE)
2710 return 1;
2711
2712 /* Check for mismatched fastcall/regparm types. */
2713 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2714 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2715 || (ix86_function_regparm (type1, NULL)
2716 != ix86_function_regparm (type2, NULL)))
2717 return 0;
2718
2719 /* Check for mismatched sseregparm types. */
2720 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2721 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2722 return 0;
2723
2724 /* Check for mismatched return types (cdecl vs stdcall). */
2725 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2726 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2727 return 0;
2728
2729 return 1;
2730 }
2731 \f
2732 /* Return the regparm value for a function with the indicated TYPE and DECL.
2733 DECL may be NULL when calling function indirectly
2734 or considering a libcall. */
2735
2736 static int
2737 ix86_function_regparm (tree type, tree decl)
2738 {
2739 tree attr;
2740 int regparm = ix86_regparm;
2741
2742 if (TARGET_64BIT)
2743 return regparm;
2744
2745 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2746 if (attr)
2747 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2748
2749 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2750 return 2;
2751
2752 /* Use register calling convention for local functions when possible. */
2753 if (decl && flag_unit_at_a_time && !profile_flag)
2754 {
2755 struct cgraph_local_info *i = cgraph_local_info (decl);
2756 if (i && i->local)
2757 {
2758 int local_regparm, globals = 0, regno;
2759 struct function *f;
2760
2761 /* Make sure no regparm register is taken by a
2762 global register variable. */
2763 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2764 if (global_regs[local_regparm])
2765 break;
2766
2767 /* We can't use regparm(3) for nested functions as these use
2768 static chain pointer in third argument. */
2769 if (local_regparm == 3
2770 && decl_function_context (decl)
2771 && !DECL_NO_STATIC_CHAIN (decl))
2772 local_regparm = 2;
2773
2774 /* If the function realigns its stack pointer, the prologue will
2775 clobber %ecx. If we've already generated code for the callee,
2776 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2777 scanning the attributes for the self-realigning property. */
2778 f = DECL_STRUCT_FUNCTION (decl);
2779 if (local_regparm == 3
2780 && (f ? !!f->machine->force_align_arg_pointer
2781 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
2782 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
2783 local_regparm = 2;
2784
2785 /* Each global register variable increases register pressure,
2786 so the more global reg vars there are, the less the regparm
2787 optimization can be used, unless requested by the user explicitly. */
2788 for (regno = 0; regno < 6; regno++)
2789 if (global_regs[regno])
2790 globals++;
2791 local_regparm
2792 = globals < local_regparm ? local_regparm - globals : 0;
2793
2794 if (local_regparm > regparm)
2795 regparm = local_regparm;
2796 }
2797 }
2798
2799 return regparm;
2800 }
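
/* Usage sketch for the regparm logic above (hypothetical user code):

     int __attribute__ ((regparm (3)))
     add3 (int a, int b, int c);   // a, b, c passed in %eax, %edx, %ecx

   Without the attribute, a static function that the cgraph code proves
   local may still be promoted to an internal regparm convention, capped
   at 2 when it needs the static chain or realigns its stack, as checked
   above.  */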
2801
2802 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
2803 DFmode (2) arguments in SSE registers for a function with the
2804 indicated TYPE and DECL. DECL may be NULL when calling function
2805 indirectly or considering a libcall. Otherwise return 0. */
2806
2807 static int
2808 ix86_function_sseregparm (tree type, tree decl)
2809 {
2810 gcc_assert (!TARGET_64BIT);
2811
2812 /* Use SSE registers to pass SFmode and DFmode arguments if requested
2813 by the sseregparm attribute. */
2814 if (TARGET_SSEREGPARM
2815 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
2816 {
2817 if (!TARGET_SSE)
2818 {
2819 if (decl)
2820 error ("calling %qD with attribute sseregparm without "
2821 "SSE/SSE2 enabled", decl);
2822 else
2823 error ("calling %qT with attribute sseregparm without "
2824 "SSE/SSE2 enabled", type);
2825 return 0;
2826 }
2827
2828 return 2;
2829 }
2830
2831 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
2832 (and DFmode for SSE2) arguments in SSE registers. */
2833 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
2834 {
2835 struct cgraph_local_info *i = cgraph_local_info (decl);
2836 if (i && i->local)
2837 return TARGET_SSE2 ? 2 : 1;
2838 }
2839
2840 return 0;
2841 }
2842
2843 /* Return true if EAX is live at the start of the function. Used by
2844 ix86_expand_prologue to determine if we need special help before
2845 calling allocate_stack_worker. */
2846
2847 static bool
2848 ix86_eax_live_at_start_p (void)
2849 {
2850 /* Cheat. Don't bother working forward from ix86_function_regparm
2851 to the function type to whether an actual argument is located in
2852 eax. Instead just look at cfg info, which is still close enough
2853 to correct at this point. This gives false positives for broken
2854 functions that might use uninitialized data that happens to be
2855 allocated in eax, but who cares? */
2856 return REGNO_REG_SET_P (ENTRY_BLOCK_PTR->il.rtl->global_live_at_end, 0);
2857 }
2858
2859 /* Return true if TYPE has a variable argument list. */
2860
2861 static bool
2862 type_has_variadic_args_p (tree type)
2863 {
2864 tree t;
2865
2866 for (t = TYPE_ARG_TYPES (type); t; t = TREE_CHAIN (t))
2867 if (t == void_list_node)
2868 return false;
2869 return true;
2870 }
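
/* For reference (illustrative prototypes):

     int fixed (int, char *);   // ends with void_list_node => false
     int variadic (int, ...);   // no terminating void_list_node => true
     int old_style ();          // empty TYPE_ARG_TYPES, treated as variadic => true
   */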
2871
2872 /* Value is the number of bytes of arguments automatically
2873 popped when returning from a subroutine call.
2874 FUNDECL is the declaration node of the function (as a tree),
2875 FUNTYPE is the data type of the function (as a tree),
2876 or for a library call it is an identifier node for the subroutine name.
2877 SIZE is the number of bytes of arguments passed on the stack.
2878
2879 On the 80386, the RTD insn may be used to pop them if the number
2880 of args is fixed, but if the number is variable then the caller
2881 must pop them all. RTD can't be used for library calls now
2882 because the library is compiled with the Unix compiler.
2883 Use of RTD is a selectable option, since it is incompatible with
2884 standard Unix calling sequences. If the option is not selected,
2885 the caller must always pop the args.
2886
2887 The attribute stdcall is equivalent to RTD on a per module basis. */
2888
2889 int
2890 ix86_return_pops_args (tree fundecl, tree funtype, int size)
2891 {
2892 int rtd;
2893
2894 /* None of the 64-bit ABIs pop arguments. */
2895 if (TARGET_64BIT)
2896 return 0;
2897
2898 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
2899
2900 /* Cdecl functions override -mrtd, and never pop the stack. */
2901 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
2902 {
2903 /* Stdcall and fastcall functions will pop the stack if not
2904 variable args. */
2905 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
2906 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
2907 rtd = 1;
2908
2909 if (rtd && ! type_has_variadic_args_p (funtype))
2910 return size;
2911 }
2912
2913 /* Lose any fake structure return argument if it is passed on the stack. */
2914 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
2915 && !KEEP_AGGREGATE_RETURN_POINTER)
2916 {
2917 int nregs = ix86_function_regparm (funtype, fundecl);
2918 if (nregs == 0)
2919 return GET_MODE_SIZE (Pmode);
2920 }
2921
2922 return 0;
2923 }
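
/* Concrete example of the callee-pop rule above (illustrative):

     void __attribute__ ((stdcall)) f (int a, int b);

   In 32-bit code f returns with "ret $8", popping its 8 bytes of stack
   arguments, so ix86_return_pops_args returns 8 for it; a plain cdecl
   function, or any variadic one, returns 0 and the caller pops.  */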
2924 \f
2925 /* Argument support functions. */
2926
2927 /* Return true when register may be used to pass function parameters. */
2928 bool
2929 ix86_function_arg_regno_p (int regno)
2930 {
2931 int i;
2932
2933 if (!TARGET_64BIT)
2934 {
2935 if (TARGET_MACHO)
2936 return (regno < REGPARM_MAX
2937 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
2938 else
2939 return (regno < REGPARM_MAX
2940 || (TARGET_MMX && MMX_REGNO_P (regno)
2941 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
2942 || (TARGET_SSE && SSE_REGNO_P (regno)
2943 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
2944 }
2945
2946 if (TARGET_MACHO)
2947 {
2948 if (SSE_REGNO_P (regno) && TARGET_SSE)
2949 return true;
2950 }
2951 else
2952 {
2953 if (TARGET_SSE && SSE_REGNO_P (regno)
2954 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
2955 return true;
2956 }
2957
2958 /* RAX is used as a hidden argument to va_arg functions. */
2959 if (regno == 0)
2960 return true;
2961
2962 for (i = 0; i < REGPARM_MAX; i++)
2963 if (regno == x86_64_int_parameter_registers[i])
2964 return true;
2965 return false;
2966 }
2967
2968 /* Return if we do not know how to pass TYPE solely in registers. */
2969
2970 static bool
2971 ix86_must_pass_in_stack (enum machine_mode mode, tree type)
2972 {
2973 if (must_pass_in_stack_var_size_or_pad (mode, type))
2974 return true;
2975
2976 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
2977 The layout_type routine is crafty and tries to trick us into passing
2978 currently unsupported vector types on the stack by using TImode. */
2979 return (!TARGET_64BIT && mode == TImode
2980 && type && TREE_CODE (type) != VECTOR_TYPE);
2981 }
2982
2983 /* Initialize a variable CUM of type CUMULATIVE_ARGS
2984 for a call to a function whose data type is FNTYPE.
2985 For a library call, FNTYPE is 0. */
2986
2987 void
2988 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
2989 tree fntype, /* tree ptr for function decl */
2990 rtx libname, /* SYMBOL_REF of library name or 0 */
2991 tree fndecl)
2992 {
2993 memset (cum, 0, sizeof (*cum));
2994
2995 /* Set up the number of registers to use for passing arguments. */
2996 cum->nregs = ix86_regparm;
2997 if (TARGET_SSE)
2998 cum->sse_nregs = SSE_REGPARM_MAX;
2999 if (TARGET_MMX)
3000 cum->mmx_nregs = MMX_REGPARM_MAX;
3001 cum->warn_sse = true;
3002 cum->warn_mmx = true;
3003 cum->maybe_vaarg = (fntype ? type_has_variadic_args_p (fntype) : !libname);
3004
3005 if (!TARGET_64BIT)
3006 {
3007 /* If there are variable arguments, then we won't pass anything
3008 in registers in 32-bit mode. */
3009 if (cum->maybe_vaarg)
3010 {
3011 cum->nregs = 0;
3012 cum->sse_nregs = 0;
3013 cum->mmx_nregs = 0;
3014 cum->warn_sse = 0;
3015 cum->warn_mmx = 0;
3016 return;
3017 }
3018
3019 /* Use ecx and edx registers if function has fastcall attribute,
3020 else look for regparm information. */
3021 if (fntype)
3022 {
3023 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3024 {
3025 cum->nregs = 2;
3026 cum->fastcall = 1;
3027 }
3028 else
3029 cum->nregs = ix86_function_regparm (fntype, fndecl);
3030 }
3031
3032 /* Set up the number of SSE registers used for passing SFmode
3033 and DFmode arguments. Warn for mismatching ABI. */
3034 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3035 }
3036 }
3037
3038 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3039 But in the case of vector types, it is some vector mode.
3040
3041 When we have only some of our vector isa extensions enabled, then there
3042 are some modes for which vector_mode_supported_p is false. For these
3043 modes, the generic vector support in gcc will choose some non-vector mode
3044 in order to implement the type. By computing the natural mode, we'll
3045 select the proper ABI location for the operand and not depend on whatever
3046 the middle-end decides to do with these vector types. */
3047
3048 static enum machine_mode
3049 type_natural_mode (tree type)
3050 {
3051 enum machine_mode mode = TYPE_MODE (type);
3052
3053 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3054 {
3055 HOST_WIDE_INT size = int_size_in_bytes (type);
3056 if ((size == 8 || size == 16)
3057 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3058 && TYPE_VECTOR_SUBPARTS (type) > 1)
3059 {
3060 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3061
3062 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3063 mode = MIN_MODE_VECTOR_FLOAT;
3064 else
3065 mode = MIN_MODE_VECTOR_INT;
3066
3067 /* Get the mode which has this inner mode and number of units. */
3068 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3069 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3070 && GET_MODE_INNER (mode) == innermode)
3071 return mode;
3072
3073 gcc_unreachable ();
3074 }
3075 }
3076
3077 return mode;
3078 }
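
/* Example of the distinction made above (a sketch; the exact fallback mode
   depends on the enabled ISA):

     typedef float v4sf __attribute__ ((vector_size (16)));

   With -msse enabled TYPE_MODE is already V4SFmode and is returned as is.
   Without SSE the middle end falls back to some non-vector mode for the
   type, but type_natural_mode still computes V4SFmode, so the argument is
   classified at the ABI location the psABI expects.  */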
3079
3080 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3081 this may not agree with the mode that the type system has chosen for the
3082 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3083 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3084
3085 static rtx
3086 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3087 unsigned int regno)
3088 {
3089 rtx tmp;
3090
3091 if (orig_mode != BLKmode)
3092 tmp = gen_rtx_REG (orig_mode, regno);
3093 else
3094 {
3095 tmp = gen_rtx_REG (mode, regno);
3096 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3097 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3098 }
3099
3100 return tmp;
3101 }
3102
3103 /* x86-64 register passing implementation.  See the x86-64 ABI for details.
3104    The goal of this code is to classify each 8 bytes of an incoming argument
3105    by register class and assign registers accordingly.  */
3106
3107 /* Return the union class of CLASS1 and CLASS2.
3108 See the x86-64 PS ABI for details. */
3109
3110 static enum x86_64_reg_class
3111 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3112 {
3113 /* Rule #1: If both classes are equal, this is the resulting class. */
3114 if (class1 == class2)
3115 return class1;
3116
3117 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3118 the other class. */
3119 if (class1 == X86_64_NO_CLASS)
3120 return class2;
3121 if (class2 == X86_64_NO_CLASS)
3122 return class1;
3123
3124 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3125 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3126 return X86_64_MEMORY_CLASS;
3127
3128 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3129 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3130 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3131 return X86_64_INTEGERSI_CLASS;
3132 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3133 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3134 return X86_64_INTEGER_CLASS;
3135
3136 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3137 MEMORY is used. */
3138 if (class1 == X86_64_X87_CLASS
3139 || class1 == X86_64_X87UP_CLASS
3140 || class1 == X86_64_COMPLEX_X87_CLASS
3141 || class2 == X86_64_X87_CLASS
3142 || class2 == X86_64_X87UP_CLASS
3143 || class2 == X86_64_COMPLEX_X87_CLASS)
3144 return X86_64_MEMORY_CLASS;
3145
3146 /* Rule #6: Otherwise class SSE is used. */
3147 return X86_64_SSE_CLASS;
3148 }
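/* Example of rule #4 above: for

     union { int i; float f; };

   the single eightbyte merges X86_64_INTEGERSI_CLASS with
   X86_64_SSESF_CLASS, yielding X86_64_INTEGERSI_CLASS, so the union is
   passed in a general purpose register.  */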
3149
3150 /* Classify the argument of type TYPE and mode MODE.
3151 CLASSES will be filled by the register class used to pass each word
3152 of the operand. The number of words is returned. In case the parameter
3153 should be passed in memory, 0 is returned. As a special case for zero
3154 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3155
3156    BIT_OFFSET is used internally for handling records; it specifies the
3157    offset in bits, modulo 256, to avoid overflow cases.
3158
3159 See the x86-64 PS ABI for details.
3160 */
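/* A worked example: for

     struct { double d; int i; };

   the first eightbyte is classified X86_64_SSEDF_CLASS and the second
   X86_64_INTEGER_CLASS, so the struct is passed in one SSE register and
   one general purpose register.  */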
3161
3162 static int
3163 classify_argument (enum machine_mode mode, tree type,
3164 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3165 {
3166 HOST_WIDE_INT bytes =
3167 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3168 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3169
3170 /* Variable sized entities are always passed/returned in memory. */
3171 if (bytes < 0)
3172 return 0;
3173
3174 if (mode != VOIDmode
3175 && targetm.calls.must_pass_in_stack (mode, type))
3176 return 0;
3177
3178 if (type && AGGREGATE_TYPE_P (type))
3179 {
3180 int i;
3181 tree field;
3182 enum x86_64_reg_class subclasses[MAX_CLASSES];
3183
3184 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3185 if (bytes > 16)
3186 return 0;
3187
3188 for (i = 0; i < words; i++)
3189 classes[i] = X86_64_NO_CLASS;
3190
3191       /* Zero sized arrays or structures are NO_CLASS.  We return 0 to
3192 	 signal the memory class, so handle it as a special case.  */
3193 if (!words)
3194 {
3195 classes[0] = X86_64_NO_CLASS;
3196 return 1;
3197 }
3198
3199 /* Classify each field of record and merge classes. */
3200 switch (TREE_CODE (type))
3201 {
3202 case RECORD_TYPE:
3203 /* And now merge the fields of structure. */
3204 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3205 {
3206 if (TREE_CODE (field) == FIELD_DECL)
3207 {
3208 int num;
3209
3210 if (TREE_TYPE (field) == error_mark_node)
3211 continue;
3212
3213 /* Bitfields are always classified as integer. Handle them
3214 early, since later code would consider them to be
3215 misaligned integers. */
3216 if (DECL_BIT_FIELD (field))
3217 {
3218 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3219 i < ((int_bit_position (field) + (bit_offset % 64))
3220 + tree_low_cst (DECL_SIZE (field), 0)
3221 + 63) / 8 / 8; i++)
3222 classes[i] =
3223 merge_classes (X86_64_INTEGER_CLASS,
3224 classes[i]);
3225 }
3226 else
3227 {
3228 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3229 TREE_TYPE (field), subclasses,
3230 (int_bit_position (field)
3231 + bit_offset) % 256);
3232 if (!num)
3233 return 0;
3234 for (i = 0; i < num; i++)
3235 {
3236 int pos =
3237 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3238 classes[i + pos] =
3239 merge_classes (subclasses[i], classes[i + pos]);
3240 }
3241 }
3242 }
3243 }
3244 break;
3245
3246 case ARRAY_TYPE:
3247 /* Arrays are handled as small records. */
3248 {
3249 int num;
3250 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3251 TREE_TYPE (type), subclasses, bit_offset);
3252 if (!num)
3253 return 0;
3254
3255 /* The partial classes are now full classes. */
3256 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3257 subclasses[0] = X86_64_SSE_CLASS;
3258 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3259 subclasses[0] = X86_64_INTEGER_CLASS;
3260
3261 for (i = 0; i < words; i++)
3262 classes[i] = subclasses[i % num];
3263
3264 break;
3265 }
3266 case UNION_TYPE:
3267 case QUAL_UNION_TYPE:
3268 	  /* Unions are similar to RECORD_TYPE but the offset is always 0.  */
3269 
3270 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3271 {
3272 if (TREE_CODE (field) == FIELD_DECL)
3273 {
3274 int num;
3275
3276 if (TREE_TYPE (field) == error_mark_node)
3277 continue;
3278
3279 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3280 TREE_TYPE (field), subclasses,
3281 bit_offset);
3282 if (!num)
3283 return 0;
3284 for (i = 0; i < num; i++)
3285 classes[i] = merge_classes (subclasses[i], classes[i]);
3286 }
3287 }
3288 break;
3289
3290 default:
3291 gcc_unreachable ();
3292 }
3293
3294 /* Final merger cleanup. */
3295 for (i = 0; i < words; i++)
3296 {
3297 /* If one class is MEMORY, everything should be passed in
3298 memory. */
3299 if (classes[i] == X86_64_MEMORY_CLASS)
3300 return 0;
3301
3302 /* The X86_64_SSEUP_CLASS should be always preceded by
3303 X86_64_SSE_CLASS. */
3304 if (classes[i] == X86_64_SSEUP_CLASS
3305 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3306 classes[i] = X86_64_SSE_CLASS;
3307
3308 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3309 if (classes[i] == X86_64_X87UP_CLASS
3310 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3311 classes[i] = X86_64_SSE_CLASS;
3312 }
3313 return words;
3314 }
3315
3316   /* Compute alignment needed.  We align all types to natural boundaries,
3317      with the exception of XFmode, which the code below aligns to 128 bits.  */
3318 if (mode != VOIDmode && mode != BLKmode)
3319 {
3320 int mode_alignment = GET_MODE_BITSIZE (mode);
3321
3322 if (mode == XFmode)
3323 mode_alignment = 128;
3324 else if (mode == XCmode)
3325 mode_alignment = 256;
3326 if (COMPLEX_MODE_P (mode))
3327 mode_alignment /= 2;
3328 /* Misaligned fields are always returned in memory. */
3329 if (bit_offset % mode_alignment)
3330 return 0;
3331 }
3332
3333   /* For V1xx modes, just use the base mode.  */
3334 if (VECTOR_MODE_P (mode)
3335 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3336 mode = GET_MODE_INNER (mode);
3337
3338 /* Classification of atomic types. */
3339 switch (mode)
3340 {
3341 case SDmode:
3342 case DDmode:
3343 classes[0] = X86_64_SSE_CLASS;
3344 return 1;
3345 case TDmode:
3346 classes[0] = X86_64_SSE_CLASS;
3347 classes[1] = X86_64_SSEUP_CLASS;
3348 return 2;
3349 case DImode:
3350 case SImode:
3351 case HImode:
3352 case QImode:
3353 case CSImode:
3354 case CHImode:
3355 case CQImode:
3356 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3357 classes[0] = X86_64_INTEGERSI_CLASS;
3358 else
3359 classes[0] = X86_64_INTEGER_CLASS;
3360 return 1;
3361 case CDImode:
3362 case TImode:
3363 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3364 return 2;
3365 case CTImode:
3366 return 0;
3367 case SFmode:
3368 if (!(bit_offset % 64))
3369 classes[0] = X86_64_SSESF_CLASS;
3370 else
3371 classes[0] = X86_64_SSE_CLASS;
3372 return 1;
3373 case DFmode:
3374 classes[0] = X86_64_SSEDF_CLASS;
3375 return 1;
3376 case XFmode:
3377 classes[0] = X86_64_X87_CLASS;
3378 classes[1] = X86_64_X87UP_CLASS;
3379 return 2;
3380 case TFmode:
3381 classes[0] = X86_64_SSE_CLASS;
3382 classes[1] = X86_64_SSEUP_CLASS;
3383 return 2;
3384 case SCmode:
3385 classes[0] = X86_64_SSE_CLASS;
3386 return 1;
3387 case DCmode:
3388 classes[0] = X86_64_SSEDF_CLASS;
3389 classes[1] = X86_64_SSEDF_CLASS;
3390 return 2;
3391 case XCmode:
3392 classes[0] = X86_64_COMPLEX_X87_CLASS;
3393 return 1;
3394 case TCmode:
3395       /* This mode is larger than 16 bytes.  */
3396 return 0;
3397 case V4SFmode:
3398 case V4SImode:
3399 case V16QImode:
3400 case V8HImode:
3401 case V2DFmode:
3402 case V2DImode:
3403 classes[0] = X86_64_SSE_CLASS;
3404 classes[1] = X86_64_SSEUP_CLASS;
3405 return 2;
3406 case V2SFmode:
3407 case V2SImode:
3408 case V4HImode:
3409 case V8QImode:
3410 classes[0] = X86_64_SSE_CLASS;
3411 return 1;
3412 case BLKmode:
3413 case VOIDmode:
3414 return 0;
3415 default:
3416 gcc_assert (VECTOR_MODE_P (mode));
3417
3418 if (bytes > 16)
3419 return 0;
3420
3421 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3422
3423 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3424 classes[0] = X86_64_INTEGERSI_CLASS;
3425 else
3426 classes[0] = X86_64_INTEGER_CLASS;
3427 classes[1] = X86_64_INTEGER_CLASS;
3428 return 1 + (bytes > 8);
3429 }
3430 }
3431
3432 /* Examine the argument and set the number of registers required in each
3433    class.  Return 0 iff the parameter should be passed in memory.  */
3434 static int
3435 examine_argument (enum machine_mode mode, tree type, int in_return,
3436 int *int_nregs, int *sse_nregs)
3437 {
3438 enum x86_64_reg_class class[MAX_CLASSES];
3439 int n = classify_argument (mode, type, class, 0);
3440
3441 *int_nregs = 0;
3442 *sse_nregs = 0;
3443 if (!n)
3444 return 0;
3445 for (n--; n >= 0; n--)
3446 switch (class[n])
3447 {
3448 case X86_64_INTEGER_CLASS:
3449 case X86_64_INTEGERSI_CLASS:
3450 (*int_nregs)++;
3451 break;
3452 case X86_64_SSE_CLASS:
3453 case X86_64_SSESF_CLASS:
3454 case X86_64_SSEDF_CLASS:
3455 (*sse_nregs)++;
3456 break;
3457 case X86_64_NO_CLASS:
3458 case X86_64_SSEUP_CLASS:
3459 break;
3460 case X86_64_X87_CLASS:
3461 case X86_64_X87UP_CLASS:
3462 if (!in_return)
3463 return 0;
3464 break;
3465 case X86_64_COMPLEX_X87_CLASS:
3466 return in_return ? 2 : 0;
3467 case X86_64_MEMORY_CLASS:
3468 gcc_unreachable ();
3469 }
3470 return 1;
3471 }
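/* For instance, a _Complex double argument (DCmode) needs two SSE registers
   and no integer registers, while any aggregate larger than 16 bytes makes
   classify_argument return 0 and is therefore passed in memory.  */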
3472
3473 /* Construct container for the argument used by GCC interface. See
3474 FUNCTION_ARG for the detailed description. */
3475
3476 static rtx
3477 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3478 tree type, int in_return, int nintregs, int nsseregs,
3479 const int *intreg, int sse_regno)
3480 {
3481 /* The following variables hold the static issued_error state. */
3482 static bool issued_sse_arg_error;
3483 static bool issued_sse_ret_error;
3484 static bool issued_x87_ret_error;
3485
3486 enum machine_mode tmpmode;
3487 int bytes =
3488 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3489 enum x86_64_reg_class class[MAX_CLASSES];
3490 int n;
3491 int i;
3492 int nexps = 0;
3493 int needed_sseregs, needed_intregs;
3494 rtx exp[MAX_CLASSES];
3495 rtx ret;
3496
3497 n = classify_argument (mode, type, class, 0);
3498 if (!n)
3499 return NULL;
3500 if (!examine_argument (mode, type, in_return, &needed_intregs,
3501 &needed_sseregs))
3502 return NULL;
3503 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3504 return NULL;
3505
3506 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3507 some less clueful developer tries to use floating-point anyway. */
3508 if (needed_sseregs && !TARGET_SSE)
3509 {
3510 if (in_return)
3511 {
3512 if (!issued_sse_ret_error)
3513 {
3514 error ("SSE register return with SSE disabled");
3515 issued_sse_ret_error = true;
3516 }
3517 }
3518 else if (!issued_sse_arg_error)
3519 {
3520 error ("SSE register argument with SSE disabled");
3521 issued_sse_arg_error = true;
3522 }
3523 return NULL;
3524 }
3525
3526 /* Likewise, error if the ABI requires us to return values in the
3527 x87 registers and the user specified -mno-80387. */
3528 if (!TARGET_80387 && in_return)
3529 for (i = 0; i < n; i++)
3530 if (class[i] == X86_64_X87_CLASS
3531 || class[i] == X86_64_X87UP_CLASS
3532 || class[i] == X86_64_COMPLEX_X87_CLASS)
3533 {
3534 if (!issued_x87_ret_error)
3535 {
3536 error ("x87 register return with x87 disabled");
3537 issued_x87_ret_error = true;
3538 }
3539 return NULL;
3540 }
3541
3542   /* First construct simple cases.  Avoid SCmode, since we want to use
3543      a single register to pass this type.  */
3544 if (n == 1 && mode != SCmode)
3545 switch (class[0])
3546 {
3547 case X86_64_INTEGER_CLASS:
3548 case X86_64_INTEGERSI_CLASS:
3549 return gen_rtx_REG (mode, intreg[0]);
3550 case X86_64_SSE_CLASS:
3551 case X86_64_SSESF_CLASS:
3552 case X86_64_SSEDF_CLASS:
3553 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3554 case X86_64_X87_CLASS:
3555 case X86_64_COMPLEX_X87_CLASS:
3556 return gen_rtx_REG (mode, FIRST_STACK_REG);
3557 case X86_64_NO_CLASS:
3558 /* Zero sized array, struct or class. */
3559 return NULL;
3560 default:
3561 gcc_unreachable ();
3562 }
3563 if (n == 2 && class[0] == X86_64_SSE_CLASS && class[1] == X86_64_SSEUP_CLASS
3564 && mode != BLKmode)
3565 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3566
3567 if (n == 2
3568 && class[0] == X86_64_X87_CLASS && class[1] == X86_64_X87UP_CLASS)
3569 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3570 if (n == 2 && class[0] == X86_64_INTEGER_CLASS
3571 && class[1] == X86_64_INTEGER_CLASS
3572 && (mode == CDImode || mode == TImode || mode == TFmode)
3573 && intreg[0] + 1 == intreg[1])
3574 return gen_rtx_REG (mode, intreg[0]);
3575
3576 /* Otherwise figure out the entries of the PARALLEL. */
3577 for (i = 0; i < n; i++)
3578 {
3579 switch (class[i])
3580 {
3581 case X86_64_NO_CLASS:
3582 break;
3583 case X86_64_INTEGER_CLASS:
3584 case X86_64_INTEGERSI_CLASS:
3585 /* Merge TImodes on aligned occasions here too. */
3586 if (i * 8 + 8 > bytes)
3587 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3588 else if (class[i] == X86_64_INTEGERSI_CLASS)
3589 tmpmode = SImode;
3590 else
3591 tmpmode = DImode;
3592 	  /* We've requested e.g. 24 bits, which we have no integer mode for.  Use DImode.  */
3593 if (tmpmode == BLKmode)
3594 tmpmode = DImode;
3595 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3596 gen_rtx_REG (tmpmode, *intreg),
3597 GEN_INT (i*8));
3598 intreg++;
3599 break;
3600 case X86_64_SSESF_CLASS:
3601 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3602 gen_rtx_REG (SFmode,
3603 SSE_REGNO (sse_regno)),
3604 GEN_INT (i*8));
3605 sse_regno++;
3606 break;
3607 case X86_64_SSEDF_CLASS:
3608 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3609 gen_rtx_REG (DFmode,
3610 SSE_REGNO (sse_regno)),
3611 GEN_INT (i*8));
3612 sse_regno++;
3613 break;
3614 case X86_64_SSE_CLASS:
3615 if (i < n - 1 && class[i + 1] == X86_64_SSEUP_CLASS)
3616 tmpmode = TImode;
3617 else
3618 tmpmode = DImode;
3619 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3620 gen_rtx_REG (tmpmode,
3621 SSE_REGNO (sse_regno)),
3622 GEN_INT (i*8));
3623 if (tmpmode == TImode)
3624 i++;
3625 sse_regno++;
3626 break;
3627 default:
3628 gcc_unreachable ();
3629 }
3630 }
3631
3632 /* Empty aligned struct, union or class. */
3633 if (nexps == 0)
3634 return NULL;
3635
3636 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3637 for (i = 0; i < nexps; i++)
3638 XVECEXP (ret, 0, i) = exp [i];
3639 return ret;
3640 }
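/* As an illustration, for a first argument of type

     struct { double d; int i; };

   the container built above is roughly

     (parallel [(expr_list (reg:DF xmm0) (const_int 0))
                (expr_list (reg:DI di)   (const_int 8))])

   i.e. the double travels in %xmm0 and the second eightbyte in %rdi.  */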
3641
3642 /* Update the data in CUM to advance over an argument of mode MODE
3643 and data type TYPE. (TYPE is null for libcalls where that information
3644 may not be available.) */
3645
3646 static void
3647 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3648 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3649 {
3650 switch (mode)
3651 {
3652 default:
3653 break;
3654
3655 case BLKmode:
3656 if (bytes < 0)
3657 break;
3658 /* FALLTHRU */
3659
3660 case DImode:
3661 case SImode:
3662 case HImode:
3663 case QImode:
3664 cum->words += words;
3665 cum->nregs -= words;
3666 cum->regno += words;
3667
3668 if (cum->nregs <= 0)
3669 {
3670 cum->nregs = 0;
3671 cum->regno = 0;
3672 }
3673 break;
3674
3675 case DFmode:
3676 if (cum->float_in_sse < 2)
3677 break;
3678 case SFmode:
3679 if (cum->float_in_sse < 1)
3680 break;
3681 /* FALLTHRU */
3682
3683 case TImode:
3684 case V16QImode:
3685 case V8HImode:
3686 case V4SImode:
3687 case V2DImode:
3688 case V4SFmode:
3689 case V2DFmode:
3690 if (!type || !AGGREGATE_TYPE_P (type))
3691 {
3692 cum->sse_words += words;
3693 cum->sse_nregs -= 1;
3694 cum->sse_regno += 1;
3695 if (cum->sse_nregs <= 0)
3696 {
3697 cum->sse_nregs = 0;
3698 cum->sse_regno = 0;
3699 }
3700 }
3701 break;
3702
3703 case V8QImode:
3704 case V4HImode:
3705 case V2SImode:
3706 case V2SFmode:
3707 if (!type || !AGGREGATE_TYPE_P (type))
3708 {
3709 cum->mmx_words += words;
3710 cum->mmx_nregs -= 1;
3711 cum->mmx_regno += 1;
3712 if (cum->mmx_nregs <= 0)
3713 {
3714 cum->mmx_nregs = 0;
3715 cum->mmx_regno = 0;
3716 }
3717 }
3718 break;
3719 }
3720 }
3721
3722 static void
3723 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3724 tree type, HOST_WIDE_INT words)
3725 {
3726 int int_nregs, sse_nregs;
3727
3728 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3729 cum->words += words;
3730 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3731 {
3732 cum->nregs -= int_nregs;
3733 cum->sse_nregs -= sse_nregs;
3734 cum->regno += int_nregs;
3735 cum->sse_regno += sse_nregs;
3736 }
3737 else
3738 cum->words += words;
3739 }
3740
3741 void
3742 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3743 tree type, int named ATTRIBUTE_UNUSED)
3744 {
3745 HOST_WIDE_INT bytes, words;
3746
3747 if (mode == BLKmode)
3748 bytes = int_size_in_bytes (type);
3749 else
3750 bytes = GET_MODE_SIZE (mode);
3751 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3752
3753 if (type)
3754 mode = type_natural_mode (type);
3755
3756 if (TARGET_64BIT)
3757 function_arg_advance_64 (cum, mode, type, words);
3758 else
3759 function_arg_advance_32 (cum, mode, type, bytes, words);
3760 }
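/* For example, advancing over the arguments of

     void f (int a, double b);

   on x86-64 consumes one integer register for A and one SSE register for B,
   leaving cum->regno == 1 and cum->sse_regno == 1, with the remaining
   register counts decremented accordingly.  */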
3761
3762 /* Define where to put the arguments to a function.
3763 Value is zero to push the argument on the stack,
3764 or a hard register in which to store the argument.
3765
3766 MODE is the argument's machine mode.
3767 TYPE is the data type of the argument (as a tree).
3768 This is null for libcalls where that information may
3769 not be available.
3770 CUM is a variable of type CUMULATIVE_ARGS which gives info about
3771 the preceding args and about the function being called.
3772 NAMED is nonzero if this argument is a named parameter
3773 (otherwise it is an extra parameter matching an ellipsis). */
3774
3775 static rtx
3776 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3777 enum machine_mode orig_mode, tree type,
3778 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3779 {
3780 static bool warnedsse, warnedmmx;
3781
3782 /* Avoid the AL settings for the Unix64 ABI. */
3783 if (mode == VOIDmode)
3784 return constm1_rtx;
3785
3786 switch (mode)
3787 {
3788 default:
3789 break;
3790
3791 case BLKmode:
3792 if (bytes < 0)
3793 break;
3794 /* FALLTHRU */
3795 case DImode:
3796 case SImode:
3797 case HImode:
3798 case QImode:
3799 if (words <= cum->nregs)
3800 {
3801 int regno = cum->regno;
3802
3803 /* Fastcall allocates the first two DWORD (SImode) or
3804 smaller arguments to ECX and EDX. */
3805 if (cum->fastcall)
3806 {
3807 if (mode == BLKmode || mode == DImode)
3808 break;
3809
3810 /* ECX not EAX is the first allocated register. */
3811 if (regno == 0)
3812 regno = 2;
3813 }
3814 return gen_rtx_REG (mode, regno);
3815 }
3816 break;
3817
3818 case DFmode:
3819 if (cum->float_in_sse < 2)
3820 break;
3821 case SFmode:
3822 if (cum->float_in_sse < 1)
3823 break;
3824 /* FALLTHRU */
3825 case TImode:
3826 case V16QImode:
3827 case V8HImode:
3828 case V4SImode:
3829 case V2DImode:
3830 case V4SFmode:
3831 case V2DFmode:
3832 if (!type || !AGGREGATE_TYPE_P (type))
3833 {
3834 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
3835 {
3836 warnedsse = true;
3837 warning (0, "SSE vector argument without SSE enabled "
3838 "changes the ABI");
3839 }
3840 if (cum->sse_nregs)
3841 return gen_reg_or_parallel (mode, orig_mode,
3842 cum->sse_regno + FIRST_SSE_REG);
3843 }
3844 break;
3845
3846 case V8QImode:
3847 case V4HImode:
3848 case V2SImode:
3849 case V2SFmode:
3850 if (!type || !AGGREGATE_TYPE_P (type))
3851 {
3852 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
3853 {
3854 warnedmmx = true;
3855 warning (0, "MMX vector argument without MMX enabled "
3856 "changes the ABI");
3857 }
3858 if (cum->mmx_nregs)
3859 return gen_reg_or_parallel (mode, orig_mode,
3860 cum->mmx_regno + FIRST_MMX_REG);
3861 }
3862 break;
3863 }
3864
3865 return NULL_RTX;
3866 }
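/* Under the 32-bit fastcall convention handled above, e.g.

     __attribute__((fastcall)) int f (int a, int b, int c);

   receives A in %ecx, B in %edx and C on the stack, since only two register
   slots are available and DImode or BLKmode values are never passed in
   registers here.  */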
3867
3868 static rtx
3869 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3870 enum machine_mode orig_mode, tree type)
3871 {
3872 /* Handle a hidden AL argument containing number of registers
3873 for varargs x86-64 functions. */
3874 if (mode == VOIDmode)
3875 return GEN_INT (cum->maybe_vaarg
3876 ? (cum->sse_nregs < 0
3877 ? SSE_REGPARM_MAX
3878 : cum->sse_regno)
3879 : -1);
3880
3881 return construct_container (mode, orig_mode, type, 0, cum->nregs,
3882 cum->sse_nregs,
3883 &x86_64_int_parameter_registers [cum->regno],
3884 cum->sse_regno);
3885 }
3886
3887 rtx
3888 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
3889 tree type, int named ATTRIBUTE_UNUSED)
3890 {
3891 enum machine_mode mode = omode;
3892 HOST_WIDE_INT bytes, words;
3893
3894 if (mode == BLKmode)
3895 bytes = int_size_in_bytes (type);
3896 else
3897 bytes = GET_MODE_SIZE (mode);
3898 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3899
3900 /* To simplify the code below, represent vector types with a vector mode
3901 even if MMX/SSE are not active. */
3902 if (type && TREE_CODE (type) == VECTOR_TYPE)
3903 mode = type_natural_mode (type);
3904
3905 if (TARGET_64BIT)
3906 return function_arg_64 (cum, mode, omode, type);
3907 else
3908 return function_arg_32 (cum, mode, omode, type, bytes, words);
3909 }
3910
3911 /* A C expression that indicates when an argument must be passed by
3912 reference. If nonzero for an argument, a copy of that argument is
3913 made in memory and a pointer to the argument is passed instead of
3914 the argument itself. The pointer is passed in whatever way is
3915 appropriate for passing a pointer to that type. */
3916
3917 static bool
3918 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
3919 enum machine_mode mode ATTRIBUTE_UNUSED,
3920 tree type, bool named ATTRIBUTE_UNUSED)
3921 {
3922 if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
3923 return 1;
3924
3925 return 0;
3926 }
3927
3928 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
3929 ABI. Only called if TARGET_SSE. */
3930 static bool
3931 contains_128bit_aligned_vector_p (tree type)
3932 {
3933 enum machine_mode mode = TYPE_MODE (type);
3934 if (SSE_REG_MODE_P (mode)
3935 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
3936 return true;
3937 if (TYPE_ALIGN (type) < 128)
3938 return false;
3939
3940 if (AGGREGATE_TYPE_P (type))
3941 {
3942 /* Walk the aggregates recursively. */
3943 switch (TREE_CODE (type))
3944 {
3945 case RECORD_TYPE:
3946 case UNION_TYPE:
3947 case QUAL_UNION_TYPE:
3948 {
3949 tree field;
3950
3951 /* Walk all the structure fields. */
3952 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3953 {
3954 if (TREE_CODE (field) == FIELD_DECL
3955 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
3956 return true;
3957 }
3958 break;
3959 }
3960
3961 case ARRAY_TYPE:
3962 	/* Just for use if some languages pass arrays by value.  */
3963 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
3964 return true;
3965 break;
3966
3967 default:
3968 gcc_unreachable ();
3969 }
3970 }
3971 return false;
3972 }
3973
3974 /* Gives the alignment boundary, in bits, of an argument with the
3975 specified mode and type. */
3976
3977 int
3978 ix86_function_arg_boundary (enum machine_mode mode, tree type)
3979 {
3980 int align;
3981 if (type)
3982 align = TYPE_ALIGN (type);
3983 else
3984 align = GET_MODE_ALIGNMENT (mode);
3985 if (align < PARM_BOUNDARY)
3986 align = PARM_BOUNDARY;
3987 if (!TARGET_64BIT)
3988 {
3989 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
3990 make an exception for SSE modes since these require 128bit
3991 alignment.
3992
3993 The handling here differs from field_alignment. ICC aligns MMX
3994 arguments to 4 byte boundaries, while structure fields are aligned
3995 to 8 byte boundaries. */
3996 if (!TARGET_SSE)
3997 align = PARM_BOUNDARY;
3998 else if (!type)
3999 {
4000 if (!SSE_REG_MODE_P (mode))
4001 align = PARM_BOUNDARY;
4002 }
4003 else
4004 {
4005 if (!contains_128bit_aligned_vector_p (type))
4006 align = PARM_BOUNDARY;
4007 }
4008 }
4009 if (align > 128)
4010 align = 128;
4011 return align;
4012 }
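/* For example, in 32-bit mode a plain double argument stays at the 32-bit
   PARM_BOUNDARY, whereas an __m128 argument, or a struct containing one, is
   aligned to 128 bits when SSE is enabled.  */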
4013
4014 /* Return true if N is a possible register number of function value. */
4015
4016 bool
4017 ix86_function_value_regno_p (int regno)
4018 {
4019 switch (regno)
4020 {
4021 case 0:
4022 return true;
4023
4024 case FIRST_FLOAT_REG:
4025 return TARGET_FLOAT_RETURNS_IN_80387;
4026
4027 case FIRST_SSE_REG:
4028 return TARGET_SSE;
4029
4030 case FIRST_MMX_REG:
4031 if (TARGET_MACHO || TARGET_64BIT)
4032 return false;
4033 return TARGET_MMX;
4034 }
4035
4036 return false;
4037 }
4038
4039 /* Define how to find the value returned by a function.
4040 VALTYPE is the data type of the value (as a tree).
4041 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4042 otherwise, FUNC is 0. */
4043
4044 static rtx
4045 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4046 tree fntype, tree fn)
4047 {
4048 unsigned int regno;
4049
4050 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4051 we normally prevent this case when mmx is not available. However
4052 some ABIs may require the result to be returned like DImode. */
4053 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4054 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4055
4056 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4057 we prevent this case when sse is not available. However some ABIs
4058 may require the result to be returned like integer TImode. */
4059 else if (mode == TImode
4060 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4061 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4062
4063 /* Decimal floating point values can go in %eax, unlike other float modes. */
4064 else if (DECIMAL_FLOAT_MODE_P (mode))
4065 regno = 0;
4066
4067 /* Most things go in %eax, except (unless -mno-fp-ret-in-387) fp values. */
4068 else if (!SCALAR_FLOAT_MODE_P (mode) || !TARGET_FLOAT_RETURNS_IN_80387)
4069 regno = 0;
4070
4071 /* Floating point return values in %st(0), except for local functions when
4072 SSE math is enabled or for functions with sseregparm attribute. */
4073 else
4074 {
4075 regno = FIRST_FLOAT_REG;
4076
4077 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4078 {
4079 int sse_level = ix86_function_sseregparm (fntype, fn);
4080 if ((sse_level >= 1 && mode == SFmode)
4081 || (sse_level == 2 && mode == DFmode))
4082 regno = FIRST_SSE_REG;
4083 }
4084 }
4085
4086 return gen_rtx_REG (orig_mode, regno);
4087 }
4088
4089 static rtx
4090 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4091 tree valtype)
4092 {
4093 rtx ret;
4094
4095 /* Handle libcalls, which don't provide a type node. */
4096 if (valtype == NULL)
4097 {
4098 switch (mode)
4099 {
4100 case SFmode:
4101 case SCmode:
4102 case DFmode:
4103 case DCmode:
4104 case TFmode:
4105 case SDmode:
4106 case DDmode:
4107 case TDmode:
4108 return gen_rtx_REG (mode, FIRST_SSE_REG);
4109 case XFmode:
4110 case XCmode:
4111 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4112 case TCmode:
4113 return NULL;
4114 default:
4115 return gen_rtx_REG (mode, 0);
4116 }
4117 }
4118
4119 ret = construct_container (mode, orig_mode, valtype, 1,
4120 REGPARM_MAX, SSE_REGPARM_MAX,
4121 x86_64_int_return_registers, 0);
4122
4123   /* For zero sized structures, construct_container returns NULL, but we need
4124      to keep the rest of the compiler happy by returning a meaningful value.  */
4125 if (!ret)
4126 ret = gen_rtx_REG (orig_mode, 0);
4127
4128 return ret;
4129 }
4130
4131 static rtx
4132 ix86_function_value_1 (tree valtype, tree fntype_or_decl,
4133 enum machine_mode orig_mode, enum machine_mode mode)
4134 {
4135 tree fn, fntype;
4136
4137 fn = NULL_TREE;
4138 if (fntype_or_decl && DECL_P (fntype_or_decl))
4139 fn = fntype_or_decl;
4140 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4141
4142 if (TARGET_64BIT)
4143 return function_value_64 (orig_mode, mode, valtype);
4144 else
4145 return function_value_32 (orig_mode, mode, fntype, fn);
4146 }
4147
4148 static rtx
4149 ix86_function_value (tree valtype, tree fntype_or_decl,
4150 bool outgoing ATTRIBUTE_UNUSED)
4151 {
4152 enum machine_mode mode, orig_mode;
4153
4154 orig_mode = TYPE_MODE (valtype);
4155 mode = type_natural_mode (valtype);
4156 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4157 }
4158
4159 rtx
4160 ix86_libcall_value (enum machine_mode mode)
4161 {
4162 return ix86_function_value_1 (NULL, NULL, mode, mode);
4163 }
4164
4165 /* Return true iff type is returned in memory. */
4166
4167 static int
4168 return_in_memory_32 (tree type, enum machine_mode mode)
4169 {
4170 HOST_WIDE_INT size;
4171
4172 if (mode == BLKmode)
4173 return 1;
4174
4175 size = int_size_in_bytes (type);
4176
4177 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4178 return 0;
4179
4180 if (VECTOR_MODE_P (mode) || mode == TImode)
4181 {
4182 /* User-created vectors small enough to fit in EAX. */
4183 if (size < 8)
4184 return 0;
4185
4186 /* MMX/3dNow values are returned in MM0,
4187 	 except when it doesn't exist.  */
4188 if (size == 8)
4189 return (TARGET_MMX ? 0 : 1);
4190
4191 /* SSE values are returned in XMM0, except when it doesn't exist. */
4192 if (size == 16)
4193 return (TARGET_SSE ? 0 : 1);
4194 }
4195
4196 if (mode == XFmode)
4197 return 0;
4198
4199 if (mode == TDmode)
4200 return 1;
4201
4202 if (size > 12)
4203 return 1;
4204 return 0;
4205 }
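/* Roughly: in 32-bit mode a long double (XFmode) is normally returned in
   %st(0), an 8-byte vector comes back in %mm0 only when MMX is enabled
   (otherwise in memory), and other types larger than 12 bytes are returned
   in memory.  */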
4206
4207 static int
4208 return_in_memory_64 (tree type, enum machine_mode mode)
4209 {
4210 int needed_intregs, needed_sseregs;
4211 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4212 }
4213
4214 int
4215 ix86_return_in_memory (tree type)
4216 {
4217 enum machine_mode mode = type_natural_mode (type);
4218
4219 if (TARGET_64BIT)
4220 return return_in_memory_64 (type, mode);
4221 else
4222 return return_in_memory_32 (type, mode);
4223 }
4224
4225 /* When returning SSE vector types, we have a choice of either
4226 (1) being abi incompatible with a -march switch, or
4227 (2) generating an error.
4228 Given no good solution, I think the safest thing is one warning.
4229 The user won't be able to use -Werror, but....
4230
4231 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4232 called in response to actually generating a caller or callee that
4233 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4234 via aggregate_value_p for general type probing from tree-ssa. */
4235
4236 static rtx
4237 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4238 {
4239 static bool warnedsse, warnedmmx;
4240
4241 if (!TARGET_64BIT && type)
4242 {
4243 /* Look at the return type of the function, not the function type. */
4244 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4245
4246 if (!TARGET_SSE && !warnedsse)
4247 {
4248 if (mode == TImode
4249 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4250 {
4251 warnedsse = true;
4252 warning (0, "SSE vector return without SSE enabled "
4253 "changes the ABI");
4254 }
4255 }
4256
4257 if (!TARGET_MMX && !warnedmmx)
4258 {
4259 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4260 {
4261 warnedmmx = true;
4262 warning (0, "MMX vector return without MMX enabled "
4263 "changes the ABI");
4264 }
4265 }
4266 }
4267
4268 return NULL;
4269 }
4270
4271 \f
4272 /* Create the va_list data type. */
4273
4274 static tree
4275 ix86_build_builtin_va_list (void)
4276 {
4277 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4278
4279   /* For i386 we use a plain pointer to the argument area.  */
4280 if (!TARGET_64BIT)
4281 return build_pointer_type (char_type_node);
4282
4283 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4284 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4285
4286 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4287 unsigned_type_node);
4288 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4289 unsigned_type_node);
4290 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4291 ptr_type_node);
4292 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4293 ptr_type_node);
4294
4295 va_list_gpr_counter_field = f_gpr;
4296 va_list_fpr_counter_field = f_fpr;
4297
4298 DECL_FIELD_CONTEXT (f_gpr) = record;
4299 DECL_FIELD_CONTEXT (f_fpr) = record;
4300 DECL_FIELD_CONTEXT (f_ovf) = record;
4301 DECL_FIELD_CONTEXT (f_sav) = record;
4302
4303 TREE_CHAIN (record) = type_decl;
4304 TYPE_NAME (record) = type_decl;
4305 TYPE_FIELDS (record) = f_gpr;
4306 TREE_CHAIN (f_gpr) = f_fpr;
4307 TREE_CHAIN (f_fpr) = f_ovf;
4308 TREE_CHAIN (f_ovf) = f_sav;
4309
4310 layout_type (record);
4311
4312 /* The correct type is an array type of one element. */
4313 return build_array_type (record, build_index_type (size_zero_node));
4314 }
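/* The 64-bit record built above corresponds roughly to

     typedef struct __va_list_tag {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   as specified by the x86-64 psABI.  */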
4315
4316 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4317
4318 static void
4319 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4320 {
4321 rtx save_area, mem;
4322 rtx label;
4323 rtx label_ref;
4324 rtx tmp_reg;
4325 rtx nsse_reg;
4326 int set;
4327 int i;
4328
4329 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4330 return;
4331
4332 /* Indicate to allocate space on the stack for varargs save area. */
4333 ix86_save_varrargs_registers = 1;
4334 cfun->stack_alignment_needed = 128;
4335
4336 save_area = frame_pointer_rtx;
4337 set = get_varargs_alias_set ();
4338
4339 for (i = cum->regno;
4340 i < ix86_regparm
4341 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4342 i++)
4343 {
4344 mem = gen_rtx_MEM (Pmode,
4345 plus_constant (save_area, i * UNITS_PER_WORD));
4346 MEM_NOTRAP_P (mem) = 1;
4347 set_mem_alias_set (mem, set);
4348 emit_move_insn (mem, gen_rtx_REG (Pmode,
4349 x86_64_int_parameter_registers[i]));
4350 }
4351
4352 if (cum->sse_nregs && cfun->va_list_fpr_size)
4353 {
4354       /* Now emit code to save SSE registers.  The AX parameter contains the
4355 	 number of SSE parameter registers used to call this function.  We use
4356 	 the sse_prologue_save insn template, which produces a computed jump
4357 	 across the SSE saves.  We need some preparation work to get this working.  */
4358
4359 label = gen_label_rtx ();
4360 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4361
4362       /* Compute address to jump to:
4363 	 label - eax*4 + nnamed_sse_arguments*4  */
4364 tmp_reg = gen_reg_rtx (Pmode);
4365 nsse_reg = gen_reg_rtx (Pmode);
4366 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4367 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4368 gen_rtx_MULT (Pmode, nsse_reg,
4369 GEN_INT (4))));
4370 if (cum->sse_regno)
4371 emit_move_insn
4372 (nsse_reg,
4373 gen_rtx_CONST (DImode,
4374 gen_rtx_PLUS (DImode,
4375 label_ref,
4376 GEN_INT (cum->sse_regno * 4))));
4377 else
4378 emit_move_insn (nsse_reg, label_ref);
4379 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4380
4381       /* Compute the address of the memory block we save into.  We always use
4382 	 a pointer pointing 127 bytes after the first byte to store - this is
4383 	 needed to keep the instruction size limited to 4 bytes.  */
4384 tmp_reg = gen_reg_rtx (Pmode);
4385 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4386 plus_constant (save_area,
4387 8 * REGPARM_MAX + 127)));
4388 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4389 MEM_NOTRAP_P (mem) = 1;
4390 set_mem_alias_set (mem, set);
4391 set_mem_align (mem, BITS_PER_WORD);
4392
4393 /* And finally do the dirty job! */
4394 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4395 GEN_INT (cum->sse_regno), label));
4396 }
4397 }
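/* The register save area laid out above therefore looks roughly like

     offset   0 ..  47 : the six integer argument registers, 8 bytes each
     offset  48 .. 175 : the eight SSE argument registers, 16 bytes each

   which matches the gp_offset/fp_offset values stored by ix86_va_start
   below.  */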
4398
4399 static void
4400 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4401 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4402 int no_rtl)
4403 {
4404 CUMULATIVE_ARGS next_cum;
4405 tree fntype;
4406 int stdarg_p;
4407
4408 /* This argument doesn't appear to be used anymore. Which is good,
4409 because the old code here didn't suppress rtl generation. */
4410 gcc_assert (!no_rtl);
4411
4412 if (!TARGET_64BIT)
4413 return;
4414
4415 fntype = TREE_TYPE (current_function_decl);
4416 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4417 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4418 != void_type_node));
4419
4420 /* For varargs, we do not want to skip the dummy va_dcl argument.
4421 For stdargs, we do want to skip the last named argument. */
4422 next_cum = *cum;
4423 if (stdarg_p)
4424 function_arg_advance (&next_cum, mode, type, 1);
4425
4426 setup_incoming_varargs_64 (&next_cum);
4427 }
4428
4429 /* Implement va_start. */
4430
4431 void
4432 ix86_va_start (tree valist, rtx nextarg)
4433 {
4434 HOST_WIDE_INT words, n_gpr, n_fpr;
4435 tree f_gpr, f_fpr, f_ovf, f_sav;
4436 tree gpr, fpr, ovf, sav, t;
4437 tree type;
4438
4439 /* Only 64bit target needs something special. */
4440 if (!TARGET_64BIT)
4441 {
4442 std_expand_builtin_va_start (valist, nextarg);
4443 return;
4444 }
4445
4446 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4447 f_fpr = TREE_CHAIN (f_gpr);
4448 f_ovf = TREE_CHAIN (f_fpr);
4449 f_sav = TREE_CHAIN (f_ovf);
4450
4451 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4452 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4453 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4454 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4455 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4456
4457 /* Count number of gp and fp argument registers used. */
4458 words = current_function_args_info.words;
4459 n_gpr = current_function_args_info.regno;
4460 n_fpr = current_function_args_info.sse_regno;
4461
4462 if (cfun->va_list_gpr_size)
4463 {
4464 type = TREE_TYPE (gpr);
4465 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4466 build_int_cst (type, n_gpr * 8));
4467 TREE_SIDE_EFFECTS (t) = 1;
4468 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4469 }
4470
4471 if (cfun->va_list_fpr_size)
4472 {
4473 type = TREE_TYPE (fpr);
4474 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4475 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4476 TREE_SIDE_EFFECTS (t) = 1;
4477 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4478 }
4479
4480 /* Find the overflow area. */
4481 type = TREE_TYPE (ovf);
4482 t = make_tree (type, virtual_incoming_args_rtx);
4483 if (words != 0)
4484 t = build2 (PLUS_EXPR, type, t,
4485 build_int_cst (type, words * UNITS_PER_WORD));
4486 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4487 TREE_SIDE_EFFECTS (t) = 1;
4488 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4489
4490 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4491 {
4492       /* Find the register save area.
4493 	 The function prologue saves it right above the stack frame.  */
4494 type = TREE_TYPE (sav);
4495 t = make_tree (type, frame_pointer_rtx);
4496 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4497 TREE_SIDE_EFFECTS (t) = 1;
4498 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4499 }
4500 }
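/* For instance, after va_start in

     void f (int a, ...)

   gp_offset is 8 (one named integer register consumed), fp_offset is 48
   (no named SSE registers, so it points at the start of the SSE save area),
   and overflow_arg_area points just past any named stack arguments.  */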
4501
4502 /* Implement va_arg. */
4503
4504 static tree
4505 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4506 {
4507 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4508 tree f_gpr, f_fpr, f_ovf, f_sav;
4509 tree gpr, fpr, ovf, sav, t;
4510 int size, rsize;
4511 tree lab_false, lab_over = NULL_TREE;
4512 tree addr, t2;
4513 rtx container;
4514 int indirect_p = 0;
4515 tree ptrtype;
4516 enum machine_mode nat_mode;
4517
4518 /* Only 64bit target needs something special. */
4519 if (!TARGET_64BIT)
4520 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4521
4522 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4523 f_fpr = TREE_CHAIN (f_gpr);
4524 f_ovf = TREE_CHAIN (f_fpr);
4525 f_sav = TREE_CHAIN (f_ovf);
4526
4527 valist = build_va_arg_indirect_ref (valist);
4528 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4529 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4530 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4531 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4532
4533 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4534 if (indirect_p)
4535 type = build_pointer_type (type);
4536 size = int_size_in_bytes (type);
4537 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4538
4539 nat_mode = type_natural_mode (type);
4540 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4541 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4542
4543 /* Pull the value out of the saved registers. */
4544
4545 addr = create_tmp_var (ptr_type_node, "addr");
4546 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4547
4548 if (container)
4549 {
4550 int needed_intregs, needed_sseregs;
4551 bool need_temp;
4552 tree int_addr, sse_addr;
4553
4554 lab_false = create_artificial_label ();
4555 lab_over = create_artificial_label ();
4556
4557 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4558
4559 need_temp = (!REG_P (container)
4560 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4561 || TYPE_ALIGN (type) > 128));
4562
4563       /* In case we are passing a structure, verify that it is a consecutive
4564 	 block on the register save area.  If not, we need to do moves.  */
4565 if (!need_temp && !REG_P (container))
4566 {
4567 /* Verify that all registers are strictly consecutive */
4568 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4569 {
4570 int i;
4571
4572 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4573 {
4574 rtx slot = XVECEXP (container, 0, i);
4575 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4576 || INTVAL (XEXP (slot, 1)) != i * 16)
4577 need_temp = 1;
4578 }
4579 }
4580 else
4581 {
4582 int i;
4583
4584 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4585 {
4586 rtx slot = XVECEXP (container, 0, i);
4587 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4588 || INTVAL (XEXP (slot, 1)) != i * 8)
4589 need_temp = 1;
4590 }
4591 }
4592 }
4593 if (!need_temp)
4594 {
4595 int_addr = addr;
4596 sse_addr = addr;
4597 }
4598 else
4599 {
4600 int_addr = create_tmp_var (ptr_type_node, "int_addr");
4601 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
4602 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
4603 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
4604 }
4605
4606 /* First ensure that we fit completely in registers. */
4607 if (needed_intregs)
4608 {
4609 t = build_int_cst (TREE_TYPE (gpr),
4610 (REGPARM_MAX - needed_intregs + 1) * 8);
4611 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
4612 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4613 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4614 gimplify_and_add (t, pre_p);
4615 }
4616 if (needed_sseregs)
4617 {
4618 t = build_int_cst (TREE_TYPE (fpr),
4619 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
4620 + REGPARM_MAX * 8);
4621 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
4622 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
4623 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
4624 gimplify_and_add (t, pre_p);
4625 }
4626
4627 /* Compute index to start of area used for integer regs. */
4628 if (needed_intregs)
4629 {
4630 /* int_addr = gpr + sav; */
4631 t = fold_convert (ptr_type_node, gpr);
4632 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4633 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
4634 gimplify_and_add (t, pre_p);
4635 }
4636 if (needed_sseregs)
4637 {
4638 /* sse_addr = fpr + sav; */
4639 t = fold_convert (ptr_type_node, fpr);
4640 t = build2 (PLUS_EXPR, ptr_type_node, sav, t);
4641 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
4642 gimplify_and_add (t, pre_p);
4643 }
4644 if (need_temp)
4645 {
4646 int i;
4647 tree temp = create_tmp_var (type, "va_arg_tmp");
4648
4649 /* addr = &temp; */
4650 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
4651 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4652 gimplify_and_add (t, pre_p);
4653
4654 for (i = 0; i < XVECLEN (container, 0); i++)
4655 {
4656 rtx slot = XVECEXP (container, 0, i);
4657 rtx reg = XEXP (slot, 0);
4658 enum machine_mode mode = GET_MODE (reg);
4659 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
4660 tree addr_type = build_pointer_type (piece_type);
4661 tree src_addr, src;
4662 int src_offset;
4663 tree dest_addr, dest;
4664
4665 if (SSE_REGNO_P (REGNO (reg)))
4666 {
4667 src_addr = sse_addr;
4668 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
4669 }
4670 else
4671 {
4672 src_addr = int_addr;
4673 src_offset = REGNO (reg) * 8;
4674 }
4675 src_addr = fold_convert (addr_type, src_addr);
4676 src_addr = fold_build2 (PLUS_EXPR, addr_type, src_addr,
4677 size_int (src_offset));
4678 src = build_va_arg_indirect_ref (src_addr);
4679
4680 dest_addr = fold_convert (addr_type, addr);
4681 dest_addr = fold_build2 (PLUS_EXPR, addr_type, dest_addr,
4682 size_int (INTVAL (XEXP (slot, 1))));
4683 dest = build_va_arg_indirect_ref (dest_addr);
4684
4685 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
4686 gimplify_and_add (t, pre_p);
4687 }
4688 }
4689
4690 if (needed_intregs)
4691 {
4692 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
4693 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
4694 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
4695 gimplify_and_add (t, pre_p);
4696 }
4697 if (needed_sseregs)
4698 {
4699 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
4700 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
4701 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
4702 gimplify_and_add (t, pre_p);
4703 }
4704
4705 t = build1 (GOTO_EXPR, void_type_node, lab_over);
4706 gimplify_and_add (t, pre_p);
4707
4708 t = build1 (LABEL_EXPR, void_type_node, lab_false);
4709 append_to_statement_list (t, pre_p);
4710 }
4711
4712 /* ... otherwise out of the overflow area. */
4713
4714 /* Care for on-stack alignment if needed. */
4715 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
4716 || integer_zerop (TYPE_SIZE (type)))
4717 t = ovf;
4718 else
4719 {
4720 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
4721 t = build2 (PLUS_EXPR, TREE_TYPE (ovf), ovf,
4722 build_int_cst (TREE_TYPE (ovf), align - 1));
4723 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
4724 build_int_cst (TREE_TYPE (t), -align));
4725 }
4726 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
4727
4728 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
4729 gimplify_and_add (t2, pre_p);
4730
4731 t = build2 (PLUS_EXPR, TREE_TYPE (t), t,
4732 build_int_cst (TREE_TYPE (t), rsize * UNITS_PER_WORD));
4733 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
4734 gimplify_and_add (t, pre_p);
4735
4736 if (container)
4737 {
4738 t = build1 (LABEL_EXPR, void_type_node, lab_over);
4739 append_to_statement_list (t, pre_p);
4740 }
4741
4742 ptrtype = build_pointer_type (type);
4743 addr = fold_convert (ptrtype, addr);
4744
4745 if (indirect_p)
4746 addr = build_va_arg_indirect_ref (addr);
4747 return build_va_arg_indirect_ref (addr);
4748 }
4749 \f
4750 /* Return nonzero if OPNUM's MEM should be matched
4751 in movabs* patterns. */
4752
4753 int
4754 ix86_check_movabs (rtx insn, int opnum)
4755 {
4756 rtx set, mem;
4757
4758 set = PATTERN (insn);
4759 if (GET_CODE (set) == PARALLEL)
4760 set = XVECEXP (set, 0, 0);
4761 gcc_assert (GET_CODE (set) == SET);
4762 mem = XEXP (set, opnum);
4763 while (GET_CODE (mem) == SUBREG)
4764 mem = SUBREG_REG (mem);
4765 gcc_assert (MEM_P (mem));
4766 return (volatile_ok || !MEM_VOLATILE_P (mem));
4767 }
4768 \f
4769 /* Initialize the table of extra 80387 mathematical constants. */
4770
4771 static void
4772 init_ext_80387_constants (void)
4773 {
4774 static const char * cst[5] =
4775 {
4776 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
4777 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
4778 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
4779 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
4780 "3.1415926535897932385128089594061862044", /* 4: fldpi */
4781 };
4782 int i;
4783
4784 for (i = 0; i < 5; i++)
4785 {
4786 real_from_string (&ext_80387_constants_table[i], cst[i]);
4787 /* Ensure each constant is rounded to XFmode precision. */
4788 real_convert (&ext_80387_constants_table[i],
4789 XFmode, &ext_80387_constants_table[i]);
4790 }
4791
4792 ext_80387_constants_init = 1;
4793 }
4794
4795 /* Return true if the constant is something that can be loaded with
4796 a special instruction. */
4797
4798 int
4799 standard_80387_constant_p (rtx x)
4800 {
4801 REAL_VALUE_TYPE r;
4802
4803 if (GET_CODE (x) != CONST_DOUBLE || !FLOAT_MODE_P (GET_MODE (x)))
4804 return -1;
4805
4806 if (x == CONST0_RTX (GET_MODE (x)))
4807 return 1;
4808 if (x == CONST1_RTX (GET_MODE (x)))
4809 return 2;
4810
4811 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
4812
4813 /* For XFmode constants, try to find a special 80387 instruction when
4814 optimizing for size or on those CPUs that benefit from them. */
4815 if (GET_MODE (x) == XFmode
4816 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
4817 {
4818 int i;
4819
4820 if (! ext_80387_constants_init)
4821 init_ext_80387_constants ();
4822
4823 for (i = 0; i < 5; i++)
4824 if (real_identical (&r, &ext_80387_constants_table[i]))
4825 return i + 3;
4826 }
4827
4828 /* Load of the constant -0.0 or -1.0 will be split as
4829 fldz;fchs or fld1;fchs sequence. */
4830 if (real_isnegzero (&r))
4831 return 8;
4832 if (real_identical (&r, &dconstm1))
4833 return 9;
4834
4835 return 0;
4836 }
4837
4838 /* Return the opcode of the special instruction to be used to load
4839 the constant X. */
4840
4841 const char *
4842 standard_80387_constant_opcode (rtx x)
4843 {
4844 switch (standard_80387_constant_p (x))
4845 {
4846 case 1:
4847 return "fldz";
4848 case 2:
4849 return "fld1";
4850 case 3:
4851 return "fldlg2";
4852 case 4:
4853 return "fldln2";
4854 case 5:
4855 return "fldl2e";
4856 case 6:
4857 return "fldl2t";
4858 case 7:
4859 return "fldpi";
4860 case 8:
4861 case 9:
4862 return "#";
4863 default:
4864 gcc_unreachable ();
4865 }
4866 }
4867
4868 /* Return the CONST_DOUBLE representing the 80387 constant that is
4869 loaded by the specified special instruction. The argument IDX
4870 matches the return value from standard_80387_constant_p. */
4871
4872 rtx
4873 standard_80387_constant_rtx (int idx)
4874 {
4875 int i;
4876
4877 if (! ext_80387_constants_init)
4878 init_ext_80387_constants ();
4879
4880 switch (idx)
4881 {
4882 case 3:
4883 case 4:
4884 case 5:
4885 case 6:
4886 case 7:
4887 i = idx - 3;
4888 break;
4889
4890 default:
4891 gcc_unreachable ();
4892 }
4893
4894 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
4895 XFmode);
4896 }
4897
4898 /* Return 1 if MODE is a valid mode for SSE.  */
4899 static int
4900 standard_sse_mode_p (enum machine_mode mode)
4901 {
4902 switch (mode)
4903 {
4904 case V16QImode:
4905 case V8HImode:
4906 case V4SImode:
4907 case V2DImode:
4908 case V4SFmode:
4909 case V2DFmode:
4910 return 1;
4911
4912 default:
4913 return 0;
4914 }
4915 }
4916
4917 /* Return 1 if X is an FP constant that we can load into an SSE register
4918    without using memory.  */
4919 int
4920 standard_sse_constant_p (rtx x)
4921 {
4922 enum machine_mode mode = GET_MODE (x);
4923
4924 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
4925 return 1;
4926 if (vector_all_ones_operand (x, mode)
4927 && standard_sse_mode_p (mode))
4928 return TARGET_SSE2 ? 2 : -1;
4929
4930 return 0;
4931 }
4932
4933 /* Return the opcode of the special instruction to be used to load
4934 the constant X. */
4935
4936 const char *
4937 standard_sse_constant_opcode (rtx insn, rtx x)
4938 {
4939 switch (standard_sse_constant_p (x))
4940 {
4941 case 1:
4942 if (get_attr_mode (insn) == MODE_V4SF)
4943 return "xorps\t%0, %0";
4944 else if (get_attr_mode (insn) == MODE_V2DF)
4945 return "xorpd\t%0, %0";
4946 else
4947 return "pxor\t%0, %0";
4948 case 2:
4949 return "pcmpeqd\t%0, %0";
4950 }
4951 gcc_unreachable ();
4952 }
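/* E.g. an all-zero vector constant is loaded with "xorps %xmm0, %xmm0"
   (or xorpd/pxor depending on the insn's mode attribute), and, with SSE2,
   an all-ones vector with "pcmpeqd %xmm0, %xmm0".  */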
4953
4954 /* Return 1 if OP contains a symbol reference.  */
4955
4956 int
4957 symbolic_reference_mentioned_p (rtx op)
4958 {
4959 const char *fmt;
4960 int i;
4961
4962 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
4963 return 1;
4964
4965 fmt = GET_RTX_FORMAT (GET_CODE (op));
4966 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
4967 {
4968 if (fmt[i] == 'E')
4969 {
4970 int j;
4971
4972 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
4973 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
4974 return 1;
4975 }
4976
4977 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
4978 return 1;
4979 }
4980
4981 return 0;
4982 }
4983
4984 /* Return 1 if it is appropriate to emit `ret' instructions in the
4985 body of a function. Do this only if the epilogue is simple, needing a
4986 couple of insns. Prior to reloading, we can't tell how many registers
4987 must be saved, so return 0 then. Return 0 if there is no frame
4988 marker to de-allocate. */
4989
4990 int
4991 ix86_can_use_return_insn_p (void)
4992 {
4993 struct ix86_frame frame;
4994
4995 if (! reload_completed || frame_pointer_needed)
4996 return 0;
4997
4998   /* Don't allow more than 32768 bytes of pop, since that's all we can do
4999      with one instruction.  */
5000 if (current_function_pops_args
5001 && current_function_args_size >= 32768)
5002 return 0;
5003
5004 ix86_compute_frame_layout (&frame);
5005 return frame.to_allocate == 0 && frame.nregs == 0;
5006 }
5007 \f
5008 /* Value should be nonzero if functions must have frame pointers.
5009 Zero means the frame pointer need not be set up (and parms may
5010 be accessed via the stack pointer) in functions that seem suitable. */
5011
5012 int
5013 ix86_frame_pointer_required (void)
5014 {
5015 /* If we accessed previous frames, then the generated code expects
5016 to be able to access the saved ebp value in our frame. */
5017 if (cfun->machine->accesses_prev_frame)
5018 return 1;
5019
5020 /* Several x86 os'es need a frame pointer for other reasons,
5021 usually pertaining to setjmp. */
5022 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5023 return 1;
5024
5025 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5026 the frame pointer by default. Turn it back on now if we've not
5027 got a leaf function. */
5028 if (TARGET_OMIT_LEAF_FRAME_POINTER
5029 && (!current_function_is_leaf
5030 || ix86_current_function_calls_tls_descriptor))
5031 return 1;
5032
5033 if (current_function_profile)
5034 return 1;
5035
5036 return 0;
5037 }
5038
5039 /* Record that the current function accesses previous call frames. */
5040
5041 void
5042 ix86_setup_frame_addresses (void)
5043 {
5044 cfun->machine->accesses_prev_frame = 1;
5045 }
5046 \f
5047 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5048 # define USE_HIDDEN_LINKONCE 1
5049 #else
5050 # define USE_HIDDEN_LINKONCE 0
5051 #endif
5052
5053 static int pic_labels_used;
5054
5055 /* Fills in the label name that should be used for a pc thunk for
5056 the given register. */
5057
5058 static void
5059 get_pc_thunk_name (char name[32], unsigned int regno)
5060 {
5061 gcc_assert (!TARGET_64BIT);
5062
5063 if (USE_HIDDEN_LINKONCE)
5064 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5065 else
5066 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5067 }
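/* Illustrative note: with USE_HIDDEN_LINKONCE the thunk for %ebx is named
   "__i686.get_pc_thunk.bx", the suffix coming from reg_names[regno].  */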
5068
5069
5070 /* At the end of the file, emit the pc thunks needed for -fpic: each thunk
5071 loads the caller's return address into its register and then returns. */
5072
5073 void
5074 ix86_file_end (void)
5075 {
5076 rtx xops[2];
5077 int regno;
5078
5079 for (regno = 0; regno < 8; ++regno)
5080 {
5081 char name[32];
5082
5083 if (! ((pic_labels_used >> regno) & 1))
5084 continue;
5085
5086 get_pc_thunk_name (name, regno);
5087
5088 #if TARGET_MACHO
5089 if (TARGET_MACHO)
5090 {
5091 switch_to_section (darwin_sections[text_coal_section]);
5092 fputs ("\t.weak_definition\t", asm_out_file);
5093 assemble_name (asm_out_file, name);
5094 fputs ("\n\t.private_extern\t", asm_out_file);
5095 assemble_name (asm_out_file, name);
5096 fputs ("\n", asm_out_file);
5097 ASM_OUTPUT_LABEL (asm_out_file, name);
5098 }
5099 else
5100 #endif
5101 if (USE_HIDDEN_LINKONCE)
5102 {
5103 tree decl;
5104
5105 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5106 error_mark_node);
5107 TREE_PUBLIC (decl) = 1;
5108 TREE_STATIC (decl) = 1;
5109 DECL_ONE_ONLY (decl) = 1;
5110
5111 (*targetm.asm_out.unique_section) (decl, 0);
5112 switch_to_section (get_named_section (decl, NULL, 0));
5113
5114 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5115 fputs ("\t.hidden\t", asm_out_file);
5116 assemble_name (asm_out_file, name);
5117 fputc ('\n', asm_out_file);
5118 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5119 }
5120 else
5121 {
5122 switch_to_section (text_section);
5123 ASM_OUTPUT_LABEL (asm_out_file, name);
5124 }
5125
5126 xops[0] = gen_rtx_REG (SImode, regno);
5127 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5128 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5129 output_asm_insn ("ret", xops);
5130 }
5131
5132 if (NEED_INDICATE_EXEC_STACK)
5133 file_end_indicate_exec_stack ();
5134 }
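/* Illustrative sketch of the thunk body emitted above, assuming %ebx:

       __i686.get_pc_thunk.bx:
               movl    (%esp), %ebx
               ret

   i.e. the caller's return address, sitting on top of the stack at this
   point, is copied into the chosen register before returning.  */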
5135
5136 /* Emit code for the SET_GOT patterns. */
5137
5138 const char *
5139 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5140 {
5141 rtx xops[3];
5142
5143 xops[0] = dest;
5144
5145 if (TARGET_VXWORKS_RTP && flag_pic)
5146 {
5147 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5148 xops[2] = gen_rtx_MEM (Pmode,
5149 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5150 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5151
5152 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5153 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5154 an unadorned address. */
5155 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5156 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5157 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5158 return "";
5159 }
5160
5161 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5162
5163 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5164 {
5165 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5166
5167 if (!flag_pic)
5168 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5169 else
5170 output_asm_insn ("call\t%a2", xops);
5171
5172 #if TARGET_MACHO
5173 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5174 is what will be referenced by the Mach-O PIC subsystem. */
5175 if (!label)
5176 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5177 #endif
5178
5179 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5180 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5181
5182 if (flag_pic)
5183 output_asm_insn ("pop{l}\t%0", xops);
5184 }
5185 else
5186 {
5187 char name[32];
5188 get_pc_thunk_name (name, REGNO (dest));
5189 pic_labels_used |= 1 << REGNO (dest);
5190
5191 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5192 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5193 output_asm_insn ("call\t%X2", xops);
5194 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5195 is what will be referenced by the Mach-O PIC subsystem. */
5196 #if TARGET_MACHO
5197 if (!label)
5198 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5199 else
5200 targetm.asm_out.internal_label (asm_out_file, "L",
5201 CODE_LABEL_NUMBER (label));
5202 #endif
5203 }
5204
5205 if (TARGET_MACHO)
5206 return "";
5207
5208 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5209 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5210 else
5211 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5212
5213 return "";
5214 }
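/* Illustrative sketch only: without deep branch prediction the 32-bit PIC
   sequence emitted above is roughly

       call    .L2
   .L2:
       popl    %ebx
       addl    $_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   while with the pc thunks it becomes roughly

       call    __i686.get_pc_thunk.bx
       addl    $_GLOBAL_OFFSET_TABLE_, %ebx

   The actual register depends on DEST; %ebx is assumed here.  */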
5215
5216 /* Generate a "push" pattern for input ARG. */
5217
5218 static rtx
5219 gen_push (rtx arg)
5220 {
5221 return gen_rtx_SET (VOIDmode,
5222 gen_rtx_MEM (Pmode,
5223 gen_rtx_PRE_DEC (Pmode,
5224 stack_pointer_rtx)),
5225 arg);
5226 }
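/* Illustrative note: for 32-bit code (Pmode == SImode) the returned pattern is

       (set (mem:SI (pre_dec:SI (reg:SI sp))) arg)

   which matches the machine description's push instruction.  */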
5227
5228 /* Return the number of an unused call-clobbered register available for
5229 the entire function, or INVALID_REGNUM if there is none. */
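/* (Illustrative note: the candidates tried below are hard registers 2, 1
   and 0, i.e. %ecx, %edx and %eax on this port.)  */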
5230
5231 static unsigned int
5232 ix86_select_alt_pic_regnum (void)
5233 {
5234 if (current_function_is_leaf && !current_function_profile
5235 && !ix86_current_function_calls_tls_descriptor)
5236 {
5237 int i;
5238 for (i = 2; i >= 0; --i)
5239 if (!regs_ever_live[i])
5240 return i;
5241 }
5242
5243 return INVALID_REGNUM;
5244 }
5245
5246 /* Return 1 if we need to save REGNO. */
5247 static int
5248 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5249 {
5250 if (pic_offset_table_rtx
5251 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5252 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5253 || current_function_profile
5254 || current_function_calls_eh_return
5255 || current_function_uses_const_pool))
5256 {
5257 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5258 return 0;
5259 return 1;
5260 }
5261
5262 if (current_function_calls_eh_return && maybe_eh_return)
5263 {
5264 unsigned i;
5265 for (i = 0; ; i++)
5266 {
5267 unsigned test = EH_RETURN_DATA_REGNO (i);
5268 if (test == INVALID_REGNUM)
5269 break;
5270 if (test == regno)
5271 return 1;
5272 }
5273 }
5274
5275 if (cfun->machine->force_align_arg_pointer
5276 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5277 return 1;
5278
5279 return (regs_ever_live[regno]
5280 && !call_used_regs[regno]
5281 && !fixed_regs[regno]
5282 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5283 }
5284
5285 /* Return number of registers to be saved on the stack. */
5286
5287 static int
5288 ix86_nsaved_regs (void)
5289 {
5290 int nregs = 0;
5291 int regno;
5292
5293 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5294 if (ix86_save_reg (regno, true))
5295 nregs++;
5296 return nregs;
5297 }
5298
5299 /* Return the offset between two registers, one to be eliminated, and the other
5300 its replacement, at the start of a routine. */
5301
5302 HOST_WIDE_INT
5303 ix86_initial_elimination_offset (int from, int to)
5304 {
5305 struct ix86_frame frame;
5306 ix86_compute_frame_layout (&frame);
5307
5308 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5309 return frame.hard_frame_pointer_offset;
5310 else if (from == FRAME_POINTER_REGNUM
5311 && to == HARD_FRAME_POINTER_REGNUM)
5312 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5313 else
5314 {
5315 gcc_assert (to == STACK_POINTER_REGNUM);
5316
5317 if (from == ARG_POINTER_REGNUM)
5318 return frame.stack_pointer_offset;
5319
5320 gcc_assert (from == FRAME_POINTER_REGNUM);
5321 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5322 }
5323 }
5324
5325 /* Fill in the ix86_frame structure describing the frame of the currently compiled function. */
5326
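/* Rough sketch of the layout computed below (offsets grow downward from the
   incoming argument pointer; the diagram is illustrative, not normative):

       return address
       saved frame pointer (if needed)     <- hard_frame_pointer_offset
       saved hard registers
       va-arg register save area
       padding1
       local variables                     <- frame_pointer_offset
       outgoing arguments
       padding2                            <- stack_pointer_offset

   to_allocate is roughly everything below the saved registers, before the
   red-zone adjustment at the end of this function.  */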
5327 static void
5328 ix86_compute_frame_layout (struct ix86_frame *frame)
5329 {
5330 HOST_WIDE_INT total_size;
5331 unsigned int stack_alignment_needed;
5332 HOST_WIDE_INT offset;
5333 unsigned int preferred_alignment;
5334 HOST_WIDE_INT size = get_frame_size ();
5335
5336 frame->nregs = ix86_nsaved_regs ();
5337 total_size = size;
5338
5339 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5340 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5341
5342 /* During reload iteration the number of registers saved can change.
5343 Recompute the value as needed. Do not recompute when the number of registers
5344 didn't change, as reload does multiple calls to the function and does not
5345 expect the decision to change within a single iteration. */
5346 if (!optimize_size
5347 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5348 {
5349 int count = frame->nregs;
5350
5351 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5352 /* The fast prologue uses move instead of push to save registers. This
5353 is significantly longer, but also executes faster as modern hardware
5354 can execute the moves in parallel, but can't do that for push/pop.
5355
5356 Be careful about choosing which prologue to emit: when a function takes
5357 many instructions to execute we may use the slow version, as we also do
5358 when the function is known to be outside a hot spot (this is known with
5359 profile feedback only). Weight the size of the function by the number of
5360 registers to save, as it is cheap to use one or two push instructions but very
5361 slow to use many of them. */
5362 if (count)
5363 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5364 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5365 || (flag_branch_probabilities
5366 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5367 cfun->machine->use_fast_prologue_epilogue = false;
5368 else
5369 cfun->machine->use_fast_prologue_epilogue
5370 = !expensive_function_p (count);
5371 }
5372 if (TARGET_PROLOGUE_USING_MOVE
5373 && cfun->machine->use_fast_prologue_epilogue)
5374 frame->save_regs_using_mov = true;
5375 else
5376 frame->save_regs_using_mov = false;
5377
5378
5379 /* Skip return address and saved base pointer. */
5380 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5381
5382 frame->hard_frame_pointer_offset = offset;
5383
5384 /* Do some sanity checking of stack_alignment_needed and
5385 preferred_alignment, since the i386 port is the only one using these
5386 features, which may break easily. */
5387
5388 gcc_assert (!size || stack_alignment_needed);
5389 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5390 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5391 gcc_assert (stack_alignment_needed
5392 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5393
5394 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5395 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5396
5397 /* Register save area */
5398 offset += frame->nregs * UNITS_PER_WORD;
5399
5400 /* Va-arg area */
5401 if (ix86_save_varrargs_registers)
5402 {
5403 offset += X86_64_VARARGS_SIZE;
5404 frame->va_arg_size = X86_64_VARARGS_SIZE;
5405 }
5406 else
5407 frame->va_arg_size = 0;
5408
5409 /* Align start of frame for local function. */
5410 frame->padding1 = ((offset + stack_alignment_needed - 1)
5411 & -stack_alignment_needed) - offset;
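/* For example, offset == 20 with stack_alignment_needed == 16 gives
   ((20 + 15) & -16) - 20 == 32 - 20 == 12 bytes of padding.  */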
5412
5413 offset += frame->padding1;
5414
5415 /* Frame pointer points here. */
5416 frame->frame_pointer_offset = offset;
5417
5418 offset += size;
5419
5420 /* Add the outgoing arguments area. It can be skipped if we eliminated
5421 all the function calls as dead code.
5422 Skipping is however impossible when the function calls alloca, since the
5423 alloca expander assumes that the last current_function_outgoing_args_size
5424 bytes of the stack frame are unused. */
5425 if (ACCUMULATE_OUTGOING_ARGS
5426 && (!current_function_is_leaf || current_function_calls_alloca
5427 || ix86_current_function_calls_tls_descriptor))
5428 {
5429 offset += current_function_outgoing_args_size;
5430 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5431 }
5432 else
5433 frame->outgoing_arguments_size = 0;
5434
5435 /* Align stack boundary. Only needed if we're calling another function
5436 or using alloca. */
5437 if (!current_function_is_leaf || current_function_calls_alloca
5438 || ix86_current_function_calls_tls_descriptor)
5439 frame->padding2 = ((offset + preferred_alignment - 1)
5440 & -preferred_alignment) - offset;
5441 else
5442 frame->padding2 = 0;
5443
5444 offset += frame->padding2;
5445
5446 /* We've reached end of stack frame. */
5447 frame->stack_pointer_offset = offset;
5448
5449 /* Size prologue needs to allocate. */
5450 frame->to_allocate =
5451 (size + frame->padding1 + frame->padding2
5452 + frame->outgoing_arguments_size + frame->va_arg_size);
5453
5454 if ((!frame->to_allocate && frame->nregs <= 1)
5455 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5456 frame->save_regs_using_mov = false;
5457
5458 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5459 && current_function_is_leaf
5460 && !ix86_current_function_calls_tls_descriptor)
5461 {
5462 frame->red_zone_size = frame->to_allocate;
5463 if (frame->save_regs_using_mov)
5464 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5465 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5466 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5467 }
5468 else
5469 frame->red_zone_size = 0;
5470 frame->to_allocate -= frame->red_zone_size;
5471 frame->stack_pointer_offset -= frame->red_zone_size;
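/* Background note: on x86-64 the psABI guarantees a 128-byte "red zone"
   below the stack pointer that leaf functions may use without adjusting
   %rsp; RED_ZONE_SIZE and RED_ZONE_RESERVE (defined in the port headers)
   bound how much of it the code above is allowed to reuse.  */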
5472 #if 0
5473 fprintf (stderr, "\n");
5474 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5475 fprintf (stderr, "size: %ld\n", (long)size);
5476 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5477 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5478 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5479 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5480 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5481 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5482 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5483 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5484 (long)frame->hard_frame_pointer_offset);
5485 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5486 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5487 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5488 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5489 #endif
5490 }
5491
5492 /* Emit code to save registers in the prologue. */
5493
5494 static void
5495 ix86_emit_save_regs (void)
5496 {
5497 unsigned int regno;
5498 rtx insn;
5499
5500 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5501 if (ix86_save_reg (regno, true))
5502 {
5503 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5504 RTX_FRAME_RELATED_P (insn) = 1;
5505 }
5506 }
5507
5508 /* Emit code to save registers using MOV insns. First register
5509 is saved at POINTER + OFFSET. */
5510 static void
5511 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5512 {
5513 unsigned int regno;
5514 rtx insn;
5515
5516 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5517 if (ix86_save_reg (regno, true))
5518 {
5519 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5520 Pmode, offset),
5521 gen_rtx_REG (Pmode, regno));
5522 RTX_FRAME_RELATED_P (insn) = 1;
5523 offset += UNITS_PER_WORD;
5524 }
5525 }
5526
5527 /* Expand prologue or epilogue stack adjustment.
5528 The pattern exists to put a dependency on all ebp-based memory accesses.
5529 STYLE should be negative if instructions should be marked as frame related,
5530 zero if %r11 register is live and cannot be freely used and positive
5531 otherwise. */
5532
5533 static void
5534 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5535 {
5536 rtx insn;
5537
5538 if (! TARGET_64BIT)
5539 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5540 else if (x86_64_immediate_operand (offset, DImode))
5541 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5542 else
5543 {
5544 rtx r11;
5545 /* r11 is used by indirect sibcall return as well, set before the
5546 epilogue and used after the epilogue. ATM indirect sibcall
5547 shouldn't be used together with huge frame sizes in one
5548 function because of the frame_size check in sibcall.c. */
5549 gcc_assert (style);
5550 r11 = gen_rtx_REG (DImode, R11_REG);
5551 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5552 if (style < 0)
5553 RTX_FRAME_RELATED_P (insn) = 1;
5554 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5555 offset));
5556 }
5557 if (style < 0)
5558 RTX_FRAME_RELATED_P (insn) = 1;
5559 }
5560
5561 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5562
5563 static rtx
5564 ix86_internal_arg_pointer (void)
5565 {
5566 bool has_force_align_arg_pointer =
5567 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5568 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5569 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5570 && DECL_NAME (current_function_decl)
5571 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5572 && DECL_FILE_SCOPE_P (current_function_decl))
5573 || ix86_force_align_arg_pointer
5574 || has_force_align_arg_pointer)
5575 {
5576 /* Nested functions can't realign the stack due to a register
5577 conflict. */
5578 if (DECL_CONTEXT (current_function_decl)
5579 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5580 {
5581 if (ix86_force_align_arg_pointer)
5582 warning (0, "-mstackrealign ignored for nested functions");
5583 if (has_force_align_arg_pointer)
5584 error ("%s not supported for nested functions",
5585 ix86_force_align_arg_pointer_string);
5586 return virtual_incoming_args_rtx;
5587 }
5588 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
5589 return copy_to_reg (cfun->machine->force_align_arg_pointer);
5590 }
5591 else
5592 return virtual_incoming_args_rtx;
5593 }
5594
5595 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
5596 This is called from dwarf2out.c to emit call frame instructions
5597 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
5598 static void
5599 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
5600 {
5601 rtx unspec = SET_SRC (pattern);
5602 gcc_assert (GET_CODE (unspec) == UNSPEC);
5603
5604 switch (index)
5605 {
5606 case UNSPEC_REG_SAVE:
5607 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
5608 SET_DEST (pattern));
5609 break;
5610 case UNSPEC_DEF_CFA:
5611 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
5612 INTVAL (XVECEXP (unspec, 0, 0)));
5613 break;
5614 default:
5615 gcc_unreachable ();
5616 }
5617 }
5618
5619 /* Expand the prologue into a bunch of separate insns. */
5620
5621 void
5622 ix86_expand_prologue (void)
5623 {
5624 rtx insn;
5625 bool pic_reg_used;
5626 struct ix86_frame frame;
5627 HOST_WIDE_INT allocate;
5628
5629 ix86_compute_frame_layout (&frame);
5630
5631 if (cfun->machine->force_align_arg_pointer)
5632 {
5633 rtx x, y;
5634
5635 /* Grab the argument pointer. */
5636 x = plus_constant (stack_pointer_rtx, 4);
5637 y = cfun->machine->force_align_arg_pointer;
5638 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
5639 RTX_FRAME_RELATED_P (insn) = 1;
5640
5641 /* The unwind info consists of two parts: install the fafp as the cfa,
5642 and record the fafp as the "save register" of the stack pointer.
5643 The latter is there so that the unwinder can see where it
5644 should restore the stack pointer across the and insn that aligns the stack. */
5645 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
5646 x = gen_rtx_SET (VOIDmode, y, x);
5647 RTX_FRAME_RELATED_P (x) = 1;
5648 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
5649 UNSPEC_REG_SAVE);
5650 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
5651 RTX_FRAME_RELATED_P (y) = 1;
5652 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
5653 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5654 REG_NOTES (insn) = x;
5655
5656 /* Align the stack. */
5657 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
5658 GEN_INT (-16)));
5659
5660 /* And here we cheat like madmen with the unwind info. We force the
5661 cfa register back to sp+4, which is exactly what it was at the
5662 start of the function. Re-pushing the return address results in
5663 the return at the same spot relative to the cfa, and thus is
5664 correct wrt the unwind info. */
5665 x = cfun->machine->force_align_arg_pointer;
5666 x = gen_frame_mem (Pmode, plus_constant (x, -4));
5667 insn = emit_insn (gen_push (x));
5668 RTX_FRAME_RELATED_P (insn) = 1;
5669
5670 x = GEN_INT (4);
5671 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
5672 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
5673 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
5674 REG_NOTES (insn) = x;
5675 }
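/* Illustrative sketch of the insns emitted above (assuming the scratch
   register is %ecx, as chosen in ix86_internal_arg_pointer); the output is
   roughly

       leal    4(%esp), %ecx
       andl    $-16, %esp
       pushl   -4(%ecx)

   i.e. remember the incoming argument pointer, align the stack to 16 bytes,
   and re-push the return address so the frame looks normal again.  */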
5676
5677 /* Note: AT&T enter does NOT have reversed args. Enter is probably
5678 slower on all targets. Also sdb doesn't like it. */
5679
5680 if (frame_pointer_needed)
5681 {
5682 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
5683 RTX_FRAME_RELATED_P (insn) = 1;
5684
5685 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
5686 RTX_FRAME_RELATED_P (insn) = 1;
5687 }
5688
5689 allocate = frame.to_allocate;
5690
5691 if (!frame.save_regs_using_mov)
5692 ix86_emit_save_regs ();
5693 else
5694 allocate += frame.nregs * UNITS_PER_WORD;
5695
5696 /* When using the red zone we may start saving registers before allocating
5697 the stack frame, saving one cycle of the prologue. */
5698 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
5699 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
5700 : stack_pointer_rtx,
5701 -frame.nregs * UNITS_PER_WORD);
5702
5703 if (allocate == 0)
5704 ;
5705 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
5706 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5707 GEN_INT (-allocate), -1);
5708 else
5709 {
5710 /* Only valid for Win32. */
5711 rtx eax = gen_rtx_REG (SImode, 0);
5712 bool eax_live = ix86_eax_live_at_start_p ();
5713 rtx t;
5714
5715 gcc_assert (!TARGET_64BIT);
5716
5717 if (eax_live)
5718 {
5719 emit_insn (gen_push (eax));
5720 allocate -= 4;
5721 }
5722
5723 emit_move_insn (eax, GEN_INT (allocate));
5724
5725 insn = emit_insn (gen_allocate_stack_worker (eax));
5726 RTX_FRAME_RELATED_P (insn) = 1;
5727 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
5728 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
5729 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
5730 t, REG_NOTES (insn));
5731
5732 if (eax_live)
5733 {
5734 if (frame_pointer_needed)
5735 t = plus_constant (hard_frame_pointer_rtx,
5736 allocate
5737 - frame.to_allocate
5738 - frame.nregs * UNITS_PER_WORD);
5739 else
5740 t = plus_constant (stack_pointer_rtx, allocate);
5741 emit_move_insn (eax, gen_rtx_MEM (SImode, t));
5742 }
5743 }
5744
5745 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
5746 {
5747 if (!frame_pointer_needed || !frame.to_allocate)
5748 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
5749 else
5750 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
5751 -frame.nregs * UNITS_PER_WORD);
5752 }
5753
5754 pic_reg_used = false;
5755 if (pic_offset_table_rtx
5756 && (regs_ever_live[REAL_PIC_OFFSET_TABLE_REGNUM]
5757 || current_function_profile))
5758 {
5759 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
5760
5761 if (alt_pic_reg_used != INVALID_REGNUM)
5762 REGNO (pic_offset_table_rtx) = alt_pic_reg_used;
5763
5764 pic_reg_used = true;
5765 }
5766
5767 if (pic_reg_used)
5768 {
5769 if (TARGET_64BIT)
5770 {
5771 if (ix86_cmodel == CM_LARGE_PIC)
5772 {
5773 rtx tmp_reg = gen_rtx_REG (DImode,
5774 FIRST_REX_INT_REG + 3 /* R11 */);
5775 rtx label = gen_label_rtx ();
5776 emit_label (label);
5777 LABEL_PRESERVE_P (label) = 1;
5778 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
5779 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
5780 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5781 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
5782 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5783 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
5784 pic_offset_table_rtx, tmp_reg));
5785 }
5786 else
5787 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
5788 }
5789 else
5790 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
5791
5792 /* Even with accurate pre-reload life analysis, we can wind up
5793 deleting all references to the pic register after reload.
5794 Consider if cross-jumping unifies two sides of a branch
5795 controlled by a comparison vs the only read from a global.
5796 In that case, allow the set_got to be deleted, though we're
5797 too late to do anything about the ebx save in the prologue. */
5798 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_MAYBE_DEAD, const0_rtx, NULL);
5799 }
5800
5801 /* Prevent function calls from being scheduled before the call to mcount.
5802 In the pic_reg_used case, make sure that the got load isn't deleted. */
5803 if (current_function_profile)
5804 emit_insn (gen_blockage (pic_reg_used ? pic_offset_table_rtx : const0_rtx));
5805 }
5806
5807 /* Emit code to restore saved registers using MOV insns. First register
5808 is restored from POINTER + OFFSET. */
5809 static void
5810 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
5811 int maybe_eh_return)
5812 {
5813 int regno;
5814 rtx base_address = gen_rtx_MEM (Pmode, pointer);
5815
5816 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5817 if (ix86_save_reg (regno, maybe_eh_return))
5818 {
5819 /* Ensure that adjust_address won't be forced to produce a pointer
5820 outside the range allowed by the x86-64 instruction set. */
5821 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
5822 {
5823 rtx r11;
5824
5825 r11 = gen_rtx_REG (DImode, R11_REG);
5826 emit_move_insn (r11, GEN_INT (offset));
5827 emit_insn (gen_adddi3 (r11, r11, pointer));
5828 base_address = gen_rtx_MEM (Pmode, r11);
5829 offset = 0;
5830 }
5831 emit_move_insn (gen_rtx_REG (Pmode, regno),
5832 adjust_address (base_address, Pmode, offset));
5833 offset += UNITS_PER_WORD;
5834 }
5835 }
5836
5837 /* Restore function stack, frame, and registers. */
5838
5839 void
5840 ix86_expand_epilogue (int style)
5841 {
5842 int regno;
5843 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
5844 struct ix86_frame frame;
5845 HOST_WIDE_INT offset;
5846
5847 ix86_compute_frame_layout (&frame);
5848
5849 /* Calculate start of saved registers relative to ebp. Special care
5850 must be taken for the normal return case of a function using
5851 eh_return: the eax and edx registers are marked as saved, but not
5852 restored along this path. */
5853 offset = frame.nregs;
5854 if (current_function_calls_eh_return && style != 2)
5855 offset -= 2;
5856 offset *= -UNITS_PER_WORD;
5857
5858 /* If we're only restoring one register and sp is not valid then
5859 use a move instruction to restore the register, since it's
5860 less work than reloading sp and popping the register.
5861
5862 The default code results in a stack adjustment using an add/lea instruction,
5863 while this code results in a LEAVE instruction (or discrete equivalent),
5864 so it is profitable in some other cases as well, especially when there
5865 are no registers to restore. We also use this code when TARGET_USE_LEAVE
5866 and there is exactly one register to pop. This heuristic may need some
5867 tuning in the future. */
5868 if ((!sp_valid && frame.nregs <= 1)
5869 || (TARGET_EPILOGUE_USING_MOVE
5870 && cfun->machine->use_fast_prologue_epilogue
5871 && (frame.nregs > 1 || frame.to_allocate))
5872 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
5873 || (frame_pointer_needed && TARGET_USE_LEAVE
5874 && cfun->machine->use_fast_prologue_epilogue
5875 && frame.nregs == 1)
5876 || current_function_calls_eh_return)
5877 {
5878 /* Restore registers. We can use ebp or esp to address the memory
5879 locations. If both are available, default to ebp, since offsets
5880 are known to be small. The only exception is when esp points directly to the
5881 end of the block of saved registers, where we may simplify the addressing
5882 mode. */
5883
5884 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
5885 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
5886 frame.to_allocate, style == 2);
5887 else
5888 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
5889 offset, style == 2);
5890
5891 /* eh_return epilogues need %ecx added to the stack pointer. */
5892 if (style == 2)
5893 {
5894 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
5895
5896 if (frame_pointer_needed)
5897 {
5898 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
5899 tmp = plus_constant (tmp, UNITS_PER_WORD);
5900 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
5901
5902 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
5903 emit_move_insn (hard_frame_pointer_rtx, tmp);
5904
5905 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
5906 const0_rtx, style);
5907 }
5908 else
5909 {
5910 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
5911 tmp = plus_constant (tmp, (frame.to_allocate
5912 + frame.nregs * UNITS_PER_WORD));
5913 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
5914 }
5915 }
5916 else if (!frame_pointer_needed)
5917 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5918 GEN_INT (frame.to_allocate
5919 + frame.nregs * UNITS_PER_WORD),
5920 style);
5921 /* If not an i386, mov & pop is faster than "leave". */
5922 else if (TARGET_USE_LEAVE || optimize_size
5923 || !cfun->machine->use_fast_prologue_epilogue)
5924 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5925 else
5926 {
5927 pro_epilogue_adjust_stack (stack_pointer_rtx,
5928 hard_frame_pointer_rtx,
5929 const0_rtx, style);
5930 if (TARGET_64BIT)
5931 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5932 else
5933 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5934 }
5935 }
5936 else
5937 {
5938 /* First step is to deallocate the stack frame so that we can
5939 pop the registers. */
5940 if (!sp_valid)
5941 {
5942 gcc_assert (frame_pointer_needed);
5943 pro_epilogue_adjust_stack (stack_pointer_rtx,
5944 hard_frame_pointer_rtx,
5945 GEN_INT (offset), style);
5946 }
5947 else if (frame.to_allocate)
5948 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
5949 GEN_INT (frame.to_allocate), style);
5950
5951 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5952 if (ix86_save_reg (regno, false))
5953 {
5954 if (TARGET_64BIT)
5955 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
5956 else
5957 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
5958 }
5959 if (frame_pointer_needed)
5960 {
5961 /* Leave results in shorter dependency chains on CPUs that are
5962 able to grok it fast. */
5963 if (TARGET_USE_LEAVE)
5964 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
5965 else if (TARGET_64BIT)
5966 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
5967 else
5968 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
5969 }
5970 }
5971
5972 if (cfun->machine->force_align_arg_pointer)
5973 {
5974 emit_insn (gen_addsi3 (stack_pointer_rtx,
5975 cfun->machine->force_align_arg_pointer,
5976 GEN_INT (-4)));
5977 }
5978
5979 /* Sibcall epilogues don't want a return instruction. */
5980 if (style == 0)
5981 return;
5982
5983 if (current_function_pops_args && current_function_args_size)
5984 {
5985 rtx popc = GEN_INT (current_function_pops_args);
5986
5987 /* i386 can only pop 64K bytes. If asked to pop more, pop
5988 return address, do explicit add, and jump indirectly to the
5989 caller. */
5990
5991 if (current_function_pops_args >= 65536)
5992 {
5993 rtx ecx = gen_rtx_REG (SImode, 2);
5994
5995 /* There is no "pascal" calling convention in 64bit ABI. */
5996 gcc_assert (!TARGET_64BIT);
5997
5998 emit_insn (gen_popsi1 (ecx));
5999 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6000 emit_jump_insn (gen_return_indirect_internal (ecx));
6001 }
6002 else
6003 emit_jump_insn (gen_return_pop_internal (popc));
6004 }
6005 else
6006 emit_jump_insn (gen_return_internal ());
6007 }
6008
6009 /* Reset any state that compiling the function may have changed. */
6010
6011 static void
6012 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6013 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6014 {
6015 if (pic_offset_table_rtx)
6016 REGNO (pic_offset_table_rtx) = REAL_PIC_OFFSET_TABLE_REGNUM;
6017 #if TARGET_MACHO
6018 /* Mach-O doesn't support labels at the end of objects, so if
6019 it looks like we might want one, insert a NOP. */
6020 {
6021 rtx insn = get_last_insn ();
6022 while (insn
6023 && NOTE_P (insn)
6024 && NOTE_LINE_NUMBER (insn) != NOTE_INSN_DELETED_LABEL)
6025 insn = PREV_INSN (insn);
6026 if (insn
6027 && (LABEL_P (insn)
6028 || (NOTE_P (insn)
6029 && NOTE_LINE_NUMBER (insn) == NOTE_INSN_DELETED_LABEL)))
6030 fputs ("\tnop\n", file);
6031 }
6032 #endif
6033
6034 }
6035 \f
6036 /* Extract the parts of an RTL expression that is a valid memory address
6037 for an instruction. Return 0 if the structure of the address is
6038 grossly off. Return -1 if the address contains ASHIFT, so it is not
6039 strictly valid, but is still used for computing the length of an lea instruction. */
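/* For example, the address 12(%eax,%ebx,4), i.e.
       (plus (plus (mult (reg ebx) (const_int 4)) (reg eax)) (const_int 12)),
   decomposes into base = %eax, index = %ebx, scale = 4, disp = 12
   (illustrative only).  */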
6040
6041 int
6042 ix86_decompose_address (rtx addr, struct ix86_address *out)
6043 {
6044 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6045 rtx base_reg, index_reg;
6046 HOST_WIDE_INT scale = 1;
6047 rtx scale_rtx = NULL_RTX;
6048 int retval = 1;
6049 enum ix86_address_seg seg = SEG_DEFAULT;
6050
6051 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6052 base = addr;
6053 else if (GET_CODE (addr) == PLUS)
6054 {
6055 rtx addends[4], op;
6056 int n = 0, i;
6057
6058 op = addr;
6059 do
6060 {
6061 if (n >= 4)
6062 return 0;
6063 addends[n++] = XEXP (op, 1);
6064 op = XEXP (op, 0);
6065 }
6066 while (GET_CODE (op) == PLUS);
6067 if (n >= 4)
6068 return 0;
6069 addends[n] = op;
6070
6071 for (i = n; i >= 0; --i)
6072 {
6073 op = addends[i];
6074 switch (GET_CODE (op))
6075 {
6076 case MULT:
6077 if (index)
6078 return 0;
6079 index = XEXP (op, 0);
6080 scale_rtx = XEXP (op, 1);
6081 break;
6082
6083 case UNSPEC:
6084 if (XINT (op, 1) == UNSPEC_TP
6085 && TARGET_TLS_DIRECT_SEG_REFS
6086 && seg == SEG_DEFAULT)
6087 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6088 else
6089 return 0;
6090 break;
6091
6092 case REG:
6093 case SUBREG:
6094 if (!base)
6095 base = op;
6096 else if (!index)
6097 index = op;
6098 else
6099 return 0;
6100 break;
6101
6102 case CONST:
6103 case CONST_INT:
6104 case SYMBOL_REF:
6105 case LABEL_REF:
6106 if (disp)
6107 return 0;
6108 disp = op;
6109 break;
6110
6111 default:
6112 return 0;
6113 }
6114 }
6115 }
6116 else if (GET_CODE (addr) == MULT)
6117 {
6118 index = XEXP (addr, 0); /* index*scale */
6119 scale_rtx = XEXP (addr, 1);
6120 }
6121 else if (GET_CODE (addr) == ASHIFT)
6122 {
6123 rtx tmp;
6124
6125 /* We're called for lea too, which implements ashift on occasion. */
6126 index = XEXP (addr, 0);
6127 tmp = XEXP (addr, 1);
6128 if (!CONST_INT_P (tmp))
6129 return 0;
6130 scale = INTVAL (tmp);
6131 if ((unsigned HOST_WIDE_INT) scale > 3)
6132 return 0;
6133 scale = 1 << scale;
6134 retval = -1;
6135 }
6136 else
6137 disp = addr; /* displacement */
6138
6139 /* Extract the integral value of scale. */
6140 if (scale_rtx)
6141 {
6142 if (!CONST_INT_P (scale_rtx))
6143 return 0;
6144 scale = INTVAL (scale_rtx);
6145 }
6146
6147 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6148 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6149
6150 /* Allow arg pointer and stack pointer as index if there is no scaling. */
6151 if (base_reg && index_reg && scale == 1
6152 && (index_reg == arg_pointer_rtx
6153 || index_reg == frame_pointer_rtx
6154 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6155 {
6156 rtx tmp;
6157 tmp = base, base = index, index = tmp;
6158 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6159 }
6160
6161 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6162 if ((base_reg == hard_frame_pointer_rtx
6163 || base_reg == frame_pointer_rtx
6164 || base_reg == arg_pointer_rtx) && !disp)
6165 disp = const0_rtx;
6166
6167 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6168 Avoid this by transforming to [%esi+0]. */
6169 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6170 && base_reg && !index_reg && !disp
6171 && REG_P (base_reg)
6172 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6173 disp = const0_rtx;
6174
6175 /* Special case: encode reg+reg instead of reg*2. */
6176 if (!base && index && scale && scale == 2)
6177 base = index, base_reg = index_reg, scale = 1;
6178
6179 /* Special case: scaling cannot be encoded without base or displacement. */
6180 if (!base && !disp && index && scale != 1)
6181 disp = const0_rtx;
6182
6183 out->base = base;
6184 out->index = index;
6185 out->disp = disp;
6186 out->scale = scale;
6187 out->seg = seg;
6188
6189 return retval;
6190 }
6191 \f
6192 /* Return cost of the memory address x.
6193 For i386, it is better to use a complex address than let gcc copy
6194 the address into a reg and make a new pseudo. But not if the address
6195 requires to two regs - that would mean more pseudos with longer
6196 lifetimes. */
6197 static int
6198 ix86_address_cost (rtx x)
6199 {
6200 struct ix86_address parts;
6201 int cost = 1;
6202 int ok = ix86_decompose_address (x, &parts);
6203
6204 gcc_assert (ok);
6205
6206 if (parts.base && GET_CODE (parts.base) == SUBREG)
6207 parts.base = SUBREG_REG (parts.base);
6208 if (parts.index && GET_CODE (parts.index) == SUBREG)
6209 parts.index = SUBREG_REG (parts.index);
6210
6211 /* More complex memory references are better. */
6212 if (parts.disp && parts.disp != const0_rtx)
6213 cost--;
6214 if (parts.seg != SEG_DEFAULT)
6215 cost--;
6216
6217 /* Attempt to minimize number of registers in the address. */
6218 if ((parts.base
6219 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6220 || (parts.index
6221 && (!REG_P (parts.index)
6222 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6223 cost++;
6224
6225 if (parts.base
6226 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6227 && parts.index
6228 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6229 && parts.base != parts.index)
6230 cost++;
6231
6232 /* The AMD-K6 doesn't like addresses with ModR/M set to 00_xxx_100b,
6233 since its predecode logic can't detect the length of instructions
6234 and decoding degenerates to vector decoding. Increase the cost of such
6235 addresses here. The penalty is at least 2 cycles. It may be worthwhile
6236 to split such addresses or even refuse them at all.
6237
6238 Following addressing modes are affected:
6239 [base+scale*index]
6240 [scale*index+disp]
6241 [base+index]
6242
6243 The first and last cases may be avoidable by explicitly coding a zero in the
6244 memory address, but I don't have an AMD-K6 machine handy to check this
6245 theory. */
6246
6247 if (TARGET_K6
6248 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6249 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6250 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6251 cost += 10;
6252
6253 return cost;
6254 }
6255 \f
6256 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O as
6257 this is used to form addresses to local data when -fPIC is in
6258 use. */
6259
6260 static bool
6261 darwin_local_data_pic (rtx disp)
6262 {
6263 if (GET_CODE (disp) == MINUS)
6264 {
6265 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6266 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6267 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6268 {
6269 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6270 if (! strcmp (sym_name, "<pic base>"))
6271 return true;
6272 }
6273 }
6274
6275 return false;
6276 }
6277
6278 /* Determine if a given RTX is a valid constant. We already know this
6279 satisfies CONSTANT_P. */
6280
6281 bool
6282 legitimate_constant_p (rtx x)
6283 {
6284 switch (GET_CODE (x))
6285 {
6286 case CONST:
6287 x = XEXP (x, 0);
6288
6289 if (GET_CODE (x) == PLUS)
6290 {
6291 if (!CONST_INT_P (XEXP (x, 1)))
6292 return false;
6293 x = XEXP (x, 0);
6294 }
6295
6296 if (TARGET_MACHO && darwin_local_data_pic (x))
6297 return true;
6298
6299 /* Only some unspecs are valid as "constants". */
6300 if (GET_CODE (x) == UNSPEC)
6301 switch (XINT (x, 1))
6302 {
6303 case UNSPEC_GOT:
6304 case UNSPEC_GOTOFF:
6305 case UNSPEC_PLTOFF:
6306 return TARGET_64BIT;
6307 case UNSPEC_TPOFF:
6308 case UNSPEC_NTPOFF:
6309 x = XVECEXP (x, 0, 0);
6310 return (GET_CODE (x) == SYMBOL_REF
6311 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6312 case UNSPEC_DTPOFF:
6313 x = XVECEXP (x, 0, 0);
6314 return (GET_CODE (x) == SYMBOL_REF
6315 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6316 default:
6317 return false;
6318 }
6319
6320 /* We must have drilled down to a symbol. */
6321 if (GET_CODE (x) == LABEL_REF)
6322 return true;
6323 if (GET_CODE (x) != SYMBOL_REF)
6324 return false;
6325 /* FALLTHRU */
6326
6327 case SYMBOL_REF:
6328 /* TLS symbols are never valid. */
6329 if (SYMBOL_REF_TLS_MODEL (x))
6330 return false;
6331 break;
6332
6333 case CONST_DOUBLE:
6334 if (GET_MODE (x) == TImode
6335 && x != CONST0_RTX (TImode)
6336 && !TARGET_64BIT)
6337 return false;
6338 break;
6339
6340 case CONST_VECTOR:
6341 if (x == CONST0_RTX (GET_MODE (x)))
6342 return true;
6343 return false;
6344
6345 default:
6346 break;
6347 }
6348
6349 /* Otherwise we handle everything else in the move patterns. */
6350 return true;
6351 }
6352
6353 /* Determine if it's legal to put X into the constant pool. This
6354 is not possible for the address of thread-local symbols, which
6355 is checked above. */
6356
6357 static bool
6358 ix86_cannot_force_const_mem (rtx x)
6359 {
6360 /* We can always put integral constants and vectors in memory. */
6361 switch (GET_CODE (x))
6362 {
6363 case CONST_INT:
6364 case CONST_DOUBLE:
6365 case CONST_VECTOR:
6366 return false;
6367
6368 default:
6369 break;
6370 }
6371 return !legitimate_constant_p (x);
6372 }
6373
6374 /* Determine if a given RTX is a valid constant address. */
6375
6376 bool
6377 constant_address_p (rtx x)
6378 {
6379 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6380 }
6381
6382 /* Nonzero if the constant value X is a legitimate general operand
6383 when generating PIC code. It is given that flag_pic is on and
6384 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6385
6386 bool
6387 legitimate_pic_operand_p (rtx x)
6388 {
6389 rtx inner;
6390
6391 switch (GET_CODE (x))
6392 {
6393 case CONST:
6394 inner = XEXP (x, 0);
6395 if (GET_CODE (inner) == PLUS
6396 && CONST_INT_P (XEXP (inner, 1)))
6397 inner = XEXP (inner, 0);
6398
6399 /* Only some unspecs are valid as "constants". */
6400 if (GET_CODE (inner) == UNSPEC)
6401 switch (XINT (inner, 1))
6402 {
6403 case UNSPEC_GOT:
6404 case UNSPEC_GOTOFF:
6405 case UNSPEC_PLTOFF:
6406 return TARGET_64BIT;
6407 case UNSPEC_TPOFF:
6408 x = XVECEXP (inner, 0, 0);
6409 return (GET_CODE (x) == SYMBOL_REF
6410 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6411 default:
6412 return false;
6413 }
6414 /* FALLTHRU */
6415
6416 case SYMBOL_REF:
6417 case LABEL_REF:
6418 return legitimate_pic_address_disp_p (x);
6419
6420 default:
6421 return true;
6422 }
6423 }
6424
6425 /* Determine if a given CONST RTX is a valid memory displacement
6426 in PIC mode. */
6427
6428 int
6429 legitimate_pic_address_disp_p (rtx disp)
6430 {
6431 bool saw_plus;
6432
6433 /* In 64bit mode we can allow direct addresses of symbols and labels
6434 when they are not dynamic symbols. */
6435 if (TARGET_64BIT)
6436 {
6437 rtx op0 = disp, op1;
6438
6439 switch (GET_CODE (disp))
6440 {
6441 case LABEL_REF:
6442 return true;
6443
6444 case CONST:
6445 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6446 break;
6447 op0 = XEXP (XEXP (disp, 0), 0);
6448 op1 = XEXP (XEXP (disp, 0), 1);
6449 if (!CONST_INT_P (op1)
6450 || INTVAL (op1) >= 16*1024*1024
6451 || INTVAL (op1) < -16*1024*1024)
6452 break;
6453 if (GET_CODE (op0) == LABEL_REF)
6454 return true;
6455 if (GET_CODE (op0) != SYMBOL_REF)
6456 break;
6457 /* FALLTHRU */
6458
6459 case SYMBOL_REF:
6460 /* TLS references should always be enclosed in UNSPEC. */
6461 if (SYMBOL_REF_TLS_MODEL (op0))
6462 return false;
6463 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6464 && ix86_cmodel != CM_LARGE_PIC)
6465 return true;
6466 break;
6467
6468 default:
6469 break;
6470 }
6471 }
6472 if (GET_CODE (disp) != CONST)
6473 return 0;
6474 disp = XEXP (disp, 0);
6475
6476 if (TARGET_64BIT)
6477 {
6478 /* It is not safe to allow PLUS expressions here; this limits the allowed
6479 distance of GOT tables. We should not need these anyway. */
6480 if (GET_CODE (disp) != UNSPEC
6481 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6482 && XINT (disp, 1) != UNSPEC_GOTOFF
6483 && XINT (disp, 1) != UNSPEC_PLTOFF))
6484 return 0;
6485
6486 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6487 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6488 return 0;
6489 return 1;
6490 }
6491
6492 saw_plus = false;
6493 if (GET_CODE (disp) == PLUS)
6494 {
6495 if (!CONST_INT_P (XEXP (disp, 1)))
6496 return 0;
6497 disp = XEXP (disp, 0);
6498 saw_plus = true;
6499 }
6500
6501 if (TARGET_MACHO && darwin_local_data_pic (disp))
6502 return 1;
6503
6504 if (GET_CODE (disp) != UNSPEC)
6505 return 0;
6506
6507 switch (XINT (disp, 1))
6508 {
6509 case UNSPEC_GOT:
6510 if (saw_plus)
6511 return false;
6512 /* We need to check for both symbols and labels because VxWorks loads
6513 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6514 details. */
6515 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6516 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6517 case UNSPEC_GOTOFF:
6518 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6519 While the ABI also specifies a 32bit relocation, we don't produce it in the
6520 small PIC model at all. */
6521 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6522 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6523 && !TARGET_64BIT)
6524 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6525 return false;
6526 case UNSPEC_GOTTPOFF:
6527 case UNSPEC_GOTNTPOFF:
6528 case UNSPEC_INDNTPOFF:
6529 if (saw_plus)
6530 return false;
6531 disp = XVECEXP (disp, 0, 0);
6532 return (GET_CODE (disp) == SYMBOL_REF
6533 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6534 case UNSPEC_NTPOFF:
6535 disp = XVECEXP (disp, 0, 0);
6536 return (GET_CODE (disp) == SYMBOL_REF
6537 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6538 case UNSPEC_DTPOFF:
6539 disp = XVECEXP (disp, 0, 0);
6540 return (GET_CODE (disp) == SYMBOL_REF
6541 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6542 }
6543
6544 return 0;
6545 }
6546
6547 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6548 memory address for an instruction. The MODE argument is the machine mode
6549 for the MEM expression that wants to use this address.
6550
6551 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6552 convert common non-canonical forms to canonical form so that they will
6553 be recognized. */
6554
6555 int
6556 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6557 rtx addr, int strict)
6558 {
6559 struct ix86_address parts;
6560 rtx base, index, disp;
6561 HOST_WIDE_INT scale;
6562 const char *reason = NULL;
6563 rtx reason_rtx = NULL_RTX;
6564
6565 if (ix86_decompose_address (addr, &parts) <= 0)
6566 {
6567 reason = "decomposition failed";
6568 goto report_error;
6569 }
6570
6571 base = parts.base;
6572 index = parts.index;
6573 disp = parts.disp;
6574 scale = parts.scale;
6575
6576 /* Validate base register.
6577
6578 Don't allow SUBREGs that span more than a word here; they can lead to spill
6579 failures when the base is one word out of a two word structure, which is
6580 represented internally as a DImode int. */
6581
6582 if (base)
6583 {
6584 rtx reg;
6585 reason_rtx = base;
6586
6587 if (REG_P (base))
6588 reg = base;
6589 else if (GET_CODE (base) == SUBREG
6590 && REG_P (SUBREG_REG (base))
6591 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
6592 <= UNITS_PER_WORD)
6593 reg = SUBREG_REG (base);
6594 else
6595 {
6596 reason = "base is not a register";
6597 goto report_error;
6598 }
6599
6600 if (GET_MODE (base) != Pmode)
6601 {
6602 reason = "base is not in Pmode";
6603 goto report_error;
6604 }
6605
6606 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
6607 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
6608 {
6609 reason = "base is not valid";
6610 goto report_error;
6611 }
6612 }
6613
6614 /* Validate index register.
6615
6616 Don't allow SUBREGs that span more than a word here -- same as above. */
6617
6618 if (index)
6619 {
6620 rtx reg;
6621 reason_rtx = index;
6622
6623 if (REG_P (index))
6624 reg = index;
6625 else if (GET_CODE (index) == SUBREG
6626 && REG_P (SUBREG_REG (index))
6627 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
6628 <= UNITS_PER_WORD)
6629 reg = SUBREG_REG (index);
6630 else
6631 {
6632 reason = "index is not a register";
6633 goto report_error;
6634 }
6635
6636 if (GET_MODE (index) != Pmode)
6637 {
6638 reason = "index is not in Pmode";
6639 goto report_error;
6640 }
6641
6642 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
6643 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
6644 {
6645 reason = "index is not valid";
6646 goto report_error;
6647 }
6648 }
6649
6650 /* Validate scale factor. */
6651 if (scale != 1)
6652 {
6653 reason_rtx = GEN_INT (scale);
6654 if (!index)
6655 {
6656 reason = "scale without index";
6657 goto report_error;
6658 }
6659
6660 if (scale != 2 && scale != 4 && scale != 8)
6661 {
6662 reason = "scale is not a valid multiplier";
6663 goto report_error;
6664 }
6665 }
6666
6667 /* Validate displacement. */
6668 if (disp)
6669 {
6670 reason_rtx = disp;
6671
6672 if (GET_CODE (disp) == CONST
6673 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
6674 switch (XINT (XEXP (disp, 0), 1))
6675 {
6676 /* Refuse GOTOFF and GOT in 64bit mode since it is always 64bit when
6677 used. While the ABI also specifies 32bit relocations, we don't produce
6678 them at all and use IP-relative addressing instead. */
6679 case UNSPEC_GOT:
6680 case UNSPEC_GOTOFF:
6681 gcc_assert (flag_pic);
6682 if (!TARGET_64BIT)
6683 goto is_legitimate_pic;
6684 reason = "64bit address unspec";
6685 goto report_error;
6686
6687 case UNSPEC_GOTPCREL:
6688 gcc_assert (flag_pic);
6689 goto is_legitimate_pic;
6690
6691 case UNSPEC_GOTTPOFF:
6692 case UNSPEC_GOTNTPOFF:
6693 case UNSPEC_INDNTPOFF:
6694 case UNSPEC_NTPOFF:
6695 case UNSPEC_DTPOFF:
6696 break;
6697
6698 default:
6699 reason = "invalid address unspec";
6700 goto report_error;
6701 }
6702
6703 else if (SYMBOLIC_CONST (disp)
6704 && (flag_pic
6705 || (TARGET_MACHO
6706 #if TARGET_MACHO
6707 && MACHOPIC_INDIRECT
6708 && !machopic_operand_p (disp)
6709 #endif
6710 )))
6711 {
6712
6713 is_legitimate_pic:
6714 if (TARGET_64BIT && (index || base))
6715 {
6716 /* foo@dtpoff(%rX) is ok. */
6717 if (GET_CODE (disp) != CONST
6718 || GET_CODE (XEXP (disp, 0)) != PLUS
6719 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
6720 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
6721 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
6722 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
6723 {
6724 reason = "non-constant pic memory reference";
6725 goto report_error;
6726 }
6727 }
6728 else if (! legitimate_pic_address_disp_p (disp))
6729 {
6730 reason = "displacement is an invalid pic construct";
6731 goto report_error;
6732 }
6733
6734 /* This code used to verify that a symbolic pic displacement
6735 includes the pic_offset_table_rtx register.
6736
6737 While this is a good idea, unfortunately these constructs may
6738 be created by "adds using lea" optimization for incorrect
6739 code like:
6740
6741 int a;
6742 int foo(int i)
6743 {
6744 return *(&a+i);
6745 }
6746
6747 This code is nonsensical, but results in addressing the
6748 GOT table with a pic_offset_table_rtx base. We can't
6749 just refuse it easily, since it gets matched by the
6750 "addsi3" pattern, which later gets split to an lea when the
6751 output register differs from the input. While this
6752 could be handled by a separate addsi pattern for this case
6753 that never results in an lea, disabling this test seems to be the
6754 easier and correct fix for the crash. */
6755 }
6756 else if (GET_CODE (disp) != LABEL_REF
6757 && !CONST_INT_P (disp)
6758 && (GET_CODE (disp) != CONST
6759 || !legitimate_constant_p (disp))
6760 && (GET_CODE (disp) != SYMBOL_REF
6761 || !legitimate_constant_p (disp)))
6762 {
6763 reason = "displacement is not constant";
6764 goto report_error;
6765 }
6766 else if (TARGET_64BIT
6767 && !x86_64_immediate_operand (disp, VOIDmode))
6768 {
6769 reason = "displacement is out of range";
6770 goto report_error;
6771 }
6772 }
6773
6774 /* Everything looks valid. */
6775 return TRUE;
6776
6777 report_error:
6778 return FALSE;
6779 }
6780 \f
6781 /* Return a unique alias set for the GOT. */
6782
6783 static HOST_WIDE_INT
6784 ix86_GOT_alias_set (void)
6785 {
6786 static HOST_WIDE_INT set = -1;
6787 if (set == -1)
6788 set = new_alias_set ();
6789 return set;
6790 }
6791
6792 /* Return a legitimate reference for ORIG (an address) using the
6793 register REG. If REG is 0, a new pseudo is generated.
6794
6795 There are two types of references that must be handled:
6796
6797 1. Global data references must load the address from the GOT, via
6798 the PIC reg. An insn is emitted to do this load, and the reg is
6799 returned.
6800
6801 2. Static data references, constant pool addresses, and code labels
6802 compute the address as an offset from the GOT, whose base is in
6803 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
6804 differentiate them from global data objects. The returned
6805 address is the PIC reg + an unspec constant.
6806
6807 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
6808 reg also appears in the address. */
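/* Illustrative only: in 32-bit code the two cases above typically end up as

       movl    foo@GOT(%ebx), %eax        (case 1, global data)
       leal    bar@GOTOFF(%ebx), %eax     (case 2, local/static data)

   assuming the PIC register lives in %ebx.  */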
6809
6810 static rtx
6811 legitimize_pic_address (rtx orig, rtx reg)
6812 {
6813 rtx addr = orig;
6814 rtx new = orig;
6815 rtx base;
6816
6817 #if TARGET_MACHO
6818 if (TARGET_MACHO && !TARGET_64BIT)
6819 {
6820 if (reg == 0)
6821 reg = gen_reg_rtx (Pmode);
6822 /* Use the generic Mach-O PIC machinery. */
6823 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
6824 }
6825 #endif
6826
6827 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
6828 new = addr;
6829 else if (TARGET_64BIT
6830 && ix86_cmodel != CM_SMALL_PIC
6831 && gotoff_operand (addr, Pmode))
6832 {
6833 rtx tmpreg;
6834 /* This symbol may be referenced via a displacement from the PIC
6835 base address (@GOTOFF). */
6836
6837 if (reload_in_progress)
6838 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6839 if (GET_CODE (addr) == CONST)
6840 addr = XEXP (addr, 0);
6841 if (GET_CODE (addr) == PLUS)
6842 {
6843 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6844 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6845 }
6846 else
6847 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6848 new = gen_rtx_CONST (Pmode, new);
6849 if (!reg)
6850 tmpreg = gen_reg_rtx (Pmode);
6851 else
6852 tmpreg = reg;
6853 emit_move_insn (tmpreg, new);
6854
6855 if (reg != 0)
6856 {
6857 new = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
6858 tmpreg, 1, OPTAB_DIRECT);
6859 new = reg;
6860 }
6861 else new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
6862 }
6863 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
6864 {
6865 /* This symbol may be referenced via a displacement from the PIC
6866 base address (@GOTOFF). */
6867
6868 if (reload_in_progress)
6869 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6870 if (GET_CODE (addr) == CONST)
6871 addr = XEXP (addr, 0);
6872 if (GET_CODE (addr) == PLUS)
6873 {
6874 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)), UNSPEC_GOTOFF);
6875 new = gen_rtx_PLUS (Pmode, new, XEXP (addr, 1));
6876 }
6877 else
6878 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
6879 new = gen_rtx_CONST (Pmode, new);
6880 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6881
6882 if (reg != 0)
6883 {
6884 emit_move_insn (reg, new);
6885 new = reg;
6886 }
6887 }
6888 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
6889 /* We can't use @GOTOFF for text labels on VxWorks;
6890 see gotoff_operand. */
6891 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
6892 {
6893 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
6894 {
6895 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
6896 new = gen_rtx_CONST (Pmode, new);
6897 new = gen_const_mem (Pmode, new);
6898 set_mem_alias_set (new, ix86_GOT_alias_set ());
6899
6900 if (reg == 0)
6901 reg = gen_reg_rtx (Pmode);
6902 /* Use gen_movsi directly; otherwise the address is loaded
6903 into a register for CSE. We don't want to CSE these addresses;
6904 instead we CSE addresses from the GOT table, so skip this. */
6905 emit_insn (gen_movsi (reg, new));
6906 new = reg;
6907 }
6908 else
6909 {
6910 /* This symbol must be referenced via a load from the
6911 Global Offset Table (@GOT). */
6912
6913 if (reload_in_progress)
6914 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6915 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
6916 new = gen_rtx_CONST (Pmode, new);
6917 if (TARGET_64BIT)
6918 new = force_reg (Pmode, new);
6919 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6920 new = gen_const_mem (Pmode, new);
6921 set_mem_alias_set (new, ix86_GOT_alias_set ());
6922
6923 if (reg == 0)
6924 reg = gen_reg_rtx (Pmode);
6925 emit_move_insn (reg, new);
6926 new = reg;
6927 }
6928 }
6929 else
6930 {
6931 if (CONST_INT_P (addr)
6932 && !x86_64_immediate_operand (addr, VOIDmode))
6933 {
6934 if (reg)
6935 {
6936 emit_move_insn (reg, addr);
6937 new = reg;
6938 }
6939 else
6940 new = force_reg (Pmode, addr);
6941 }
6942 else if (GET_CODE (addr) == CONST)
6943 {
6944 addr = XEXP (addr, 0);
6945
6946 /* We must match stuff we generate before. Assume the only
6947 unspecs that can get here are ours. Not that we could do
6948 anything with them anyway.... */
6949 if (GET_CODE (addr) == UNSPEC
6950 || (GET_CODE (addr) == PLUS
6951 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
6952 return orig;
6953 gcc_assert (GET_CODE (addr) == PLUS);
6954 }
6955 if (GET_CODE (addr) == PLUS)
6956 {
6957 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
6958
6959 /* Check first to see if this is a constant offset from a @GOTOFF
6960 symbol reference. */
6961 if (gotoff_operand (op0, Pmode)
6962 && CONST_INT_P (op1))
6963 {
6964 if (!TARGET_64BIT)
6965 {
6966 if (reload_in_progress)
6967 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
6968 new = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
6969 UNSPEC_GOTOFF);
6970 new = gen_rtx_PLUS (Pmode, new, op1);
6971 new = gen_rtx_CONST (Pmode, new);
6972 new = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new);
6973
6974 if (reg != 0)
6975 {
6976 emit_move_insn (reg, new);
6977 new = reg;
6978 }
6979 }
6980 else
6981 {
6982 if (INTVAL (op1) < -16*1024*1024
6983 || INTVAL (op1) >= 16*1024*1024)
6984 {
6985 if (!x86_64_immediate_operand (op1, Pmode))
6986 op1 = force_reg (Pmode, op1);
6987 new = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
6988 }
6989 }
6990 }
6991 else
6992 {
6993 base = legitimize_pic_address (XEXP (addr, 0), reg);
6994 new = legitimize_pic_address (XEXP (addr, 1),
6995 base == reg ? NULL_RTX : reg);
6996
6997 if (CONST_INT_P (new))
6998 new = plus_constant (base, INTVAL (new));
6999 else
7000 {
7001 if (GET_CODE (new) == PLUS && CONSTANT_P (XEXP (new, 1)))
7002 {
7003 base = gen_rtx_PLUS (Pmode, base, XEXP (new, 0));
7004 new = XEXP (new, 1);
7005 }
7006 new = gen_rtx_PLUS (Pmode, base, new);
7007 }
7008 }
7009 }
7010 }
7011 return new;
7012 }
7013 \f
7014 /* Load the thread pointer. If TO_REG is true, force it into a register. */
7015
7016 static rtx
7017 get_thread_pointer (int to_reg)
7018 {
7019 rtx tp, reg, insn;
7020
7021 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7022 if (!to_reg)
7023 return tp;
7024
7025 reg = gen_reg_rtx (Pmode);
7026 insn = gen_rtx_SET (VOIDmode, reg, tp);
7027 insn = emit_insn (insn);
7028
7029 return reg;
7030 }
7031
7032 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7033 false if we expect this to be used for a memory address and true if
7034 we expect to load the address into a register. */
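/* A rough picture of what the cases below emit (illustrative only; the
   exact insns depend on TARGET_64BIT and TARGET_GNU2_TLS):
     global-dynamic: a call to the tls_get_addr helper with the @TLSGD
                     address of X,
     local-dynamic:  one such call for the module base, plus X@DTPOFF,
     initial-exec:   X's offset is loaded from the GOT (@GOTTPOFF family)
                     and combined with the thread pointer,
     local-exec:     X@NTPOFF (or @TPOFF) is folded directly against the
                     thread pointer with no memory load.  */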
7035
7036 static rtx
7037 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7038 {
7039 rtx dest, base, off, pic, tp;
7040 int type;
7041
7042 switch (model)
7043 {
7044 case TLS_MODEL_GLOBAL_DYNAMIC:
7045 dest = gen_reg_rtx (Pmode);
7046 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7047
7048 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7049 {
7050 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7051
7052 start_sequence ();
7053 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7054 insns = get_insns ();
7055 end_sequence ();
7056
7057 emit_libcall_block (insns, dest, rax, x);
7058 }
7059 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7060 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7061 else
7062 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7063
7064 if (TARGET_GNU2_TLS)
7065 {
7066 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7067
7068 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7069 }
7070 break;
7071
7072 case TLS_MODEL_LOCAL_DYNAMIC:
7073 base = gen_reg_rtx (Pmode);
7074 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7075
7076 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7077 {
7078 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7079
7080 start_sequence ();
7081 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7082 insns = get_insns ();
7083 end_sequence ();
7084
7085 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7086 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7087 emit_libcall_block (insns, base, rax, note);
7088 }
7089 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7090 emit_insn (gen_tls_local_dynamic_base_64 (base));
7091 else
7092 emit_insn (gen_tls_local_dynamic_base_32 (base));
7093
7094 if (TARGET_GNU2_TLS)
7095 {
7096 rtx x = ix86_tls_module_base ();
7097
7098 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7099 gen_rtx_MINUS (Pmode, x, tp));
7100 }
7101
7102 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7103 off = gen_rtx_CONST (Pmode, off);
7104
7105 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7106
7107 if (TARGET_GNU2_TLS)
7108 {
7109 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7110
7111 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7112 }
7113
7114 break;
7115
7116 case TLS_MODEL_INITIAL_EXEC:
7117 if (TARGET_64BIT)
7118 {
7119 pic = NULL;
7120 type = UNSPEC_GOTNTPOFF;
7121 }
7122 else if (flag_pic)
7123 {
7124 if (reload_in_progress)
7125 regs_ever_live[PIC_OFFSET_TABLE_REGNUM] = 1;
7126 pic = pic_offset_table_rtx;
7127 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7128 }
7129 else if (!TARGET_ANY_GNU_TLS)
7130 {
7131 pic = gen_reg_rtx (Pmode);
7132 emit_insn (gen_set_got (pic));
7133 type = UNSPEC_GOTTPOFF;
7134 }
7135 else
7136 {
7137 pic = NULL;
7138 type = UNSPEC_INDNTPOFF;
7139 }
7140
7141 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7142 off = gen_rtx_CONST (Pmode, off);
7143 if (pic)
7144 off = gen_rtx_PLUS (Pmode, pic, off);
7145 off = gen_const_mem (Pmode, off);
7146 set_mem_alias_set (off, ix86_GOT_alias_set ());
7147
7148 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7149 {
7150 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7151 off = force_reg (Pmode, off);
7152 return gen_rtx_PLUS (Pmode, base, off);
7153 }
7154 else
7155 {
7156 base = get_thread_pointer (true);
7157 dest = gen_reg_rtx (Pmode);
7158 emit_insn (gen_subsi3 (dest, base, off));
7159 }
7160 break;
7161
7162 case TLS_MODEL_LOCAL_EXEC:
7163 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7164 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7165 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7166 off = gen_rtx_CONST (Pmode, off);
7167
7168 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7169 {
7170 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7171 return gen_rtx_PLUS (Pmode, base, off);
7172 }
7173 else
7174 {
7175 base = get_thread_pointer (true);
7176 dest = gen_reg_rtx (Pmode);
7177 emit_insn (gen_subsi3 (dest, base, off));
7178 }
7179 break;
7180
7181 default:
7182 gcc_unreachable ();
7183 }
7184
7185 return dest;
7186 }
7187
7188 /* Try machine-dependent ways of modifying an illegitimate address
7189 to be legitimate. If we find one, return the new, valid address.
7190 This macro is used in only one place: `memory_address' in explow.c.
7191
7192 OLDX is the address as it was before break_out_memory_refs was called.
7193 In some cases it is useful to look at this to decide what needs to be done.
7194
7195 MODE and WIN are passed so that this macro can use
7196 GO_IF_LEGITIMATE_ADDRESS.
7197
7198 It is always safe for this macro to do nothing. It exists to recognize
7199 opportunities to optimize the output.
7200
7201 For the 80386, we handle X+REG by loading X into a register R and
7202 using R+REG. R will go in a general reg and indexing will be used.
7203 However, if REG is a broken-out memory address or multiplication,
7204 nothing needs to be done because REG can certainly go in a general reg.
7205
7206 When -fpic is used, special handling is needed for symbolic references.
7207 See comments by legitimize_pic_address in i386.c for details. */
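/* For instance (a hand-written illustration, not from a dump), the address
       (plus (ashift (reg A) (const_int 2)) (reg B))
   is canonicalized below into the indexable form
       (plus (mult (reg A) (const_int 4)) (reg B)).  */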
7208
7209 rtx
7210 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7211 {
7212 int changed = 0;
7213 unsigned log;
7214
7215 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7216 if (log)
7217 return legitimize_tls_address (x, log, false);
7218 if (GET_CODE (x) == CONST
7219 && GET_CODE (XEXP (x, 0)) == PLUS
7220 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7221 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7222 {
7223 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0), log, false);
7224 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7225 }
7226
7227 if (flag_pic && SYMBOLIC_CONST (x))
7228 return legitimize_pic_address (x, 0);
7229
7230 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7231 if (GET_CODE (x) == ASHIFT
7232 && CONST_INT_P (XEXP (x, 1))
7233 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7234 {
7235 changed = 1;
7236 log = INTVAL (XEXP (x, 1));
7237 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7238 GEN_INT (1 << log));
7239 }
7240
7241 if (GET_CODE (x) == PLUS)
7242 {
7243 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7244
7245 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7246 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7247 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7248 {
7249 changed = 1;
7250 log = INTVAL (XEXP (XEXP (x, 0), 1));
7251 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7252 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7253 GEN_INT (1 << log));
7254 }
7255
7256 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7257 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7258 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7259 {
7260 changed = 1;
7261 log = INTVAL (XEXP (XEXP (x, 1), 1));
7262 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7263 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7264 GEN_INT (1 << log));
7265 }
7266
7267 /* Put multiply first if it isn't already. */
7268 if (GET_CODE (XEXP (x, 1)) == MULT)
7269 {
7270 rtx tmp = XEXP (x, 0);
7271 XEXP (x, 0) = XEXP (x, 1);
7272 XEXP (x, 1) = tmp;
7273 changed = 1;
7274 }
7275
7276 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7277 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7278 created by virtual register instantiation, register elimination, and
7279 similar optimizations. */
7280 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7281 {
7282 changed = 1;
7283 x = gen_rtx_PLUS (Pmode,
7284 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7285 XEXP (XEXP (x, 1), 0)),
7286 XEXP (XEXP (x, 1), 1));
7287 }
7288
7289 /* Canonicalize
7290 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7291 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7292 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7293 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7294 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7295 && CONSTANT_P (XEXP (x, 1)))
7296 {
7297 rtx constant;
7298 rtx other = NULL_RTX;
7299
7300 if (CONST_INT_P (XEXP (x, 1)))
7301 {
7302 constant = XEXP (x, 1);
7303 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7304 }
7305 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7306 {
7307 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7308 other = XEXP (x, 1);
7309 }
7310 else
7311 constant = 0;
7312
7313 if (constant)
7314 {
7315 changed = 1;
7316 x = gen_rtx_PLUS (Pmode,
7317 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7318 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7319 plus_constant (other, INTVAL (constant)));
7320 }
7321 }
7322
7323 if (changed && legitimate_address_p (mode, x, FALSE))
7324 return x;
7325
7326 if (GET_CODE (XEXP (x, 0)) == MULT)
7327 {
7328 changed = 1;
7329 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7330 }
7331
7332 if (GET_CODE (XEXP (x, 1)) == MULT)
7333 {
7334 changed = 1;
7335 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7336 }
7337
7338 if (changed
7339 && REG_P (XEXP (x, 1))
7340 && REG_P (XEXP (x, 0)))
7341 return x;
7342
7343 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7344 {
7345 changed = 1;
7346 x = legitimize_pic_address (x, 0);
7347 }
7348
7349 if (changed && legitimate_address_p (mode, x, FALSE))
7350 return x;
7351
7352 if (REG_P (XEXP (x, 0)))
7353 {
7354 rtx temp = gen_reg_rtx (Pmode);
7355 rtx val = force_operand (XEXP (x, 1), temp);
7356 if (val != temp)
7357 emit_move_insn (temp, val);
7358
7359 XEXP (x, 1) = temp;
7360 return x;
7361 }
7362
7363 else if (REG_P (XEXP (x, 1)))
7364 {
7365 rtx temp = gen_reg_rtx (Pmode);
7366 rtx val = force_operand (XEXP (x, 0), temp);
7367 if (val != temp)
7368 emit_move_insn (temp, val);
7369
7370 XEXP (x, 0) = temp;
7371 return x;
7372 }
7373 }
7374
7375 return x;
7376 }
7377 \f
7378 /* Print an integer constant expression in assembler syntax. Addition
7379 and subtraction are the only arithmetic that may appear in these
7380 expressions. FILE is the stdio stream to write to, X is the rtx, and
7381 CODE is the operand print code from the output string. */
7382
7383 static void
7384 output_pic_addr_const (FILE *file, rtx x, int code)
7385 {
7386 char buf[256];
7387
7388 switch (GET_CODE (x))
7389 {
7390 case PC:
7391 gcc_assert (flag_pic);
7392 putc ('.', file);
7393 break;
7394
7395 case SYMBOL_REF:
7396 if (! TARGET_MACHO || TARGET_64BIT)
7397 output_addr_const (file, x);
7398 else
7399 {
7400 const char *name = XSTR (x, 0);
7401
7402 /* Mark the decl as referenced so that cgraph will output the function. */
7403 if (SYMBOL_REF_DECL (x))
7404 mark_decl_referenced (SYMBOL_REF_DECL (x));
7405
7406 #if TARGET_MACHO
7407 if (MACHOPIC_INDIRECT
7408 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7409 name = machopic_indirection_name (x, /*stub_p=*/true);
7410 #endif
7411 assemble_name (file, name);
7412 }
7413 if (!TARGET_MACHO && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7414 fputs ("@PLT", file);
7415 break;
7416
7417 case LABEL_REF:
7418 x = XEXP (x, 0);
7419 /* FALLTHRU */
7420 case CODE_LABEL:
7421 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7422 assemble_name (asm_out_file, buf);
7423 break;
7424
7425 case CONST_INT:
7426 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7427 break;
7428
7429 case CONST:
7430 /* This used to output parentheses around the expression,
7431 but that does not work on the 386 (either ATT or BSD assembler). */
7432 output_pic_addr_const (file, XEXP (x, 0), code);
7433 break;
7434
7435 case CONST_DOUBLE:
7436 if (GET_MODE (x) == VOIDmode)
7437 {
7438 /* We can use %d if the number is <32 bits and positive. */
7439 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7440 fprintf (file, "0x%lx%08lx",
7441 (unsigned long) CONST_DOUBLE_HIGH (x),
7442 (unsigned long) CONST_DOUBLE_LOW (x));
7443 else
7444 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7445 }
7446 else
7447 /* We can't handle floating point constants;
7448 PRINT_OPERAND must handle them. */
7449 output_operand_lossage ("floating constant misused");
7450 break;
7451
7452 case PLUS:
7453 /* Some assemblers need integer constants to appear first. */
7454 if (CONST_INT_P (XEXP (x, 0)))
7455 {
7456 output_pic_addr_const (file, XEXP (x, 0), code);
7457 putc ('+', file);
7458 output_pic_addr_const (file, XEXP (x, 1), code);
7459 }
7460 else
7461 {
7462 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7463 output_pic_addr_const (file, XEXP (x, 1), code);
7464 putc ('+', file);
7465 output_pic_addr_const (file, XEXP (x, 0), code);
7466 }
7467 break;
7468
7469 case MINUS:
7470 if (!TARGET_MACHO)
7471 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7472 output_pic_addr_const (file, XEXP (x, 0), code);
7473 putc ('-', file);
7474 output_pic_addr_const (file, XEXP (x, 1), code);
7475 if (!TARGET_MACHO)
7476 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
7477 break;
7478
7479 case UNSPEC:
7480 gcc_assert (XVECLEN (x, 0) == 1);
7481 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
7482 switch (XINT (x, 1))
7483 {
7484 case UNSPEC_GOT:
7485 fputs ("@GOT", file);
7486 break;
7487 case UNSPEC_GOTOFF:
7488 fputs ("@GOTOFF", file);
7489 break;
7490 case UNSPEC_PLTOFF:
7491 fputs ("@PLTOFF", file);
7492 break;
7493 case UNSPEC_GOTPCREL:
7494 fputs ("@GOTPCREL(%rip)", file);
7495 break;
7496 case UNSPEC_GOTTPOFF:
7497 /* FIXME: This might be @TPOFF in Sun ld too. */
7498 fputs ("@GOTTPOFF", file);
7499 break;
7500 case UNSPEC_TPOFF:
7501 fputs ("@TPOFF", file);
7502 break;
7503 case UNSPEC_NTPOFF:
7504 if (TARGET_64BIT)
7505 fputs ("@TPOFF", file);
7506 else
7507 fputs ("@NTPOFF", file);
7508 break;
7509 case UNSPEC_DTPOFF:
7510 fputs ("@DTPOFF", file);
7511 break;
7512 case UNSPEC_GOTNTPOFF:
7513 if (TARGET_64BIT)
7514 fputs ("@GOTTPOFF(%rip)", file);
7515 else
7516 fputs ("@GOTNTPOFF", file);
7517 break;
7518 case UNSPEC_INDNTPOFF:
7519 fputs ("@INDNTPOFF", file);
7520 break;
7521 default:
7522 output_operand_lossage ("invalid UNSPEC as operand");
7523 break;
7524 }
7525 break;
7526
7527 default:
7528 output_operand_lossage ("invalid expression as operand");
7529 }
7530 }
7531
7532 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
7533 We need to emit DTP-relative relocations. */
7534
7535 static void ATTRIBUTE_UNUSED
7536 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
7537 {
7538 fputs (ASM_LONG, file);
7539 output_addr_const (file, x);
7540 fputs ("@DTPOFF", file);
7541 switch (size)
7542 {
7543 case 4:
7544 break;
7545 case 8:
7546 fputs (", 0", file);
7547 break;
7548 default:
7549 gcc_unreachable ();
7550 }
7551 }
7552
7553 /* In the name of slightly smaller debug output, and to cater to
7554 general assembler lossage, recognize PIC+GOTOFF and turn it back
7555 into a direct symbol reference.
7556
7557 On Darwin, this is necessary to avoid a crash, because Darwin
7558 has a different PIC label for each routine but the DWARF debugging
7559 information is not associated with any particular routine, so it's
7560 necessary to remove references to the PIC label from RTL stored by
7561 the DWARF output code. */
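/* Illustrative example (not from any particular dump): on ia32 the
   delegitimized form of
       (plus (reg ebx) (const (unspec [symbol_ref FOO] UNSPEC_GOTOFF)))
   is simply (symbol_ref FOO) again.  */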
7562
7563 static rtx
7564 ix86_delegitimize_address (rtx orig_x)
7565 {
7566 rtx x = orig_x;
7567 /* reg_addend is NULL or a multiple of some register. */
7568 rtx reg_addend = NULL_RTX;
7569 /* const_addend is NULL or a const_int. */
7570 rtx const_addend = NULL_RTX;
7571 /* This is the result, or NULL. */
7572 rtx result = NULL_RTX;
7573
7574 if (MEM_P (x))
7575 x = XEXP (x, 0);
7576
7577 if (TARGET_64BIT)
7578 {
7579 if (GET_CODE (x) != CONST
7580 || GET_CODE (XEXP (x, 0)) != UNSPEC
7581 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
7582 || !MEM_P (orig_x))
7583 return orig_x;
7584 return XVECEXP (XEXP (x, 0), 0, 0);
7585 }
7586
7587 if (GET_CODE (x) != PLUS
7588 || GET_CODE (XEXP (x, 1)) != CONST)
7589 return orig_x;
7590
7591 if (REG_P (XEXP (x, 0))
7592 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
7593 /* %ebx + GOT/GOTOFF */
7594 ;
7595 else if (GET_CODE (XEXP (x, 0)) == PLUS)
7596 {
7597 /* %ebx + %reg * scale + GOT/GOTOFF */
7598 reg_addend = XEXP (x, 0);
7599 if (REG_P (XEXP (reg_addend, 0))
7600 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
7601 reg_addend = XEXP (reg_addend, 1);
7602 else if (REG_P (XEXP (reg_addend, 1))
7603 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
7604 reg_addend = XEXP (reg_addend, 0);
7605 else
7606 return orig_x;
7607 if (!REG_P (reg_addend)
7608 && GET_CODE (reg_addend) != MULT
7609 && GET_CODE (reg_addend) != ASHIFT)
7610 return orig_x;
7611 }
7612 else
7613 return orig_x;
7614
7615 x = XEXP (XEXP (x, 1), 0);
7616 if (GET_CODE (x) == PLUS
7617 && CONST_INT_P (XEXP (x, 1)))
7618 {
7619 const_addend = XEXP (x, 1);
7620 x = XEXP (x, 0);
7621 }
7622
7623 if (GET_CODE (x) == UNSPEC
7624 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
7625 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
7626 result = XVECEXP (x, 0, 0);
7627
7628 if (TARGET_MACHO && darwin_local_data_pic (x)
7629 && !MEM_P (orig_x))
7630 result = XEXP (x, 0);
7631
7632 if (! result)
7633 return orig_x;
7634
7635 if (const_addend)
7636 result = gen_rtx_PLUS (Pmode, result, const_addend);
7637 if (reg_addend)
7638 result = gen_rtx_PLUS (Pmode, reg_addend, result);
7639 return result;
7640 }
7641
7642 /* If X is a machine specific address (i.e. a symbol or label being
7643 referenced as a displacement from the GOT implemented using an
7644 UNSPEC), then return the base term. Otherwise return X. */
7645
7646 rtx
7647 ix86_find_base_term (rtx x)
7648 {
7649 rtx term;
7650
7651 if (TARGET_64BIT)
7652 {
7653 if (GET_CODE (x) != CONST)
7654 return x;
7655 term = XEXP (x, 0);
7656 if (GET_CODE (term) == PLUS
7657 && (CONST_INT_P (XEXP (term, 1))
7658 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
7659 term = XEXP (term, 0);
7660 if (GET_CODE (term) != UNSPEC
7661 || XINT (term, 1) != UNSPEC_GOTPCREL)
7662 return x;
7663
7664 term = XVECEXP (term, 0, 0);
7665
7666 if (GET_CODE (term) != SYMBOL_REF
7667 && GET_CODE (term) != LABEL_REF)
7668 return x;
7669
7670 return term;
7671 }
7672
7673 term = ix86_delegitimize_address (x);
7674
7675 if (GET_CODE (term) != SYMBOL_REF
7676 && GET_CODE (term) != LABEL_REF)
7677 return x;
7678
7679 return term;
7680 }
7681 \f
7682 static void
7683 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
7684 int fp, FILE *file)
7685 {
7686 const char *suffix;
7687
7688 if (mode == CCFPmode || mode == CCFPUmode)
7689 {
7690 enum rtx_code second_code, bypass_code;
7691 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
7692 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
7693 code = ix86_fp_compare_code_to_integer (code);
7694 mode = CCmode;
7695 }
7696 if (reverse)
7697 code = reverse_condition (code);
7698
7699 switch (code)
7700 {
7701 case EQ:
7702 suffix = "e";
7703 break;
7704 case NE:
7705 suffix = "ne";
7706 break;
7707 case GT:
7708 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
7709 suffix = "g";
7710 break;
7711 case GTU:
7712 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
7713 Those same assemblers have the same but opposite lossage on cmov. */
7714 gcc_assert (mode == CCmode);
7715 suffix = fp ? "nbe" : "a";
7716 break;
7717 case LT:
7718 switch (mode)
7719 {
7720 case CCNOmode:
7721 case CCGOCmode:
7722 suffix = "s";
7723 break;
7724
7725 case CCmode:
7726 case CCGCmode:
7727 suffix = "l";
7728 break;
7729
7730 default:
7731 gcc_unreachable ();
7732 }
7733 break;
7734 case LTU:
7735 gcc_assert (mode == CCmode);
7736 suffix = "b";
7737 break;
7738 case GE:
7739 switch (mode)
7740 {
7741 case CCNOmode:
7742 case CCGOCmode:
7743 suffix = "ns";
7744 break;
7745
7746 case CCmode:
7747 case CCGCmode:
7748 suffix = "ge";
7749 break;
7750
7751 default:
7752 gcc_unreachable ();
7753 }
7754 break;
7755 case GEU:
7756 /* ??? As above. */
7757 gcc_assert (mode == CCmode);
7758 suffix = fp ? "nb" : "ae";
7759 break;
7760 case LE:
7761 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
7762 suffix = "le";
7763 break;
7764 case LEU:
7765 gcc_assert (mode == CCmode);
7766 suffix = "be";
7767 break;
7768 case UNORDERED:
7769 suffix = fp ? "u" : "p";
7770 break;
7771 case ORDERED:
7772 suffix = fp ? "nu" : "np";
7773 break;
7774 default:
7775 gcc_unreachable ();
7776 }
7777 fputs (suffix, file);
7778 }
7779
7780 /* Print the name of register X to FILE based on its machine mode and number.
7781 If CODE is 'w', pretend the mode is HImode.
7782 If CODE is 'b', pretend the mode is QImode.
7783 If CODE is 'k', pretend the mode is SImode.
7784 If CODE is 'q', pretend the mode is DImode.
7785 If CODE is 'h', pretend the reg is the 'high' byte register.
7786 If CODE is 'y', print "st(0)" instead of "st" if the reg is a stack op. */
7787
7788 void
7789 print_reg (rtx x, int code, FILE *file)
7790 {
7791 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
7792 && REGNO (x) != FRAME_POINTER_REGNUM
7793 && REGNO (x) != FLAGS_REG
7794 && REGNO (x) != FPSR_REG
7795 && REGNO (x) != FPCR_REG);
7796
7797 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
7798 putc ('%', file);
7799
7800 if (code == 'w' || MMX_REG_P (x))
7801 code = 2;
7802 else if (code == 'b')
7803 code = 1;
7804 else if (code == 'k')
7805 code = 4;
7806 else if (code == 'q')
7807 code = 8;
7808 else if (code == 'y')
7809 code = 3;
7810 else if (code == 'h')
7811 code = 0;
7812 else
7813 code = GET_MODE_SIZE (GET_MODE (x));
7814
7815 /* Irritatingly, the AMD extended registers use a different naming
7816 convention from the normal registers. */
7817 if (REX_INT_REG_P (x))
7818 {
7819 gcc_assert (TARGET_64BIT);
7820 switch (code)
7821 {
7822 case 0:
7823 error ("extended registers have no high halves");
7824 break;
7825 case 1:
7826 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
7827 break;
7828 case 2:
7829 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
7830 break;
7831 case 4:
7832 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
7833 break;
7834 case 8:
7835 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
7836 break;
7837 default:
7838 error ("unsupported operand size for extended register");
7839 break;
7840 }
7841 return;
7842 }
7843 switch (code)
7844 {
7845 case 3:
7846 if (STACK_TOP_P (x))
7847 {
7848 fputs ("st(0)", file);
7849 break;
7850 }
7851 /* FALLTHRU */
7852 case 8:
7853 case 4:
7854 case 12:
7855 if (! ANY_FP_REG_P (x))
7856 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
7857 /* FALLTHRU */
7858 case 16:
7859 case 2:
7860 normal:
7861 fputs (hi_reg_name[REGNO (x)], file);
7862 break;
7863 case 1:
7864 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
7865 goto normal;
7866 fputs (qi_reg_name[REGNO (x)], file);
7867 break;
7868 case 0:
7869 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
7870 goto normal;
7871 fputs (qi_high_reg_name[REGNO (x)], file);
7872 break;
7873 default:
7874 gcc_unreachable ();
7875 }
7876 }
7877
7878 /* Locate some local-dynamic symbol still in use by this function
7879 so that we can print its name in some tls_local_dynamic_base
7880 pattern. */
7881
7882 static int
7883 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
7884 {
7885 rtx x = *px;
7886
7887 if (GET_CODE (x) == SYMBOL_REF
7888 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
7889 {
7890 cfun->machine->some_ld_name = XSTR (x, 0);
7891 return 1;
7892 }
7893
7894 return 0;
7895 }
7896
7897 static const char *
7898 get_some_local_dynamic_name (void)
7899 {
7900 rtx insn;
7901
7902 if (cfun->machine->some_ld_name)
7903 return cfun->machine->some_ld_name;
7904
7905 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
7906 if (INSN_P (insn)
7907 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
7908 return cfun->machine->some_ld_name;
7909
7910 gcc_unreachable ();
7911 }
7912
7913 /* Meaning of CODE:
7914 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
7915 C -- print opcode suffix for set/cmov insn.
7916 c -- like C, but print reversed condition
7917 F,f -- likewise, but for floating-point.
7918 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
7919 otherwise nothing
7920 R -- print the prefix for register names.
7921 z -- print the opcode suffix for the size of the current operand.
7922 * -- print a star (in certain assembler syntax)
7923 A -- print an absolute memory reference.
7924 w -- print the operand as if it's a "word" (HImode) even if it isn't.
7925 s -- print a shift double count, followed by the assembler's argument
7926 delimiter.
7927 b -- print the QImode name of the register for the indicated operand.
7928 %b0 would print %al if operands[0] is reg 0.
7929 w -- likewise, print the HImode name of the register.
7930 k -- likewise, print the SImode name of the register.
7931 q -- likewise, print the DImode name of the register.
7932 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
7933 y -- print "st(0)" instead of "st" as a register.
7934 D -- print condition for SSE cmp instruction.
7935 P -- if PIC, print an @PLT suffix.
7936 X -- don't print any sort of PIC '@' suffix for a symbol.
7937 & -- print some in-use local-dynamic symbol name.
7938 H -- print a memory address offset by 8; used for sse high-parts
7939 */
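/* A small hypothetical illustration (not a real pattern from i386.md):
   given a template fragment "{%1, %k0|%k0, %1}", %k0 prints the SImode
   name of operand 0 (e.g. %eax, where %b0 would print %al), and the
   {att|intel} braces select the operand order for the assembler dialect
   in use.  */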
7940
7941 void
7942 print_operand (FILE *file, rtx x, int code)
7943 {
7944 if (code)
7945 {
7946 switch (code)
7947 {
7948 case '*':
7949 if (ASSEMBLER_DIALECT == ASM_ATT)
7950 putc ('*', file);
7951 return;
7952
7953 case '&':
7954 assemble_name (file, get_some_local_dynamic_name ());
7955 return;
7956
7957 case 'A':
7958 switch (ASSEMBLER_DIALECT)
7959 {
7960 case ASM_ATT:
7961 putc ('*', file);
7962 break;
7963
7964 case ASM_INTEL:
7965 /* Intel syntax. For absolute addresses, registers should not
7966 be surrounded by brackets. */
7967 if (!REG_P (x))
7968 {
7969 putc ('[', file);
7970 PRINT_OPERAND (file, x, 0);
7971 putc (']', file);
7972 return;
7973 }
7974 break;
7975
7976 default:
7977 gcc_unreachable ();
7978 }
7979
7980 PRINT_OPERAND (file, x, 0);
7981 return;
7982
7983
7984 case 'L':
7985 if (ASSEMBLER_DIALECT == ASM_ATT)
7986 putc ('l', file);
7987 return;
7988
7989 case 'W':
7990 if (ASSEMBLER_DIALECT == ASM_ATT)
7991 putc ('w', file);
7992 return;
7993
7994 case 'B':
7995 if (ASSEMBLER_DIALECT == ASM_ATT)
7996 putc ('b', file);
7997 return;
7998
7999 case 'Q':
8000 if (ASSEMBLER_DIALECT == ASM_ATT)
8001 putc ('l', file);
8002 return;
8003
8004 case 'S':
8005 if (ASSEMBLER_DIALECT == ASM_ATT)
8006 putc ('s', file);
8007 return;
8008
8009 case 'T':
8010 if (ASSEMBLER_DIALECT == ASM_ATT)
8011 putc ('t', file);
8012 return;
8013
8014 case 'z':
8015 /* 387 opcodes don't get size suffixes if the operands are
8016 registers. */
8017 if (STACK_REG_P (x))
8018 return;
8019
8020 /* Likewise if using Intel opcodes. */
8021 if (ASSEMBLER_DIALECT == ASM_INTEL)
8022 return;
8023
8024 /* Derive the opcode suffix from the size of the operand. */
8025 switch (GET_MODE_SIZE (GET_MODE (x)))
8026 {
8027 case 1:
8028 putc ('b', file);
8029 return;
8030
8031 case 2:
8032 #ifdef HAVE_GAS_FILDS_FISTS
8033 putc ('s', file);
8034 #endif
8035 return;
8036
8037 case 4:
8038 if (GET_MODE (x) == SFmode)
8039 {
8040 putc ('s', file);
8041 return;
8042 }
8043 else
8044 putc ('l', file);
8045 return;
8046
8047 case 12:
8048 case 16:
8049 putc ('t', file);
8050 return;
8051
8052 case 8:
8053 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8054 {
8055 #ifdef GAS_MNEMONICS
8056 putc ('q', file);
8057 #else
8058 putc ('l', file);
8059 putc ('l', file);
8060 #endif
8061 }
8062 else
8063 putc ('l', file);
8064 return;
8065
8066 default:
8067 gcc_unreachable ();
8068 }
8069
8070 case 'b':
8071 case 'w':
8072 case 'k':
8073 case 'q':
8074 case 'h':
8075 case 'y':
8076 case 'X':
8077 case 'P':
8078 break;
8079
8080 case 's':
8081 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8082 {
8083 PRINT_OPERAND (file, x, 0);
8084 putc (',', file);
8085 }
8086 return;
8087
8088 case 'D':
8089 /* A little bit of brain damage here. The SSE compare instructions
8090 use completely different names for the comparisons than the
8091 fp conditional moves do. */
8092 switch (GET_CODE (x))
8093 {
8094 case EQ:
8095 case UNEQ:
8096 fputs ("eq", file);
8097 break;
8098 case LT:
8099 case UNLT:
8100 fputs ("lt", file);
8101 break;
8102 case LE:
8103 case UNLE:
8104 fputs ("le", file);
8105 break;
8106 case UNORDERED:
8107 fputs ("unord", file);
8108 break;
8109 case NE:
8110 case LTGT:
8111 fputs ("neq", file);
8112 break;
8113 case UNGE:
8114 case GE:
8115 fputs ("nlt", file);
8116 break;
8117 case UNGT:
8118 case GT:
8119 fputs ("nle", file);
8120 break;
8121 case ORDERED:
8122 fputs ("ord", file);
8123 break;
8124 default:
8125 gcc_unreachable ();
8126 }
8127 return;
8128 case 'O':
8129 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8130 if (ASSEMBLER_DIALECT == ASM_ATT)
8131 {
8132 switch (GET_MODE (x))
8133 {
8134 case HImode: putc ('w', file); break;
8135 case SImode:
8136 case SFmode: putc ('l', file); break;
8137 case DImode:
8138 case DFmode: putc ('q', file); break;
8139 default: gcc_unreachable ();
8140 }
8141 putc ('.', file);
8142 }
8143 #endif
8144 return;
8145 case 'C':
8146 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8147 return;
8148 case 'F':
8149 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8150 if (ASSEMBLER_DIALECT == ASM_ATT)
8151 putc ('.', file);
8152 #endif
8153 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8154 return;
8155
8156 /* Like above, but reverse condition */
8157 case 'c':
8158 /* Check to see if argument to %c is really a constant
8159 and not a condition code which needs to be reversed. */
8160 if (!COMPARISON_P (x))
8161 {
8162 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8163 return;
8164 }
8165 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8166 return;
8167 case 'f':
8168 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8169 if (ASSEMBLER_DIALECT == ASM_ATT)
8170 putc ('.', file);
8171 #endif
8172 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8173 return;
8174
8175 case 'H':
8176 /* It doesn't actually matter what mode we use here, as we're
8177 only going to use this for printing. */
8178 x = adjust_address_nv (x, DImode, 8);
8179 break;
8180
8181 case '+':
8182 {
8183 rtx x;
8184
8185 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8186 return;
8187
8188 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8189 if (x)
8190 {
8191 int pred_val = INTVAL (XEXP (x, 0));
8192
8193 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8194 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8195 {
8196 int taken = pred_val > REG_BR_PROB_BASE / 2;
8197 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8198
8199 /* Emit hints only in the case where the default branch prediction
8200 heuristics would fail. */
8201 if (taken != cputaken)
8202 {
8203 /* We use 3e (DS) prefix for taken branches and
8204 2e (CS) prefix for not taken branches. */
8205 if (taken)
8206 fputs ("ds ; ", file);
8207 else
8208 fputs ("cs ; ", file);
8209 }
8210 }
8211 }
8212 return;
8213 }
8214 default:
8215 output_operand_lossage ("invalid operand code '%c'", code);
8216 }
8217 }
8218
8219 if (REG_P (x))
8220 print_reg (x, code, file);
8221
8222 else if (MEM_P (x))
8223 {
8224 /* No `byte ptr' prefix for call instructions. */
8225 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8226 {
8227 const char * size;
8228 switch (GET_MODE_SIZE (GET_MODE (x)))
8229 {
8230 case 1: size = "BYTE"; break;
8231 case 2: size = "WORD"; break;
8232 case 4: size = "DWORD"; break;
8233 case 8: size = "QWORD"; break;
8234 case 12: size = "XWORD"; break;
8235 case 16: size = "XMMWORD"; break;
8236 default:
8237 gcc_unreachable ();
8238 }
8239
8240 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8241 if (code == 'b')
8242 size = "BYTE";
8243 else if (code == 'w')
8244 size = "WORD";
8245 else if (code == 'k')
8246 size = "DWORD";
8247
8248 fputs (size, file);
8249 fputs (" PTR ", file);
8250 }
8251
8252 x = XEXP (x, 0);
8253 /* Avoid (%rip) for call operands. */
8254 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8255 && !CONST_INT_P (x))
8256 output_addr_const (file, x);
8257 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8258 output_operand_lossage ("invalid constraints for operand");
8259 else
8260 output_address (x);
8261 }
8262
8263 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8264 {
8265 REAL_VALUE_TYPE r;
8266 long l;
8267
8268 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8269 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8270
8271 if (ASSEMBLER_DIALECT == ASM_ATT)
8272 putc ('$', file);
8273 fprintf (file, "0x%08lx", l);
8274 }
8275
8276 /* These float cases don't actually occur as immediate operands. */
8277 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8278 {
8279 char dstr[30];
8280
8281 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8282 fprintf (file, "%s", dstr);
8283 }
8284
8285 else if (GET_CODE (x) == CONST_DOUBLE
8286 && GET_MODE (x) == XFmode)
8287 {
8288 char dstr[30];
8289
8290 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8291 fprintf (file, "%s", dstr);
8292 }
8293
8294 else
8295 {
8296 /* We have patterns that allow zero sets of memory, for instance.
8297 In 64-bit mode, we should probably support all 8-byte vectors,
8298 since we can in fact encode that into an immediate. */
8299 if (GET_CODE (x) == CONST_VECTOR)
8300 {
8301 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8302 x = const0_rtx;
8303 }
8304
8305 if (code != 'P')
8306 {
8307 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8308 {
8309 if (ASSEMBLER_DIALECT == ASM_ATT)
8310 putc ('$', file);
8311 }
8312 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8313 || GET_CODE (x) == LABEL_REF)
8314 {
8315 if (ASSEMBLER_DIALECT == ASM_ATT)
8316 putc ('$', file);
8317 else
8318 fputs ("OFFSET FLAT:", file);
8319 }
8320 }
8321 if (CONST_INT_P (x))
8322 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8323 else if (flag_pic)
8324 output_pic_addr_const (file, x, code);
8325 else
8326 output_addr_const (file, x);
8327 }
8328 }
8329 \f
8330 /* Print a memory operand whose address is ADDR. */
8331
8332 void
8333 print_operand_address (FILE *file, rtx addr)
8334 {
8335 struct ix86_address parts;
8336 rtx base, index, disp;
8337 int scale;
8338 int ok = ix86_decompose_address (addr, &parts);
8339
8340 gcc_assert (ok);
8341
8342 base = parts.base;
8343 index = parts.index;
8344 disp = parts.disp;
8345 scale = parts.scale;
8346
8347 switch (parts.seg)
8348 {
8349 case SEG_DEFAULT:
8350 break;
8351 case SEG_FS:
8352 case SEG_GS:
8353 if (USER_LABEL_PREFIX[0] == 0)
8354 putc ('%', file);
8355 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8356 break;
8357 default:
8358 gcc_unreachable ();
8359 }
8360
8361 if (!base && !index)
8362 {
8363 /* A displacement-only address requires special attention. */
8364
8365 if (CONST_INT_P (disp))
8366 {
8367 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8368 {
8369 if (USER_LABEL_PREFIX[0] == 0)
8370 putc ('%', file);
8371 fputs ("ds:", file);
8372 }
8373 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8374 }
8375 else if (flag_pic)
8376 output_pic_addr_const (file, disp, 0);
8377 else
8378 output_addr_const (file, disp);
8379
8380 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8381 if (TARGET_64BIT)
8382 {
8383 if (GET_CODE (disp) == CONST
8384 && GET_CODE (XEXP (disp, 0)) == PLUS
8385 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8386 disp = XEXP (XEXP (disp, 0), 0);
8387 if (GET_CODE (disp) == LABEL_REF
8388 || (GET_CODE (disp) == SYMBOL_REF
8389 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8390 fputs ("(%rip)", file);
8391 }
8392 }
8393 else
8394 {
8395 if (ASSEMBLER_DIALECT == ASM_ATT)
8396 {
8397 if (disp)
8398 {
8399 if (flag_pic)
8400 output_pic_addr_const (file, disp, 0);
8401 else if (GET_CODE (disp) == LABEL_REF)
8402 output_asm_label (disp);
8403 else
8404 output_addr_const (file, disp);
8405 }
8406
8407 putc ('(', file);
8408 if (base)
8409 print_reg (base, 0, file);
8410 if (index)
8411 {
8412 putc (',', file);
8413 print_reg (index, 0, file);
8414 if (scale != 1)
8415 fprintf (file, ",%d", scale);
8416 }
8417 putc (')', file);
8418 }
8419 else
8420 {
8421 rtx offset = NULL_RTX;
8422
8423 if (disp)
8424 {
8425 /* Pull out the offset of a symbol; print any symbol itself. */
8426 if (GET_CODE (disp) == CONST
8427 && GET_CODE (XEXP (disp, 0)) == PLUS
8428 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8429 {
8430 offset = XEXP (XEXP (disp, 0), 1);
8431 disp = gen_rtx_CONST (VOIDmode,
8432 XEXP (XEXP (disp, 0), 0));
8433 }
8434
8435 if (flag_pic)
8436 output_pic_addr_const (file, disp, 0);
8437 else if (GET_CODE (disp) == LABEL_REF)
8438 output_asm_label (disp);
8439 else if (CONST_INT_P (disp))
8440 offset = disp;
8441 else
8442 output_addr_const (file, disp);
8443 }
8444
8445 putc ('[', file);
8446 if (base)
8447 {
8448 print_reg (base, 0, file);
8449 if (offset)
8450 {
8451 if (INTVAL (offset) >= 0)
8452 putc ('+', file);
8453 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8454 }
8455 }
8456 else if (offset)
8457 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
8458 else
8459 putc ('0', file);
8460
8461 if (index)
8462 {
8463 putc ('+', file);
8464 print_reg (index, 0, file);
8465 if (scale != 1)
8466 fprintf (file, "*%d", scale);
8467 }
8468 putc (']', file);
8469 }
8470 }
8471 }
8472
8473 bool
8474 output_addr_const_extra (FILE *file, rtx x)
8475 {
8476 rtx op;
8477
8478 if (GET_CODE (x) != UNSPEC)
8479 return false;
8480
8481 op = XVECEXP (x, 0, 0);
8482 switch (XINT (x, 1))
8483 {
8484 case UNSPEC_GOTTPOFF:
8485 output_addr_const (file, op);
8486 /* FIXME: This might be @TPOFF in Sun ld. */
8487 fputs ("@GOTTPOFF", file);
8488 break;
8489 case UNSPEC_TPOFF:
8490 output_addr_const (file, op);
8491 fputs ("@TPOFF", file);
8492 break;
8493 case UNSPEC_NTPOFF:
8494 output_addr_const (file, op);
8495 if (TARGET_64BIT)
8496 fputs ("@TPOFF", file);
8497 else
8498 fputs ("@NTPOFF", file);
8499 break;
8500 case UNSPEC_DTPOFF:
8501 output_addr_const (file, op);
8502 fputs ("@DTPOFF", file);
8503 break;
8504 case UNSPEC_GOTNTPOFF:
8505 output_addr_const (file, op);
8506 if (TARGET_64BIT)
8507 fputs ("@GOTTPOFF(%rip)", file);
8508 else
8509 fputs ("@GOTNTPOFF", file);
8510 break;
8511 case UNSPEC_INDNTPOFF:
8512 output_addr_const (file, op);
8513 fputs ("@INDNTPOFF", file);
8514 break;
8515
8516 default:
8517 return false;
8518 }
8519
8520 return true;
8521 }
8522 \f
8523 /* Split one or more DImode RTL references into pairs of SImode
8524 references. The RTL can be REG, offsettable MEM, integer constant, or
8525 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
8526 split and "num" is its length. lo_half and hi_half are output arrays
8527 that parallel "operands". */
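/* A concrete illustration (assumed, not from a dump): a DImode MEM at
   address X splits into SImode MEMs at X and X+4, while a DImode
   CONST_INT/CONST_DOUBLE splits into its low and high 32-bit words via
   simplify_gen_subreg.  */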
8528
8529 void
8530 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8531 {
8532 while (num--)
8533 {
8534 rtx op = operands[num];
8535
8536 /* simplify_subreg refuses to split volatile memory addresses,
8537 but we still have to handle them. */
8538 if (MEM_P (op))
8539 {
8540 lo_half[num] = adjust_address (op, SImode, 0);
8541 hi_half[num] = adjust_address (op, SImode, 4);
8542 }
8543 else
8544 {
8545 lo_half[num] = simplify_gen_subreg (SImode, op,
8546 GET_MODE (op) == VOIDmode
8547 ? DImode : GET_MODE (op), 0);
8548 hi_half[num] = simplify_gen_subreg (SImode, op,
8549 GET_MODE (op) == VOIDmode
8550 ? DImode : GET_MODE (op), 4);
8551 }
8552 }
8553 }
8554 /* Split one or more TImode RTL references into pairs of DImode
8555 references. The RTL can be REG, offsettable MEM, integer constant, or
8556 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
8557 split and "num" is its length. lo_half and hi_half are output arrays
8558 that parallel "operands". */
8559
8560 void
8561 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
8562 {
8563 while (num--)
8564 {
8565 rtx op = operands[num];
8566
8567 /* simplify_subreg refuses to split volatile memory addresses, but we
8568 still have to handle them. */
8569 if (MEM_P (op))
8570 {
8571 lo_half[num] = adjust_address (op, DImode, 0);
8572 hi_half[num] = adjust_address (op, DImode, 8);
8573 }
8574 else
8575 {
8576 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
8577 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
8578 }
8579 }
8580 }
8581 \f
8582 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
8583 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
8584 is the expression of the binary operation. The output may either be
8585 emitted here, or returned to the caller, like all output_* functions.
8586
8587 There is no guarantee that the operands are the same mode, as they
8588 might be within FLOAT or FLOAT_EXTEND expressions. */
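/* By way of illustration (informal, not an exact output listing): an
   SFmode SSE addition comes out as "addss\t{%2, %0|%0, %2}", while the
   x87 register-register case with st(0) as the destination becomes
   "fadd\t{%y2, %0|%0, %y2}"; the operand reversal for fsub/fdiv when the
   destination is not st(0) is what the SYSV386_COMPAT logic below is
   about.  */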
8589
8590 #ifndef SYSV386_COMPAT
8591 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
8592 wants to fix the assemblers because that causes incompatibility
8593 with gcc. No-one wants to fix gcc because that causes
8594 incompatibility with assemblers... You can use the option of
8595 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
8596 #define SYSV386_COMPAT 1
8597 #endif
8598
8599 const char *
8600 output_387_binary_op (rtx insn, rtx *operands)
8601 {
8602 static char buf[30];
8603 const char *p;
8604 const char *ssep;
8605 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
8606
8607 #ifdef ENABLE_CHECKING
8608 /* Even if we do not want to check the inputs, this documents the input
8609 constraints, which helps in understanding the following code. */
8610 if (STACK_REG_P (operands[0])
8611 && ((REG_P (operands[1])
8612 && REGNO (operands[0]) == REGNO (operands[1])
8613 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
8614 || (REG_P (operands[2])
8615 && REGNO (operands[0]) == REGNO (operands[2])
8616 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
8617 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
8618 ; /* ok */
8619 else
8620 gcc_assert (is_sse);
8621 #endif
8622
8623 switch (GET_CODE (operands[3]))
8624 {
8625 case PLUS:
8626 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8627 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8628 p = "fiadd";
8629 else
8630 p = "fadd";
8631 ssep = "add";
8632 break;
8633
8634 case MINUS:
8635 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8636 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8637 p = "fisub";
8638 else
8639 p = "fsub";
8640 ssep = "sub";
8641 break;
8642
8643 case MULT:
8644 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8645 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8646 p = "fimul";
8647 else
8648 p = "fmul";
8649 ssep = "mul";
8650 break;
8651
8652 case DIV:
8653 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
8654 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
8655 p = "fidiv";
8656 else
8657 p = "fdiv";
8658 ssep = "div";
8659 break;
8660
8661 default:
8662 gcc_unreachable ();
8663 }
8664
8665 if (is_sse)
8666 {
8667 strcpy (buf, ssep);
8668 if (GET_MODE (operands[0]) == SFmode)
8669 strcat (buf, "ss\t{%2, %0|%0, %2}");
8670 else
8671 strcat (buf, "sd\t{%2, %0|%0, %2}");
8672 return buf;
8673 }
8674 strcpy (buf, p);
8675
8676 switch (GET_CODE (operands[3]))
8677 {
8678 case MULT:
8679 case PLUS:
8680 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
8681 {
8682 rtx temp = operands[2];
8683 operands[2] = operands[1];
8684 operands[1] = temp;
8685 }
8686
8687 /* We know operands[0] == operands[1]. */
8688
8689 if (MEM_P (operands[2]))
8690 {
8691 p = "%z2\t%2";
8692 break;
8693 }
8694
8695 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8696 {
8697 if (STACK_TOP_P (operands[0]))
8698 /* How is it that we are storing to a dead operand[2]?
8699 Well, presumably operands[1] is dead too. We can't
8700 store the result to st(0) as st(0) gets popped on this
8701 instruction. Instead store to operands[2] (which I
8702 think has to be st(1)). st(1) will be popped later.
8703 gcc <= 2.8.1 didn't have this check and generated
8704 assembly code that the Unixware assembler rejected. */
8705 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8706 else
8707 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8708 break;
8709 }
8710
8711 if (STACK_TOP_P (operands[0]))
8712 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8713 else
8714 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8715 break;
8716
8717 case MINUS:
8718 case DIV:
8719 if (MEM_P (operands[1]))
8720 {
8721 p = "r%z1\t%1";
8722 break;
8723 }
8724
8725 if (MEM_P (operands[2]))
8726 {
8727 p = "%z2\t%2";
8728 break;
8729 }
8730
8731 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
8732 {
8733 #if SYSV386_COMPAT
8734 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
8735 derived assemblers, confusingly reverse the direction of
8736 the operation for fsub{r} and fdiv{r} when the
8737 destination register is not st(0). The Intel assembler
8738 doesn't have this brain damage. Read !SYSV386_COMPAT to
8739 figure out what the hardware really does. */
8740 if (STACK_TOP_P (operands[0]))
8741 p = "{p\t%0, %2|rp\t%2, %0}";
8742 else
8743 p = "{rp\t%2, %0|p\t%0, %2}";
8744 #else
8745 if (STACK_TOP_P (operands[0]))
8746 /* As above for fmul/fadd, we can't store to st(0). */
8747 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
8748 else
8749 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
8750 #endif
8751 break;
8752 }
8753
8754 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
8755 {
8756 #if SYSV386_COMPAT
8757 if (STACK_TOP_P (operands[0]))
8758 p = "{rp\t%0, %1|p\t%1, %0}";
8759 else
8760 p = "{p\t%1, %0|rp\t%0, %1}";
8761 #else
8762 if (STACK_TOP_P (operands[0]))
8763 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
8764 else
8765 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
8766 #endif
8767 break;
8768 }
8769
8770 if (STACK_TOP_P (operands[0]))
8771 {
8772 if (STACK_TOP_P (operands[1]))
8773 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
8774 else
8775 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
8776 break;
8777 }
8778 else if (STACK_TOP_P (operands[1]))
8779 {
8780 #if SYSV386_COMPAT
8781 p = "{\t%1, %0|r\t%0, %1}";
8782 #else
8783 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
8784 #endif
8785 }
8786 else
8787 {
8788 #if SYSV386_COMPAT
8789 p = "{r\t%2, %0|\t%0, %2}";
8790 #else
8791 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
8792 #endif
8793 }
8794 break;
8795
8796 default:
8797 gcc_unreachable ();
8798 }
8799
8800 strcat (buf, p);
8801 return buf;
8802 }
8803
8804 /* Return needed mode for entity in optimize_mode_switching pass. */
8805
8806 int
8807 ix86_mode_needed (int entity, rtx insn)
8808 {
8809 enum attr_i387_cw mode;
8810
8811 /* The mode UNINITIALIZED is used to store the control word after a
8812 function call or ASM pattern. The mode ANY specifies that the function
8813 has no requirements on the control word and makes no changes in the
8814 bits we are interested in. */
8815
8816 if (CALL_P (insn)
8817 || (NONJUMP_INSN_P (insn)
8818 && (asm_noperands (PATTERN (insn)) >= 0
8819 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
8820 return I387_CW_UNINITIALIZED;
8821
8822 if (recog_memoized (insn) < 0)
8823 return I387_CW_ANY;
8824
8825 mode = get_attr_i387_cw (insn);
8826
8827 switch (entity)
8828 {
8829 case I387_TRUNC:
8830 if (mode == I387_CW_TRUNC)
8831 return mode;
8832 break;
8833
8834 case I387_FLOOR:
8835 if (mode == I387_CW_FLOOR)
8836 return mode;
8837 break;
8838
8839 case I387_CEIL:
8840 if (mode == I387_CW_CEIL)
8841 return mode;
8842 break;
8843
8844 case I387_MASK_PM:
8845 if (mode == I387_CW_MASK_PM)
8846 return mode;
8847 break;
8848
8849 default:
8850 gcc_unreachable ();
8851 }
8852
8853 return I387_CW_ANY;
8854 }
8855
8856 /* Output code to initialize control word copies used by trunc?f?i and
8857 rounding patterns. CURRENT_MODE is set to the current control word,
8858 while NEW_MODE is set to the new control word. */
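/* For reference, the x87 control word layout relied on below: bits 10-11
   are the rounding control (00 nearest, 01 down, 10 up, 11 toward zero),
   hence the 0x0c00 mask and the 0x0400 / 0x0800 / 0x0c00 values, and bit 5
   (0x0020) is the precision exception mask used for nearbyint.  */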
8859
8860 void
8861 emit_i387_cw_initialization (int mode)
8862 {
8863 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
8864 rtx new_mode;
8865
8866 int slot;
8867
8868 rtx reg = gen_reg_rtx (HImode);
8869
8870 emit_insn (gen_x86_fnstcw_1 (stored_mode));
8871 emit_move_insn (reg, copy_rtx (stored_mode));
8872
8873 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
8874 {
8875 switch (mode)
8876 {
8877 case I387_CW_TRUNC:
8878 /* round toward zero (truncate) */
8879 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
8880 slot = SLOT_CW_TRUNC;
8881 break;
8882
8883 case I387_CW_FLOOR:
8884 /* round down toward -oo */
8885 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8886 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
8887 slot = SLOT_CW_FLOOR;
8888 break;
8889
8890 case I387_CW_CEIL:
8891 /* round up toward +oo */
8892 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
8893 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
8894 slot = SLOT_CW_CEIL;
8895 break;
8896
8897 case I387_CW_MASK_PM:
8898 /* mask precision exception for nearbyint() */
8899 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8900 slot = SLOT_CW_MASK_PM;
8901 break;
8902
8903 default:
8904 gcc_unreachable ();
8905 }
8906 }
8907 else
8908 {
8909 switch (mode)
8910 {
8911 case I387_CW_TRUNC:
8912 /* round toward zero (truncate) */
8913 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
8914 slot = SLOT_CW_TRUNC;
8915 break;
8916
8917 case I387_CW_FLOOR:
8918 /* round down toward -oo */
8919 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
8920 slot = SLOT_CW_FLOOR;
8921 break;
8922
8923 case I387_CW_CEIL:
8924 /* round up toward +oo */
8925 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
8926 slot = SLOT_CW_CEIL;
8927 break;
8928
8929 case I387_CW_MASK_PM:
8930 /* mask precision exception for nearbyint() */
8931 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
8932 slot = SLOT_CW_MASK_PM;
8933 break;
8934
8935 default:
8936 gcc_unreachable ();
8937 }
8938 }
8939
8940 gcc_assert (slot < MAX_386_STACK_LOCALS);
8941
8942 new_mode = assign_386_stack_local (HImode, slot);
8943 emit_move_insn (new_mode, reg);
8944 }
8945
8946 /* Output code for INSN to convert a float to a signed int. OPERANDS
8947 are the insn operands. The output may be [HSD]Imode and the input
8948 operand may be [SDX]Fmode. */
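/* As a sketch of the non-SSE3 DImode case (illustrative, not verbatim
   compiler output): the control word is swapped to the truncating mode
   around the store, roughly
       fldcw %3       ; load the truncating control word
       fistp<z> %0    ; store and pop
       fldcw %2       ; restore the original control word
   while fisttp (SSE3) needs no control word juggling at all.  */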
8949
8950 const char *
8951 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
8952 {
8953 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
8954 int dimode_p = GET_MODE (operands[0]) == DImode;
8955 int round_mode = get_attr_i387_cw (insn);
8956
8957 /* Jump through a hoop or two for DImode, since the hardware has no
8958 non-popping instruction. We used to do this a different way, but
8959 that was somewhat fragile and broke with post-reload splitters. */
8960 if ((dimode_p || fisttp) && !stack_top_dies)
8961 output_asm_insn ("fld\t%y1", operands);
8962
8963 gcc_assert (STACK_TOP_P (operands[1]));
8964 gcc_assert (MEM_P (operands[0]));
8965
8966 if (fisttp)
8967 output_asm_insn ("fisttp%z0\t%0", operands);
8968 else
8969 {
8970 if (round_mode != I387_CW_ANY)
8971 output_asm_insn ("fldcw\t%3", operands);
8972 if (stack_top_dies || dimode_p)
8973 output_asm_insn ("fistp%z0\t%0", operands);
8974 else
8975 output_asm_insn ("fist%z0\t%0", operands);
8976 if (round_mode != I387_CW_ANY)
8977 output_asm_insn ("fldcw\t%2", operands);
8978 }
8979
8980 return "";
8981 }
8982
8983 /* Output code for x87 ffreep insn. The OPNO argument, which may only
8984 have the values zero or one, indicates the ffreep insn's operand
8985 from the OPERANDS array. */
8986
8987 static const char *
8988 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
8989 {
8990 if (TARGET_USE_FFREEP)
8991 #if HAVE_AS_IX86_FFREEP
8992 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
8993 #else
8994 {
8995 static char retval[] = ".word\t0xc_df";
8996 int regno = REGNO (operands[opno]);
8997
8998 gcc_assert (FP_REGNO_P (regno));
8999
9000 retval[9] = '0' + (regno - FIRST_STACK_REG);
9001 return retval;
9002 }
9003 #endif
9004
9005 return opno ? "fstp\t%y1" : "fstp\t%y0";
9006 }
9007
9008
9009 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9010 should be used. UNORDERED_P is true when fucom should be used. */
9011
9012 const char *
9013 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9014 {
9015 int stack_top_dies;
9016 rtx cmp_op0, cmp_op1;
9017 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9018
9019 if (eflags_p)
9020 {
9021 cmp_op0 = operands[0];
9022 cmp_op1 = operands[1];
9023 }
9024 else
9025 {
9026 cmp_op0 = operands[1];
9027 cmp_op1 = operands[2];
9028 }
9029
9030 if (is_sse)
9031 {
9032 if (GET_MODE (operands[0]) == SFmode)
9033 if (unordered_p)
9034 return "ucomiss\t{%1, %0|%0, %1}";
9035 else
9036 return "comiss\t{%1, %0|%0, %1}";
9037 else
9038 if (unordered_p)
9039 return "ucomisd\t{%1, %0|%0, %1}";
9040 else
9041 return "comisd\t{%1, %0|%0, %1}";
9042 }
9043
9044 gcc_assert (STACK_TOP_P (cmp_op0));
9045
9046 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9047
9048 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9049 {
9050 if (stack_top_dies)
9051 {
9052 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9053 return output_387_ffreep (operands, 1);
9054 }
9055 else
9056 return "ftst\n\tfnstsw\t%0";
9057 }
9058
9059 if (STACK_REG_P (cmp_op1)
9060 && stack_top_dies
9061 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9062 && REGNO (cmp_op1) != FIRST_STACK_REG)
9063 {
9064 /* If the top of the 387 stack dies, and the other operand is
9065 also a stack register that dies, then this must be a
9066 `fcompp' float compare.  */
9067
9068 if (eflags_p)
9069 {
9070 /* There is no double popping fcomi variant. Fortunately,
9071 eflags is immune from the fstp's cc clobbering. */
9072 if (unordered_p)
9073 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9074 else
9075 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9076 return output_387_ffreep (operands, 0);
9077 }
9078 else
9079 {
9080 if (unordered_p)
9081 return "fucompp\n\tfnstsw\t%0";
9082 else
9083 return "fcompp\n\tfnstsw\t%0";
9084 }
9085 }
9086 else
9087 {
9088 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9089
9090 static const char * const alt[16] =
9091 {
9092 "fcom%z2\t%y2\n\tfnstsw\t%0",
9093 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9094 "fucom%z2\t%y2\n\tfnstsw\t%0",
9095 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9096
9097 "ficom%z2\t%y2\n\tfnstsw\t%0",
9098 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9099 NULL,
9100 NULL,
9101
9102 "fcomi\t{%y1, %0|%0, %y1}",
9103 "fcomip\t{%y1, %0|%0, %y1}",
9104 "fucomi\t{%y1, %0|%0, %y1}",
9105 "fucomip\t{%y1, %0|%0, %y1}",
9106
9107 NULL,
9108 NULL,
9109 NULL,
9110 NULL
9111 };
9112
9113 int mask;
9114 const char *ret;
9115
9116 mask = eflags_p << 3;
9117 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9118 mask |= unordered_p << 1;
9119 mask |= stack_top_dies;
9120
9121 gcc_assert (mask < 16);
9122 ret = alt[mask];
9123 gcc_assert (ret);
9124
9125 return ret;
9126 }
9127 }
9128
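/* Worked example for the alt[] table above (illustrative, not from the
   original source): comparing two x87 stack registers with fcomi
   available (eflags_p = 1), a non-integer second operand, an ordered
   compare (unordered_p = 0), and a dying stack top (stack_top_dies = 1)
   gives

	mask = (1 << 3) | (0 << 2) | (0 << 1) | 1 = 9

   which selects alt[9], i.e. "fcomip\t{%y1, %0|%0, %y1}".  */
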
9129 void
9130 ix86_output_addr_vec_elt (FILE *file, int value)
9131 {
9132 const char *directive = ASM_LONG;
9133
9134 #ifdef ASM_QUAD
9135 if (TARGET_64BIT)
9136 directive = ASM_QUAD;
9137 #else
9138 gcc_assert (!TARGET_64BIT);
9139 #endif
9140
9141 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9142 }
9143
9144 void
9145 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9146 {
9147 const char *directive = ASM_LONG;
9148
9149 #ifdef ASM_QUAD
9150 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9151 directive = ASM_QUAD;
9152 #else
9153 gcc_assert (!TARGET_64BIT);
9154 #endif
9155 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9156 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9157 fprintf (file, "%s%s%d-%s%d\n",
9158 directive, LPREFIX, value, LPREFIX, rel);
9159 else if (HAVE_AS_GOTOFF_IN_DATA)
9160 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9161 #if TARGET_MACHO
9162 else if (TARGET_MACHO)
9163 {
9164 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9165 machopic_output_function_base_name (file);
9166 fprintf(file, "\n");
9167 }
9168 #endif
9169 else
9170 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9171 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9172 }
9173 \f
9174 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9175 for the target. */
9176
9177 void
9178 ix86_expand_clear (rtx dest)
9179 {
9180 rtx tmp;
9181
9182 /* We play register width games, which are only valid after reload. */
9183 gcc_assert (reload_completed);
9184
9185 /* Avoid HImode and its attendant prefix byte. */
9186 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9187 dest = gen_rtx_REG (SImode, REGNO (dest));
9188
9189 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9190
9191 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9192 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9193 {
9194 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, 17));
9195 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9196 }
9197
9198 emit_insn (tmp);
9199 }
9200
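/* Illustrative sketch of what ix86_expand_clear emits (not part of the
   original code): for an SImode destination in %eax on a tuning where
   mov $0 is not preferred, or when optimizing for size, the result is

	xorl	%eax, %eax	; clobbers the flags

   wrapped in a PARALLEL with a (clobber (reg:CC 17)); otherwise a plain
   "movl $0, %eax"-style move is used.  A QImode or HImode destination
   is first widened to SImode to avoid the operand-size prefix.  */
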
9201 /* X is an unchanging MEM. If it is a constant pool reference, return
9202 the constant pool rtx, else NULL. */
9203
9204 rtx
9205 maybe_get_pool_constant (rtx x)
9206 {
9207 x = ix86_delegitimize_address (XEXP (x, 0));
9208
9209 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9210 return get_pool_constant (x);
9211
9212 return NULL_RTX;
9213 }
9214
9215 void
9216 ix86_expand_move (enum machine_mode mode, rtx operands[])
9217 {
9218 int strict = (reload_in_progress || reload_completed);
9219 rtx op0, op1;
9220 enum tls_model model;
9221
9222 op0 = operands[0];
9223 op1 = operands[1];
9224
9225 if (GET_CODE (op1) == SYMBOL_REF)
9226 {
9227 model = SYMBOL_REF_TLS_MODEL (op1);
9228 if (model)
9229 {
9230 op1 = legitimize_tls_address (op1, model, true);
9231 op1 = force_operand (op1, op0);
9232 if (op1 == op0)
9233 return;
9234 }
9235 }
9236 else if (GET_CODE (op1) == CONST
9237 && GET_CODE (XEXP (op1, 0)) == PLUS
9238 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9239 {
9240 model = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (op1, 0), 0));
9241 if (model)
9242 {
9243 rtx addend = XEXP (XEXP (op1, 0), 1);
9244 op1 = legitimize_tls_address (XEXP (XEXP (op1, 0), 0), model, true);
9245 op1 = force_operand (op1, NULL);
9246 op1 = expand_simple_binop (Pmode, PLUS, op1, addend,
9247 op0, 1, OPTAB_DIRECT);
9248 if (op1 == op0)
9249 return;
9250 }
9251 }
9252
9253 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9254 {
9255 if (TARGET_MACHO && !TARGET_64BIT)
9256 {
9257 #if TARGET_MACHO
9258 if (MACHOPIC_PURE)
9259 {
9260 rtx temp = ((reload_in_progress
9261 || ((op0 && REG_P (op0))
9262 && mode == Pmode))
9263 ? op0 : gen_reg_rtx (Pmode));
9264 op1 = machopic_indirect_data_reference (op1, temp);
9265 op1 = machopic_legitimize_pic_address (op1, mode,
9266 temp == op1 ? 0 : temp);
9267 }
9268 else if (MACHOPIC_INDIRECT)
9269 op1 = machopic_indirect_data_reference (op1, 0);
9270 if (op0 == op1)
9271 return;
9272 #endif
9273 }
9274 else
9275 {
9276 if (MEM_P (op0))
9277 op1 = force_reg (Pmode, op1);
9278 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9279 {
9280 rtx reg = no_new_pseudos ? op0 : NULL_RTX;
9281 op1 = legitimize_pic_address (op1, reg);
9282 if (op0 == op1)
9283 return;
9284 }
9285 }
9286 }
9287 else
9288 {
9289 if (MEM_P (op0)
9290 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9291 || !push_operand (op0, mode))
9292 && MEM_P (op1))
9293 op1 = force_reg (mode, op1);
9294
9295 if (push_operand (op0, mode)
9296 && ! general_no_elim_operand (op1, mode))
9297 op1 = copy_to_mode_reg (mode, op1);
9298
9299 /* Force large constants in 64bit compilation into register
9300 to get them CSEed. */
9301 if (TARGET_64BIT && mode == DImode
9302 && immediate_operand (op1, mode)
9303 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9304 && !register_operand (op0, mode)
9305 && optimize && !reload_completed && !reload_in_progress)
9306 op1 = copy_to_mode_reg (mode, op1);
9307
9308 if (FLOAT_MODE_P (mode))
9309 {
9310 /* If we are loading a floating point constant to a register,
9311 force the value to memory now, since we'll get better code
9312 out the back end. */
9313
9314 if (strict)
9315 ;
9316 else if (GET_CODE (op1) == CONST_DOUBLE)
9317 {
9318 op1 = validize_mem (force_const_mem (mode, op1));
9319 if (!register_operand (op0, mode))
9320 {
9321 rtx temp = gen_reg_rtx (mode);
9322 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9323 emit_move_insn (op0, temp);
9324 return;
9325 }
9326 }
9327 }
9328 }
9329
9330 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9331 }
9332
9333 void
9334 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9335 {
9336 rtx op0 = operands[0], op1 = operands[1];
9337
9338 /* Force constants other than zero into memory. We do not know how
9339 the instructions used to build constants modify the upper 64 bits
9340 of the register; once we have that information we may be able
9341 to handle some of them more efficiently. */
9342 if ((reload_in_progress | reload_completed) == 0
9343 && register_operand (op0, mode)
9344 && CONSTANT_P (op1)
9345 && standard_sse_constant_p (op1) <= 0)
9346 op1 = validize_mem (force_const_mem (mode, op1));
9347
9348 /* Make operand1 a register if it isn't already. */
9349 if (!no_new_pseudos
9350 && !register_operand (op0, mode)
9351 && !register_operand (op1, mode))
9352 {
9353 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9354 return;
9355 }
9356
9357 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9358 }
9359
9360 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9361 straight to ix86_expand_vector_move. */
9362 /* Code generation for scalar reg-reg moves of single and double precision data:
9363 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9364 movaps reg, reg
9365 else
9366 movss reg, reg
9367 if (x86_sse_partial_reg_dependency == true)
9368 movapd reg, reg
9369 else
9370 movsd reg, reg
9371
9372 Code generation for scalar loads of double precision data:
9373 if (x86_sse_split_regs == true)
9374 movlpd mem, reg (gas syntax)
9375 else
9376 movsd mem, reg
9377
9378 Code generation for unaligned packed loads of single precision data
9379 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
9380 if (x86_sse_unaligned_move_optimal)
9381 movups mem, reg
9382
9383 if (x86_sse_partial_reg_dependency == true)
9384 {
9385 xorps reg, reg
9386 movlps mem, reg
9387 movhps mem+8, reg
9388 }
9389 else
9390 {
9391 movlps mem, reg
9392 movhps mem+8, reg
9393 }
9394
9395 Code generation for unaligned packed loads of double precision data
9396 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
9397 if (x86_sse_unaligned_move_optimal)
9398 movupd mem, reg
9399
9400 if (x86_sse_split_regs == true)
9401 {
9402 movlpd mem, reg
9403 movhpd mem+8, reg
9404 }
9405 else
9406 {
9407 movsd mem, reg
9408 movhpd mem+8, reg
9409 }
9410 */
9411
9412 void
9413 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
9414 {
9415 rtx op0, op1, m;
9416
9417 op0 = operands[0];
9418 op1 = operands[1];
9419
9420 if (MEM_P (op1))
9421 {
9422 /* If we're optimizing for size, movups is the smallest. */
9423 if (optimize_size)
9424 {
9425 op0 = gen_lowpart (V4SFmode, op0);
9426 op1 = gen_lowpart (V4SFmode, op1);
9427 emit_insn (gen_sse_movups (op0, op1));
9428 return;
9429 }
9430
9431 /* ??? If we have typed data, then it would appear that using
9432 movdqu is the only way to get unaligned data loaded with
9433 integer type. */
9434 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9435 {
9436 op0 = gen_lowpart (V16QImode, op0);
9437 op1 = gen_lowpart (V16QImode, op1);
9438 emit_insn (gen_sse2_movdqu (op0, op1));
9439 return;
9440 }
9441
9442 if (TARGET_SSE2 && mode == V2DFmode)
9443 {
9444 rtx zero;
9445
9446 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9447 {
9448 op0 = gen_lowpart (V2DFmode, op0);
9449 op1 = gen_lowpart (V2DFmode, op1);
9450 emit_insn (gen_sse2_movupd (op0, op1));
9451 return;
9452 }
9453
9454 /* When SSE registers are split into halves, we can avoid
9455 writing to the top half twice. */
9456 if (TARGET_SSE_SPLIT_REGS)
9457 {
9458 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9459 zero = op0;
9460 }
9461 else
9462 {
9463 /* ??? Not sure about the best option for the Intel chips.
9464 The following would seem to satisfy; the register is
9465 entirely cleared, breaking the dependency chain. We
9466 then store to the upper half, with a dependency depth
9467 of one. A rumor has it that Intel recommends two movsd
9468 followed by an unpacklpd, but this is unconfirmed. And
9469 given that the dependency depth of the unpacklpd would
9470 still be one, I'm not sure why this would be better. */
9471 zero = CONST0_RTX (V2DFmode);
9472 }
9473
9474 m = adjust_address (op1, DFmode, 0);
9475 emit_insn (gen_sse2_loadlpd (op0, zero, m));
9476 m = adjust_address (op1, DFmode, 8);
9477 emit_insn (gen_sse2_loadhpd (op0, op0, m));
9478 }
9479 else
9480 {
9481 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
9482 {
9483 op0 = gen_lowpart (V4SFmode, op0);
9484 op1 = gen_lowpart (V4SFmode, op1);
9485 emit_insn (gen_sse_movups (op0, op1));
9486 return;
9487 }
9488
9489 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
9490 emit_move_insn (op0, CONST0_RTX (mode));
9491 else
9492 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
9493
9494 if (mode != V4SFmode)
9495 op0 = gen_lowpart (V4SFmode, op0);
9496 m = adjust_address (op1, V2SFmode, 0);
9497 emit_insn (gen_sse_loadlps (op0, op0, m));
9498 m = adjust_address (op1, V2SFmode, 8);
9499 emit_insn (gen_sse_loadhps (op0, op0, m));
9500 }
9501 }
9502 else if (MEM_P (op0))
9503 {
9504 /* If we're optimizing for size, movups is the smallest. */
9505 if (optimize_size)
9506 {
9507 op0 = gen_lowpart (V4SFmode, op0);
9508 op1 = gen_lowpart (V4SFmode, op1);
9509 emit_insn (gen_sse_movups (op0, op1));
9510 return;
9511 }
9512
9513 /* ??? Similar to above, only less clear because of quote
9514 typeless stores unquote. */
9515 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
9516 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
9517 {
9518 op0 = gen_lowpart (V16QImode, op0);
9519 op1 = gen_lowpart (V16QImode, op1);
9520 emit_insn (gen_sse2_movdqu (op0, op1));
9521 return;
9522 }
9523
9524 if (TARGET_SSE2 && mode == V2DFmode)
9525 {
9526 m = adjust_address (op0, DFmode, 0);
9527 emit_insn (gen_sse2_storelpd (m, op1));
9528 m = adjust_address (op0, DFmode, 8);
9529 emit_insn (gen_sse2_storehpd (m, op1));
9530 }
9531 else
9532 {
9533 if (mode != V4SFmode)
9534 op1 = gen_lowpart (V4SFmode, op1);
9535 m = adjust_address (op0, V2SFmode, 0);
9536 emit_insn (gen_sse_storelps (m, op1));
9537 m = adjust_address (op0, V2SFmode, 8);
9538 emit_insn (gen_sse_storehps (m, op1));
9539 }
9540 }
9541 else
9542 gcc_unreachable ();
9543 }
9544
9545 /* Expand a push in MODE. This is some mode for which we do not support
9546 proper push instructions, at least from the registers that we expect
9547 the value to live in. */
9548
9549 void
9550 ix86_expand_push (enum machine_mode mode, rtx x)
9551 {
9552 rtx tmp;
9553
9554 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
9555 GEN_INT (-GET_MODE_SIZE (mode)),
9556 stack_pointer_rtx, 1, OPTAB_DIRECT);
9557 if (tmp != stack_pointer_rtx)
9558 emit_move_insn (stack_pointer_rtx, tmp);
9559
9560 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
9561 emit_move_insn (tmp, x);
9562 }
9563
9564 /* Helper function of ix86_fixup_binary_operands to canonicalize
9565 operand order. Returns true if the operands should be swapped. */
9566
9567 static bool
9568 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
9569 rtx operands[])
9570 {
9571 rtx dst = operands[0];
9572 rtx src1 = operands[1];
9573 rtx src2 = operands[2];
9574
9575 /* If the operation is not commutative, we can't do anything. */
9576 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
9577 return false;
9578
9579 /* Highest priority is that src1 should match dst. */
9580 if (rtx_equal_p (dst, src1))
9581 return false;
9582 if (rtx_equal_p (dst, src2))
9583 return true;
9584
9585 /* Next highest priority is that immediate constants come second. */
9586 if (immediate_operand (src2, mode))
9587 return false;
9588 if (immediate_operand (src1, mode))
9589 return true;
9590
9591 /* Lowest priority is that memory references should come second. */
9592 if (MEM_P (src2))
9593 return false;
9594 if (MEM_P (src1))
9595 return true;
9596
9597 return false;
9598 }
9599
9600
9601 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
9602 destination to use for the operation. If different from the true
9603 destination in operands[0], a copy operation will be required. */
9604
9605 rtx
9606 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
9607 rtx operands[])
9608 {
9609 rtx dst = operands[0];
9610 rtx src1 = operands[1];
9611 rtx src2 = operands[2];
9612
9613 /* Canonicalize operand order. */
9614 if (ix86_swap_binary_operands_p (code, mode, operands))
9615 {
9616 rtx temp = src1;
9617 src1 = src2;
9618 src2 = temp;
9619 }
9620
9621 /* Both source operands cannot be in memory. */
9622 if (MEM_P (src1) && MEM_P (src2))
9623 {
9624 /* Optimization: Only read from memory once. */
9625 if (rtx_equal_p (src1, src2))
9626 {
9627 src2 = force_reg (mode, src2);
9628 src1 = src2;
9629 }
9630 else
9631 src2 = force_reg (mode, src2);
9632 }
9633
9634 /* If the destination is memory, and we do not have matching source
9635 operands, do things in registers. */
9636 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9637 dst = gen_reg_rtx (mode);
9638
9639 /* Source 1 cannot be a constant. */
9640 if (CONSTANT_P (src1))
9641 src1 = force_reg (mode, src1);
9642
9643 /* Source 1 cannot be a non-matching memory. */
9644 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9645 src1 = force_reg (mode, src1);
9646
9647 operands[1] = src1;
9648 operands[2] = src2;
9649 return dst;
9650 }
9651
9652 /* Similarly, but assume that the destination has already been
9653 set up properly. */
9654
9655 void
9656 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
9657 enum machine_mode mode, rtx operands[])
9658 {
9659 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
9660 gcc_assert (dst == operands[0]);
9661 }
9662
9663 /* Attempt to expand a binary operator. Make the expansion closer to the
9664 actual machine, than just general_operand, which would allow 3 separate
9665 memory references (one output, two input) in a single insn. */
9666
9667 void
9668 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
9669 rtx operands[])
9670 {
9671 rtx src1, src2, dst, op, clob;
9672
9673 dst = ix86_fixup_binary_operands (code, mode, operands);
9674 src1 = operands[1];
9675 src2 = operands[2];
9676
9677 /* Emit the instruction. */
9678
9679 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
9680 if (reload_in_progress)
9681 {
9682 /* Reload doesn't know about the flags register, and doesn't know that
9683 it doesn't want to clobber it. We can only do this with PLUS. */
9684 gcc_assert (code == PLUS);
9685 emit_insn (op);
9686 }
9687 else
9688 {
9689 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9690 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9691 }
9692
9693 /* Fix up the destination if needed. */
9694 if (dst != operands[0])
9695 emit_move_insn (operands[0], dst);
9696 }
9697
9698 /* Return TRUE or FALSE depending on whether the binary operator meets the
9699 appropriate constraints. */
9700
9701 int
9702 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
9703 rtx operands[3])
9704 {
9705 rtx dst = operands[0];
9706 rtx src1 = operands[1];
9707 rtx src2 = operands[2];
9708
9709 /* Both source operands cannot be in memory. */
9710 if (MEM_P (src1) && MEM_P (src2))
9711 return 0;
9712
9713 /* Canonicalize operand order for commutative operators. */
9714 if (ix86_swap_binary_operands_p (code, mode, operands))
9715 {
9716 rtx temp = src1;
9717 src1 = src2;
9718 src2 = temp;
9719 }
9720
9721 /* If the destination is memory, we must have a matching source operand. */
9722 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
9723 return 0;
9724
9725 /* Source 1 cannot be a constant. */
9726 if (CONSTANT_P (src1))
9727 return 0;
9728
9729 /* Source 1 cannot be a non-matching memory. */
9730 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
9731 return 0;
9732
9733 return 1;
9734 }
9735
9736 /* Attempt to expand a unary operator. Make the expansion closer to the
9737 actual machine, than just general_operand, which would allow 2 separate
9738 memory references (one output, one input) in a single insn. */
9739
9740 void
9741 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
9742 rtx operands[])
9743 {
9744 int matching_memory;
9745 rtx src, dst, op, clob;
9746
9747 dst = operands[0];
9748 src = operands[1];
9749
9750 /* If the destination is memory, and we do not have matching source
9751 operands, do things in registers. */
9752 matching_memory = 0;
9753 if (MEM_P (dst))
9754 {
9755 if (rtx_equal_p (dst, src))
9756 matching_memory = 1;
9757 else
9758 dst = gen_reg_rtx (mode);
9759 }
9760
9761 /* When source operand is memory, destination must match. */
9762 if (MEM_P (src) && !matching_memory)
9763 src = force_reg (mode, src);
9764
9765 /* Emit the instruction. */
9766
9767 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
9768 if (reload_in_progress || code == NOT)
9769 {
9770 /* Reload doesn't know about the flags register, and doesn't know that
9771 it doesn't want to clobber it. */
9772 gcc_assert (code == NOT);
9773 emit_insn (op);
9774 }
9775 else
9776 {
9777 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9778 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
9779 }
9780
9781 /* Fix up the destination if needed. */
9782 if (dst != operands[0])
9783 emit_move_insn (operands[0], dst);
9784 }
9785
9786 /* Return TRUE or FALSE depending on whether the unary operator meets the
9787 appropriate constraints. */
9788
9789 int
9790 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
9791 enum machine_mode mode ATTRIBUTE_UNUSED,
9792 rtx operands[2] ATTRIBUTE_UNUSED)
9793 {
9794 /* If one of operands is memory, source and destination must match. */
9795 if ((MEM_P (operands[0])
9796 || MEM_P (operands[1]))
9797 && ! rtx_equal_p (operands[0], operands[1]))
9798 return FALSE;
9799 return TRUE;
9800 }
9801
9802 /* Post-reload splitter for converting an SF or DFmode value in an
9803 SSE register into an unsigned SImode. */
9804
9805 void
9806 ix86_split_convert_uns_si_sse (rtx operands[])
9807 {
9808 enum machine_mode vecmode;
9809 rtx value, large, zero_or_two31, input, two31, x;
9810
9811 large = operands[1];
9812 zero_or_two31 = operands[2];
9813 input = operands[3];
9814 two31 = operands[4];
9815 vecmode = GET_MODE (large);
9816 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
9817
9818 /* Load up the value into the low element. We must ensure that the other
9819 elements are valid floats -- zero is the easiest such value. */
9820 if (MEM_P (input))
9821 {
9822 if (vecmode == V4SFmode)
9823 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
9824 else
9825 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
9826 }
9827 else
9828 {
9829 input = gen_rtx_REG (vecmode, REGNO (input));
9830 emit_move_insn (value, CONST0_RTX (vecmode));
9831 if (vecmode == V4SFmode)
9832 emit_insn (gen_sse_movss (value, value, input));
9833 else
9834 emit_insn (gen_sse2_movsd (value, value, input));
9835 }
9836
9837 emit_move_insn (large, two31);
9838 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
9839
9840 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
9841 emit_insn (gen_rtx_SET (VOIDmode, large, x));
9842
9843 x = gen_rtx_AND (vecmode, zero_or_two31, large);
9844 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
9845
9846 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
9847 emit_insn (gen_rtx_SET (VOIDmode, value, x));
9848
9849 large = gen_rtx_REG (V4SImode, REGNO (large));
9850 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
9851
9852 x = gen_rtx_REG (V4SImode, REGNO (value));
9853 if (vecmode == V4SFmode)
9854 emit_insn (gen_sse2_cvttps2dq (x, value));
9855 else
9856 emit_insn (gen_sse2_cvttpd2dq (x, value));
9857 value = x;
9858
9859 emit_insn (gen_xorv4si3 (value, value, large));
9860 }
9861
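/* Illustrative walk-through of the split above (not from the original
   source), for a scalar input x converted to unsigned SImode:

	large         = (2^31 <= x) ? all-ones : 0	; compare mask
	zero_or_two31 = large ? 2^31 : 0.0
	value         = x - zero_or_two31		; now fits in the signed range
	large <<= 31 (as V4SI)				; all-ones becomes 0x80000000
	value         = cvtt(value) ^ large		; restore the high bit

   e.g. x = 3e9 gives cvtt(3e9 - 2^31) = 852516352, and xoring in
   0x80000000 yields 3000000000.  */
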
9862 /* Convert an unsigned DImode value into a DFmode, using only SSE.
9863 Expects the 64-bit DImode to be supplied in a pair of integral
9864 registers. Requires SSE2; will use SSE3 if available. For x86_32,
9865 -mfpmath=sse, !optimize_size only. */
9866
9867 void
9868 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
9869 {
9870 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
9871 rtx int_xmm, fp_xmm;
9872 rtx biases, exponents;
9873 rtx x;
9874
9875 int_xmm = gen_reg_rtx (V4SImode);
9876 if (TARGET_INTER_UNIT_MOVES)
9877 emit_insn (gen_movdi_to_sse (int_xmm, input));
9878 else if (TARGET_SSE_SPLIT_REGS)
9879 {
9880 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
9881 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
9882 }
9883 else
9884 {
9885 x = gen_reg_rtx (V2DImode);
9886 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
9887 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
9888 }
9889
9890 x = gen_rtx_CONST_VECTOR (V4SImode,
9891 gen_rtvec (4, GEN_INT (0x43300000UL),
9892 GEN_INT (0x45300000UL),
9893 const0_rtx, const0_rtx));
9894 exponents = validize_mem (force_const_mem (V4SImode, x));
9895
9896 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
9897 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
9898
9899 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
9900 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
9901 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
9902 (0x1.0p84 + double(fp_value_hi_xmm)).
9903 Note these exponents differ by 32. */
9904
9905 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
9906
9907 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
9908 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
9909 real_ldexp (&bias_lo_rvt, &dconst1, 52);
9910 real_ldexp (&bias_hi_rvt, &dconst1, 84);
9911 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
9912 x = const_double_from_real_value (bias_hi_rvt, DFmode);
9913 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
9914 biases = validize_mem (force_const_mem (V2DFmode, biases));
9915 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
9916
9917 /* Add the upper and lower DFmode values together. */
9918 if (TARGET_SSE3)
9919 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
9920 else
9921 {
9922 x = copy_to_mode_reg (V2DFmode, fp_xmm);
9923 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
9924 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
9925 }
9926
9927 ix86_expand_vector_extract (false, target, fp_xmm, 0);
9928 }
9929
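/* Worked example of the bias trick above (illustrative, not from the
   original source).  Take input = 0x0000000200000003, i.e. hi = 2,
   lo = 3.  Pairing lo with exponent word 0x43300000 produces the
   double 2^52 + 3; pairing hi with 0x45300000 produces 2^84 + 2*2^32.
   Subtracting the biases 2^52 and 2^84 leaves exactly 3.0 and
   2*2^32 = 8589934592.0, and the final horizontal add yields
   8589934595.0, the unsigned value of the input.  */
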
9930 /* Convert an unsigned SImode value into a DFmode. Only currently used
9931 for SSE, but applicable anywhere. */
9932
9933 void
9934 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
9935 {
9936 REAL_VALUE_TYPE TWO31r;
9937 rtx x, fp;
9938
9939 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
9940 NULL, 1, OPTAB_DIRECT);
9941
9942 fp = gen_reg_rtx (DFmode);
9943 emit_insn (gen_floatsidf2 (fp, x));
9944
9945 real_ldexp (&TWO31r, &dconst1, 31);
9946 x = const_double_from_real_value (TWO31r, DFmode);
9947
9948 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
9949 if (x != target)
9950 emit_move_insn (target, x);
9951 }
9952
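/* Worked example (illustrative, not from the original source): for
   input = 0xffffffff the PLUS with -2^31 wraps to the signed value
   0x7fffffff = 2147483647, the signed SImode->DFmode conversion gives
   2147483647.0, and adding back 2^31 = 2147483648.0 produces
   4294967295.0, the correct unsigned interpretation.  */
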
9953 /* Convert a signed DImode value into a DFmode. Only used for SSE in
9954 32-bit mode; otherwise we have a direct convert instruction. */
9955
9956 void
9957 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
9958 {
9959 REAL_VALUE_TYPE TWO32r;
9960 rtx fp_lo, fp_hi, x;
9961
9962 fp_lo = gen_reg_rtx (DFmode);
9963 fp_hi = gen_reg_rtx (DFmode);
9964
9965 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
9966
9967 real_ldexp (&TWO32r, &dconst1, 32);
9968 x = const_double_from_real_value (TWO32r, DFmode);
9969 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
9970
9971 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
9972
9973 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
9974 0, OPTAB_DIRECT);
9975 if (x != target)
9976 emit_move_insn (target, x);
9977 }
9978
9979 /* Convert an unsigned SImode value into a SFmode, using only SSE.
9980 For x86_32, -mfpmath=sse, !optimize_size only. */
9981 void
9982 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
9983 {
9984 REAL_VALUE_TYPE ONE16r;
9985 rtx fp_hi, fp_lo, int_hi, int_lo, x;
9986
9987 real_ldexp (&ONE16r, &dconst1, 16);
9988 x = const_double_from_real_value (ONE16r, SFmode);
9989 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
9990 NULL, 0, OPTAB_DIRECT);
9991 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
9992 NULL, 0, OPTAB_DIRECT);
9993 fp_hi = gen_reg_rtx (SFmode);
9994 fp_lo = gen_reg_rtx (SFmode);
9995 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
9996 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
9997 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
9998 0, OPTAB_DIRECT);
9999 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10000 0, OPTAB_DIRECT);
10001 if (!rtx_equal_p (target, fp_hi))
10002 emit_move_insn (target, fp_hi);
10003 }
10004
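/* Worked example (illustrative, not from the original source): for
   input = 0x87654321 a direct signed cvtsi2ss would see a negative
   number, so the value is split into hi = 0x8765 and lo = 0x4321.
   Both halves are small non-negative integers, so their signed
   conversions are exact; the scale by 65536.0f is a power of two and
   also exact, leaving a single rounding in the final addition.  */
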
10005 /* A subroutine of ix86_build_signbit_mask.  If VECT is true,
10006 then replicate the value for all elements of the vector
10007 register. */
10008
10009 rtx
10010 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10011 {
10012 rtvec v;
10013 switch (mode)
10014 {
10015 case SFmode:
10016 if (vect)
10017 v = gen_rtvec (4, value, value, value, value);
10018 else
10019 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10020 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10021 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10022
10023 case DFmode:
10024 if (vect)
10025 v = gen_rtvec (2, value, value);
10026 else
10027 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10028 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10029
10030 default:
10031 gcc_unreachable ();
10032 }
10033 }
10034
10035 /* A subroutine of ix86_expand_fp_absneg_operator and copysign expanders.
10036 Create a mask for the sign bit in MODE for an SSE register. If VECT is
10037 true, then replicate the mask for all elements of the vector register.
10038 If INVERT is true, then create a mask excluding the sign bit. */
10039
10040 rtx
10041 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10042 {
10043 enum machine_mode vec_mode;
10044 HOST_WIDE_INT hi, lo;
10045 int shift = 63;
10046 rtx v;
10047 rtx mask;
10048
10049 /* Find the sign bit, sign extended to 2*HWI. */
10050 if (mode == SFmode)
10051 lo = 0x80000000, hi = lo < 0;
10052 else if (HOST_BITS_PER_WIDE_INT >= 64)
10053 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10054 else
10055 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10056
10057 if (invert)
10058 lo = ~lo, hi = ~hi;
10059
10060 /* Force this value into the low part of a fp vector constant. */
10061 mask = immed_double_const (lo, hi, mode == SFmode ? SImode : DImode);
10062 mask = gen_lowpart (mode, mask);
10063
10064 v = ix86_build_const_vector (mode, vect, mask);
10065 vec_mode = (mode == SFmode) ? V4SFmode : V2DFmode;
10066 return force_reg (vec_mode, v);
10067 }
10068
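/* Illustrative values (not from the original source): for DFmode the
   mask built above is 0x8000000000000000 in the low element (or
   0x7fffffffffffffff when INVERT), replicated across both elements
   when VECT; for SFmode the per-element mask is 0x80000000 resp.
   0x7fffffff.  NEG uses the sign-bit mask with XOR, ABS uses the
   inverted mask with AND (see ix86_expand_fp_absneg_operator).  */
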
10069 /* Generate code for floating point ABS or NEG. */
10070
10071 void
10072 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10073 rtx operands[])
10074 {
10075 rtx mask, set, use, clob, dst, src;
10076 bool matching_memory;
10077 bool use_sse = false;
10078 bool vector_mode = VECTOR_MODE_P (mode);
10079 enum machine_mode elt_mode = mode;
10080
10081 if (vector_mode)
10082 {
10083 elt_mode = GET_MODE_INNER (mode);
10084 use_sse = true;
10085 }
10086 else if (TARGET_SSE_MATH)
10087 use_sse = SSE_FLOAT_MODE_P (mode);
10088
10089 /* NEG and ABS performed with SSE use bitwise mask operations.
10090 Create the appropriate mask now. */
10091 if (use_sse)
10092 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10093 else
10094 mask = NULL_RTX;
10095
10096 dst = operands[0];
10097 src = operands[1];
10098
10099 /* If the destination is memory, and we don't have matching source
10100 operands or we're using the x87, do things in registers. */
10101 matching_memory = false;
10102 if (MEM_P (dst))
10103 {
10104 if (use_sse && rtx_equal_p (dst, src))
10105 matching_memory = true;
10106 else
10107 dst = gen_reg_rtx (mode);
10108 }
10109 if (MEM_P (src) && !matching_memory)
10110 src = force_reg (mode, src);
10111
10112 if (vector_mode)
10113 {
10114 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10115 set = gen_rtx_SET (VOIDmode, dst, set);
10116 emit_insn (set);
10117 }
10118 else
10119 {
10120 set = gen_rtx_fmt_e (code, mode, src);
10121 set = gen_rtx_SET (VOIDmode, dst, set);
10122 if (mask)
10123 {
10124 use = gen_rtx_USE (VOIDmode, mask);
10125 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10126 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10127 gen_rtvec (3, set, use, clob)));
10128 }
10129 else
10130 emit_insn (set);
10131 }
10132
10133 if (dst != operands[0])
10134 emit_move_insn (operands[0], dst);
10135 }
10136
10137 /* Expand a copysign operation. Special case operand 0 being a constant. */
10138
10139 void
10140 ix86_expand_copysign (rtx operands[])
10141 {
10142 enum machine_mode mode, vmode;
10143 rtx dest, op0, op1, mask, nmask;
10144
10145 dest = operands[0];
10146 op0 = operands[1];
10147 op1 = operands[2];
10148
10149 mode = GET_MODE (dest);
10150 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10151
10152 if (GET_CODE (op0) == CONST_DOUBLE)
10153 {
10154 rtvec v;
10155
10156 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10157 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10158
10159 if (op0 == CONST0_RTX (mode))
10160 op0 = CONST0_RTX (vmode);
10161 else
10162 {
10163 if (mode == SFmode)
10164 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10165 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10166 else
10167 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10168 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10169 }
10170
10171 mask = ix86_build_signbit_mask (mode, 0, 0);
10172
10173 if (mode == SFmode)
10174 emit_insn (gen_copysignsf3_const (dest, op0, op1, mask));
10175 else
10176 emit_insn (gen_copysigndf3_const (dest, op0, op1, mask));
10177 }
10178 else
10179 {
10180 nmask = ix86_build_signbit_mask (mode, 0, 1);
10181 mask = ix86_build_signbit_mask (mode, 0, 0);
10182
10183 if (mode == SFmode)
10184 emit_insn (gen_copysignsf3_var (dest, NULL, op0, op1, nmask, mask));
10185 else
10186 emit_insn (gen_copysigndf3_var (dest, NULL, op0, op1, nmask, mask));
10187 }
10188 }
10189
10190 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10191 be a constant, and so has already been expanded into a vector constant. */
10192
10193 void
10194 ix86_split_copysign_const (rtx operands[])
10195 {
10196 enum machine_mode mode, vmode;
10197 rtx dest, op0, op1, mask, x;
10198
10199 dest = operands[0];
10200 op0 = operands[1];
10201 op1 = operands[2];
10202 mask = operands[3];
10203
10204 mode = GET_MODE (dest);
10205 vmode = GET_MODE (mask);
10206
10207 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10208 x = gen_rtx_AND (vmode, dest, mask);
10209 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10210
10211 if (op0 != CONST0_RTX (vmode))
10212 {
10213 x = gen_rtx_IOR (vmode, dest, op0);
10214 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10215 }
10216 }
10217
10218 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10219 so we have to do two masks. */
10220
10221 void
10222 ix86_split_copysign_var (rtx operands[])
10223 {
10224 enum machine_mode mode, vmode;
10225 rtx dest, scratch, op0, op1, mask, nmask, x;
10226
10227 dest = operands[0];
10228 scratch = operands[1];
10229 op0 = operands[2];
10230 op1 = operands[3];
10231 nmask = operands[4];
10232 mask = operands[5];
10233
10234 mode = GET_MODE (dest);
10235 vmode = GET_MODE (mask);
10236
10237 if (rtx_equal_p (op0, op1))
10238 {
10239 /* Shouldn't happen often (it's useless, obviously), but when it does
10240 we'd generate incorrect code if we continue below. */
10241 emit_move_insn (dest, op0);
10242 return;
10243 }
10244
10245 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10246 {
10247 gcc_assert (REGNO (op1) == REGNO (scratch));
10248
10249 x = gen_rtx_AND (vmode, scratch, mask);
10250 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10251
10252 dest = mask;
10253 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10254 x = gen_rtx_NOT (vmode, dest);
10255 x = gen_rtx_AND (vmode, x, op0);
10256 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10257 }
10258 else
10259 {
10260 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10261 {
10262 x = gen_rtx_AND (vmode, scratch, mask);
10263 }
10264 else /* alternative 2,4 */
10265 {
10266 gcc_assert (REGNO (mask) == REGNO (scratch));
10267 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10268 x = gen_rtx_AND (vmode, scratch, op1);
10269 }
10270 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10271
10272 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10273 {
10274 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10275 x = gen_rtx_AND (vmode, dest, nmask);
10276 }
10277 else /* alternative 3,4 */
10278 {
10279 gcc_assert (REGNO (nmask) == REGNO (dest));
10280 dest = nmask;
10281 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10282 x = gen_rtx_AND (vmode, dest, op0);
10283 }
10284 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10285 }
10286
10287 x = gen_rtx_IOR (vmode, dest, scratch);
10288 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10289 }
10290
10291 /* Return TRUE or FALSE depending on whether the first SET in INSN
10292 has source and destination with matching CC modes, and whether the
10293 CC mode is at least as constrained as REQ_MODE. */
10294
10295 int
10296 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10297 {
10298 rtx set;
10299 enum machine_mode set_mode;
10300
10301 set = PATTERN (insn);
10302 if (GET_CODE (set) == PARALLEL)
10303 set = XVECEXP (set, 0, 0);
10304 gcc_assert (GET_CODE (set) == SET);
10305 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10306
10307 set_mode = GET_MODE (SET_DEST (set));
10308 switch (set_mode)
10309 {
10310 case CCNOmode:
10311 if (req_mode != CCNOmode
10312 && (req_mode != CCmode
10313 || XEXP (SET_SRC (set), 1) != const0_rtx))
10314 return 0;
10315 break;
10316 case CCmode:
10317 if (req_mode == CCGCmode)
10318 return 0;
10319 /* FALLTHRU */
10320 case CCGCmode:
10321 if (req_mode == CCGOCmode || req_mode == CCNOmode)
10322 return 0;
10323 /* FALLTHRU */
10324 case CCGOCmode:
10325 if (req_mode == CCZmode)
10326 return 0;
10327 /* FALLTHRU */
10328 case CCZmode:
10329 break;
10330
10331 default:
10332 gcc_unreachable ();
10333 }
10334
10335 return (GET_MODE (SET_SRC (set)) == set_mode);
10336 }
10337
10338 /* Generate insn patterns to do an integer compare of OPERANDS. */
10339
10340 static rtx
10341 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
10342 {
10343 enum machine_mode cmpmode;
10344 rtx tmp, flags;
10345
10346 cmpmode = SELECT_CC_MODE (code, op0, op1);
10347 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
10348
10349 /* This is very simple, but making the interface the same as in the
10350 FP case makes the rest of the code easier. */
10351 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
10352 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
10353
10354 /* Return the test that should be put into the flags user, i.e.
10355 the bcc, scc, or cmov instruction. */
10356 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
10357 }
10358
10359 /* Figure out whether to use ordered or unordered fp comparisons.
10360 Return the appropriate mode to use. */
10361
10362 enum machine_mode
10363 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
10364 {
10365 /* ??? In order to make all comparisons reversible, we do all comparisons
10366 non-trapping when compiling for IEEE. Once gcc is able to distinguish
10367 between all forms of trapping and nontrapping comparisons, we can make
10368 inequality comparisons trapping again, since that results in better code when using
10369 FCOM based compares. */
10370 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
10371 }
10372
10373 enum machine_mode
10374 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
10375 {
10376 if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10377 return ix86_fp_compare_mode (code);
10378 switch (code)
10379 {
10380 /* Only zero flag is needed. */
10381 case EQ: /* ZF=0 */
10382 case NE: /* ZF!=0 */
10383 return CCZmode;
10384 /* Codes needing carry flag. */
10385 case GEU: /* CF=0 */
10386 case GTU: /* CF=0 & ZF=0 */
10387 case LTU: /* CF=1 */
10388 case LEU: /* CF=1 | ZF=1 */
10389 return CCmode;
10390 /* Codes possibly doable only with sign flag when
10391 comparing against zero. */
10392 case GE: /* SF=OF or SF=0 */
10393 case LT: /* SF<>OF or SF=1 */
10394 if (op1 == const0_rtx)
10395 return CCGOCmode;
10396 else
10397 /* For other cases Carry flag is not required. */
10398 return CCGCmode;
10399 /* Codes doable only with sign flag when comparing
10400 against zero, but we lack a jump instruction for it,
10401 so we need to use relational tests against the overflow flag,
10402 which thus needs to be zero. */
10403 case GT: /* ZF=0 & SF=OF */
10404 case LE: /* ZF=1 | SF<>OF */
10405 if (op1 == const0_rtx)
10406 return CCNOmode;
10407 else
10408 return CCGCmode;
10409 /* The strcmp pattern does (use flags), and combine may ask us for the
10410 proper mode. */
10411 case USE:
10412 return CCmode;
10413 default:
10414 gcc_unreachable ();
10415 }
10416 }
10417
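/* Illustrative examples for ix86_cc_mode (not from the original source):

	(eq  (reg:SI ax) (const_int 0))	-> CCZmode   (only ZF needed)
	(ltu (reg:SI ax) (reg:SI bx))	-> CCmode    (carry flag needed)
	(lt  (reg:SI ax) (const_int 0))	-> CCGOCmode (sign flag suffices)
	(lt  (reg:SI ax) (const_int 7))	-> CCGCmode  (full signed compare)  */
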
10418 /* Return the fixed registers used for condition codes. */
10419
10420 static bool
10421 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
10422 {
10423 *p1 = FLAGS_REG;
10424 *p2 = FPSR_REG;
10425 return true;
10426 }
10427
10428 /* If two condition code modes are compatible, return a condition code
10429 mode which is compatible with both. Otherwise, return
10430 VOIDmode. */
10431
10432 static enum machine_mode
10433 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
10434 {
10435 if (m1 == m2)
10436 return m1;
10437
10438 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
10439 return VOIDmode;
10440
10441 if ((m1 == CCGCmode && m2 == CCGOCmode)
10442 || (m1 == CCGOCmode && m2 == CCGCmode))
10443 return CCGCmode;
10444
10445 switch (m1)
10446 {
10447 default:
10448 gcc_unreachable ();
10449
10450 case CCmode:
10451 case CCGCmode:
10452 case CCGOCmode:
10453 case CCNOmode:
10454 case CCZmode:
10455 switch (m2)
10456 {
10457 default:
10458 return VOIDmode;
10459
10460 case CCmode:
10461 case CCGCmode:
10462 case CCGOCmode:
10463 case CCNOmode:
10464 case CCZmode:
10465 return CCmode;
10466 }
10467
10468 case CCFPmode:
10469 case CCFPUmode:
10470 /* These are only compatible with themselves, which we already
10471 checked above. */
10472 return VOIDmode;
10473 }
10474 }
10475
10476 /* Split comparison code CODE into comparisons we can do using branch
10477 instructions. BYPASS_CODE is the comparison code for a branch that will
10478 branch around FIRST_CODE and SECOND_CODE. If one of the branches
10479 is not required, its value is set to UNKNOWN.
10480 We never require more than two branches. */
10481
10482 void
10483 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
10484 enum rtx_code *first_code,
10485 enum rtx_code *second_code)
10486 {
10487 *first_code = code;
10488 *bypass_code = UNKNOWN;
10489 *second_code = UNKNOWN;
10490
10491 /* The fcomi comparison sets flags as follows:
10492
10493 cmp ZF PF CF
10494 > 0 0 0
10495 < 0 0 1
10496 = 1 0 0
10497 un 1 1 1 */
10498
10499 switch (code)
10500 {
10501 case GT: /* GTU - CF=0 & ZF=0 */
10502 case GE: /* GEU - CF=0 */
10503 case ORDERED: /* PF=0 */
10504 case UNORDERED: /* PF=1 */
10505 case UNEQ: /* EQ - ZF=1 */
10506 case UNLT: /* LTU - CF=1 */
10507 case UNLE: /* LEU - CF=1 | ZF=1 */
10508 case LTGT: /* EQ - ZF=0 */
10509 break;
10510 case LT: /* LTU - CF=1 - fails on unordered */
10511 *first_code = UNLT;
10512 *bypass_code = UNORDERED;
10513 break;
10514 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
10515 *first_code = UNLE;
10516 *bypass_code = UNORDERED;
10517 break;
10518 case EQ: /* EQ - ZF=1 - fails on unordered */
10519 *first_code = UNEQ;
10520 *bypass_code = UNORDERED;
10521 break;
10522 case NE: /* NE - ZF=0 - fails on unordered */
10523 *first_code = LTGT;
10524 *second_code = UNORDERED;
10525 break;
10526 case UNGE: /* GEU - CF=0 - fails on unordered */
10527 *first_code = GE;
10528 *second_code = UNORDERED;
10529 break;
10530 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
10531 *first_code = GT;
10532 *second_code = UNORDERED;
10533 break;
10534 default:
10535 gcc_unreachable ();
10536 }
10537 if (!TARGET_IEEE_FP)
10538 {
10539 *second_code = UNKNOWN;
10540 *bypass_code = UNKNOWN;
10541 }
10542 }
10543
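/* Worked example (illustrative, not from the original source): for an
   IEEE-conforming LT comparison the table above yields
   first_code = UNLT and bypass_code = UNORDERED, so the branch
   expander emits roughly

	jp	.Lskip		; bypass: unordered operands must not take the branch
	jb	target		; UNLT maps to CF=1, i.e. "below"
   .Lskip:

   When !TARGET_IEEE_FP both extra codes are cleared and a single
   conditional jump suffices.  */
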
10544 /* Return the cost of a comparison done using fcom + arithmetic operations on AX.
10545 All of the following functions use the number of instructions as a cost metric.
10546 In the future this should be tweaked to compute bytes for optimize_size and
10547 take into account the performance of various instructions on various CPUs. */
10548 static int
10549 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
10550 {
10551 if (!TARGET_IEEE_FP)
10552 return 4;
10553 /* The cost of code output by ix86_expand_fp_compare. */
10554 switch (code)
10555 {
10556 case UNLE:
10557 case UNLT:
10558 case LTGT:
10559 case GT:
10560 case GE:
10561 case UNORDERED:
10562 case ORDERED:
10563 case UNEQ:
10564 return 4;
10565 break;
10566 case LT:
10567 case NE:
10568 case EQ:
10569 case UNGE:
10570 return 5;
10571 break;
10572 case LE:
10573 case UNGT:
10574 return 6;
10575 break;
10576 default:
10577 gcc_unreachable ();
10578 }
10579 }
10580
10581 /* Return cost of comparison done using fcomi operation.
10582 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10583 static int
10584 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
10585 {
10586 enum rtx_code bypass_code, first_code, second_code;
10587 /* Return arbitrarily high cost when instruction is not supported - this
10588 prevents gcc from using it. */
10589 if (!TARGET_CMOVE)
10590 return 1024;
10591 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10592 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
10593 }
10594
10595 /* Return cost of comparison done using sahf operation.
10596 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10597 static int
10598 ix86_fp_comparison_sahf_cost (enum rtx_code code)
10599 {
10600 enum rtx_code bypass_code, first_code, second_code;
10601 /* Return arbitrarily high cost when instruction is not preferred - this
10602 prevents gcc from using it. */
10603 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
10604 return 1024;
10605 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10606 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
10607 }
10608
10609 /* Compute cost of the comparison done using any method.
10610 See ix86_fp_comparison_arithmetics_cost for the metrics. */
10611 static int
10612 ix86_fp_comparison_cost (enum rtx_code code)
10613 {
10614 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
10615 int min;
10616
10617 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
10618 sahf_cost = ix86_fp_comparison_sahf_cost (code);
10619
10620 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
10621 if (min > sahf_cost)
10622 min = sahf_cost;
10623 if (min > fcomi_cost)
10624 min = fcomi_cost;
10625 return min;
10626 }
10627
10628 /* Return true if we should use an FCOMI instruction for this
10629 fp comparison. */
10630
10631 int
10632 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
10633 {
10634 enum rtx_code swapped_code = swap_condition (code);
10635
10636 return ((ix86_fp_comparison_cost (code)
10637 == ix86_fp_comparison_fcomi_cost (code))
10638 || (ix86_fp_comparison_cost (swapped_code)
10639 == ix86_fp_comparison_fcomi_cost (swapped_code)));
10640 }
10641
10642 /* Swap, force into registers, or otherwise massage the two operands
10643 to a fp comparison. The operands are updated in place; the new
10644 comparison code is returned. */
10645
10646 static enum rtx_code
10647 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
10648 {
10649 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
10650 rtx op0 = *pop0, op1 = *pop1;
10651 enum machine_mode op_mode = GET_MODE (op0);
10652 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
10653
10654 /* All of the unordered compare instructions only work on registers.
10655 The same is true of the fcomi compare instructions. The XFmode
10656 compare instructions require registers except when comparing
10657 against zero or when converting operand 1 from fixed point to
10658 floating point. */
10659
10660 if (!is_sse
10661 && (fpcmp_mode == CCFPUmode
10662 || (op_mode == XFmode
10663 && ! (standard_80387_constant_p (op0) == 1
10664 || standard_80387_constant_p (op1) == 1)
10665 && GET_CODE (op1) != FLOAT)
10666 || ix86_use_fcomi_compare (code)))
10667 {
10668 op0 = force_reg (op_mode, op0);
10669 op1 = force_reg (op_mode, op1);
10670 }
10671 else
10672 {
10673 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
10674 things around if they appear profitable, otherwise force op0
10675 into a register. */
10676
10677 if (standard_80387_constant_p (op0) == 0
10678 || (MEM_P (op0)
10679 && ! (standard_80387_constant_p (op1) == 0
10680 || MEM_P (op1))))
10681 {
10682 rtx tmp;
10683 tmp = op0, op0 = op1, op1 = tmp;
10684 code = swap_condition (code);
10685 }
10686
10687 if (!REG_P (op0))
10688 op0 = force_reg (op_mode, op0);
10689
10690 if (CONSTANT_P (op1))
10691 {
10692 int tmp = standard_80387_constant_p (op1);
10693 if (tmp == 0)
10694 op1 = validize_mem (force_const_mem (op_mode, op1));
10695 else if (tmp == 1)
10696 {
10697 if (TARGET_CMOVE)
10698 op1 = force_reg (op_mode, op1);
10699 }
10700 else
10701 op1 = force_reg (op_mode, op1);
10702 }
10703 }
10704
10705 /* Try to rearrange the comparison to make it cheaper. */
10706 if (ix86_fp_comparison_cost (code)
10707 > ix86_fp_comparison_cost (swap_condition (code))
10708 && (REG_P (op1) || !no_new_pseudos))
10709 {
10710 rtx tmp;
10711 tmp = op0, op0 = op1, op1 = tmp;
10712 code = swap_condition (code);
10713 if (!REG_P (op0))
10714 op0 = force_reg (op_mode, op0);
10715 }
10716
10717 *pop0 = op0;
10718 *pop1 = op1;
10719 return code;
10720 }
10721
10722 /* Convert comparison codes we use to represent FP comparison to integer
10723 code that will result in a proper branch. Return UNKNOWN if no such code
10724 is available. */
10725
10726 enum rtx_code
10727 ix86_fp_compare_code_to_integer (enum rtx_code code)
10728 {
10729 switch (code)
10730 {
10731 case GT:
10732 return GTU;
10733 case GE:
10734 return GEU;
10735 case ORDERED:
10736 case UNORDERED:
10737 return code;
10738 break;
10739 case UNEQ:
10740 return EQ;
10741 break;
10742 case UNLT:
10743 return LTU;
10744 break;
10745 case UNLE:
10746 return LEU;
10747 break;
10748 case LTGT:
10749 return NE;
10750 break;
10751 default:
10752 return UNKNOWN;
10753 }
10754 }
10755
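/* Illustrative mapping (not from the original source): after fcomi or
   fnstsw+sahf the FP flags look like an unsigned integer compare, so
   e.g. GT becomes GTU and the branch expander can emit "ja", while GE
   becomes GEU and uses "jae".  Codes such as LT have no direct
   counterpart here and return UNKNOWN.  */
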
10756 /* Generate insn patterns to do a floating point compare of OPERANDS. */
10757
10758 static rtx
10759 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
10760 rtx *second_test, rtx *bypass_test)
10761 {
10762 enum machine_mode fpcmp_mode, intcmp_mode;
10763 rtx tmp, tmp2;
10764 int cost = ix86_fp_comparison_cost (code);
10765 enum rtx_code bypass_code, first_code, second_code;
10766
10767 fpcmp_mode = ix86_fp_compare_mode (code);
10768 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
10769
10770 if (second_test)
10771 *second_test = NULL_RTX;
10772 if (bypass_test)
10773 *bypass_test = NULL_RTX;
10774
10775 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10776
10777 /* Do fcomi/sahf based test when profitable. */
10778 if ((TARGET_CMOVE || TARGET_SAHF)
10779 && (bypass_code == UNKNOWN || bypass_test)
10780 && (second_code == UNKNOWN || second_test)
10781 && ix86_fp_comparison_arithmetics_cost (code) > cost)
10782 {
10783 if (TARGET_CMOVE)
10784 {
10785 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10786 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
10787 tmp);
10788 emit_insn (tmp);
10789 }
10790 else
10791 {
10792 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10793 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10794 if (!scratch)
10795 scratch = gen_reg_rtx (HImode);
10796 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10797 emit_insn (gen_x86_sahf_1 (scratch));
10798 }
10799
10800 /* The FP codes work out to act like unsigned. */
10801 intcmp_mode = fpcmp_mode;
10802 code = first_code;
10803 if (bypass_code != UNKNOWN)
10804 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
10805 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10806 const0_rtx);
10807 if (second_code != UNKNOWN)
10808 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
10809 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10810 const0_rtx);
10811 }
10812 else
10813 {
10814 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
10815 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
10816 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
10817 if (!scratch)
10818 scratch = gen_reg_rtx (HImode);
10819 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
10820
10821 /* In the unordered case, we have to check C2 for NaN's, which
10822 doesn't happen to work out to anything nice combination-wise.
10823 So do some bit twiddling on the value we've got in AH to come
10824 up with an appropriate set of condition codes. */
10825
10826 intcmp_mode = CCNOmode;
10827 switch (code)
10828 {
10829 case GT:
10830 case UNGT:
10831 if (code == GT || !TARGET_IEEE_FP)
10832 {
10833 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10834 code = EQ;
10835 }
10836 else
10837 {
10838 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10839 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10840 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
10841 intcmp_mode = CCmode;
10842 code = GEU;
10843 }
10844 break;
10845 case LT:
10846 case UNLT:
10847 if (code == LT && TARGET_IEEE_FP)
10848 {
10849 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10850 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
10851 intcmp_mode = CCmode;
10852 code = EQ;
10853 }
10854 else
10855 {
10856 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
10857 code = NE;
10858 }
10859 break;
10860 case GE:
10861 case UNGE:
10862 if (code == GE || !TARGET_IEEE_FP)
10863 {
10864 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
10865 code = EQ;
10866 }
10867 else
10868 {
10869 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10870 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10871 GEN_INT (0x01)));
10872 code = NE;
10873 }
10874 break;
10875 case LE:
10876 case UNLE:
10877 if (code == LE && TARGET_IEEE_FP)
10878 {
10879 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10880 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
10881 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10882 intcmp_mode = CCmode;
10883 code = LTU;
10884 }
10885 else
10886 {
10887 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
10888 code = NE;
10889 }
10890 break;
10891 case EQ:
10892 case UNEQ:
10893 if (code == EQ && TARGET_IEEE_FP)
10894 {
10895 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10896 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
10897 intcmp_mode = CCmode;
10898 code = EQ;
10899 }
10900 else
10901 {
10902 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10903 code = NE;
10904 break;
10905 }
10906 break;
10907 case NE:
10908 case LTGT:
10909 if (code == NE && TARGET_IEEE_FP)
10910 {
10911 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
10912 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
10913 GEN_INT (0x40)));
10914 code = NE;
10915 }
10916 else
10917 {
10918 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
10919 code = EQ;
10920 }
10921 break;
10922
10923 case UNORDERED:
10924 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10925 code = NE;
10926 break;
10927 case ORDERED:
10928 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
10929 code = EQ;
10930 break;
10931
10932 default:
10933 gcc_unreachable ();
10934 }
10935 }
10936
10937 /* Return the test that should be put into the flags user, i.e.
10938 the bcc, scc, or cmov instruction. */
10939 return gen_rtx_fmt_ee (code, VOIDmode,
10940 gen_rtx_REG (intcmp_mode, FLAGS_REG),
10941 const0_rtx);
10942 }
10943
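/* Generate the rtx describing the flags test for a comparison of
ix86_compare_op0 against ix86_compare_op1 using CODE. For floating point
comparisons that cannot be done with a single flags test, *SECOND_TEST is
set to an extra test whose result must be ORed in, and *BYPASS_TEST to a
test that, when true, forces the overall result to false. */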
10944 rtx
10945 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
10946 {
10947 rtx op0, op1, ret;
10948 op0 = ix86_compare_op0;
10949 op1 = ix86_compare_op1;
10950
10951 if (second_test)
10952 *second_test = NULL_RTX;
10953 if (bypass_test)
10954 *bypass_test = NULL_RTX;
10955
10956 if (ix86_compare_emitted)
10957 {
10958 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
10959 ix86_compare_emitted = NULL_RTX;
10960 }
10961 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
10962 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
10963 second_test, bypass_test);
10964 else
10965 ret = ix86_expand_int_compare (code, op0, op1);
10966
10967 return ret;
10968 }
10969
10970 /* Return true if the CODE will result in a nontrivial jump sequence. */
10971 bool
10972 ix86_fp_jump_nontrivial_p (enum rtx_code code)
10973 {
10974 enum rtx_code bypass_code, first_code, second_code;
10975 if (!TARGET_CMOVE)
10976 return true;
10977 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
10978 return bypass_code != UNKNOWN || second_code != UNKNOWN;
10979 }
10980
10981 void
10982 ix86_expand_branch (enum rtx_code code, rtx label)
10983 {
10984 rtx tmp;
10985
10986 /* If we have emitted a compare insn, go straight to simple.
10987 ix86_expand_compare won't emit anything if ix86_compare_emitted
10988 is non-NULL. */
10989 if (ix86_compare_emitted)
10990 goto simple;
10991
10992 switch (GET_MODE (ix86_compare_op0))
10993 {
10994 case QImode:
10995 case HImode:
10996 case SImode:
10997 simple:
10998 tmp = ix86_expand_compare (code, NULL, NULL);
10999 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11000 gen_rtx_LABEL_REF (VOIDmode, label),
11001 pc_rtx);
11002 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11003 return;
11004
11005 case SFmode:
11006 case DFmode:
11007 case XFmode:
11008 {
11009 rtvec vec;
11010 int use_fcomi;
11011 enum rtx_code bypass_code, first_code, second_code;
11012
11013 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11014 &ix86_compare_op1);
11015
11016 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11017
11018 /* Check whether we will use the natural sequence with one jump. If
11019 so, we can expand the jump early. Otherwise delay expansion by
11020 creating a compound insn so as not to confuse the optimizers. */
11021 if (bypass_code == UNKNOWN && second_code == UNKNOWN
11022 && TARGET_CMOVE)
11023 {
11024 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11025 gen_rtx_LABEL_REF (VOIDmode, label),
11026 pc_rtx, NULL_RTX, NULL_RTX);
11027 }
11028 else
11029 {
11030 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11031 ix86_compare_op0, ix86_compare_op1);
11032 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11033 gen_rtx_LABEL_REF (VOIDmode, label),
11034 pc_rtx);
11035 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11036
11037 use_fcomi = ix86_use_fcomi_compare (code);
11038 vec = rtvec_alloc (3 + !use_fcomi);
11039 RTVEC_ELT (vec, 0) = tmp;
11040 RTVEC_ELT (vec, 1)
11041 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 18));
11042 RTVEC_ELT (vec, 2)
11043 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, 17));
11044 if (! use_fcomi)
11045 RTVEC_ELT (vec, 3)
11046 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11047
11048 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11049 }
11050 return;
11051 }
11052
11053 case DImode:
11054 if (TARGET_64BIT)
11055 goto simple;
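/* FALLTHRU - 32-bit DImode is split below just like TImode. */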
11056 case TImode:
11057 /* Expand DImode and TImode branches into multiple compare+branch. */
11058 {
11059 rtx lo[2], hi[2], label2;
11060 enum rtx_code code1, code2, code3;
11061 enum machine_mode submode;
11062
11063 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11064 {
11065 tmp = ix86_compare_op0;
11066 ix86_compare_op0 = ix86_compare_op1;
11067 ix86_compare_op1 = tmp;
11068 code = swap_condition (code);
11069 }
11070 if (GET_MODE (ix86_compare_op0) == DImode)
11071 {
11072 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11073 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11074 submode = SImode;
11075 }
11076 else
11077 {
11078 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11079 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11080 submode = DImode;
11081 }
11082
11083 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11084 avoid two branches. This costs one extra insn, so disable when
11085 optimizing for size. */
11086
11087 if ((code == EQ || code == NE)
11088 && (!optimize_size
11089 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11090 {
11091 rtx xor0, xor1;
11092
11093 xor1 = hi[0];
11094 if (hi[1] != const0_rtx)
11095 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11096 NULL_RTX, 0, OPTAB_WIDEN);
11097
11098 xor0 = lo[0];
11099 if (lo[1] != const0_rtx)
11100 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11101 NULL_RTX, 0, OPTAB_WIDEN);
11102
11103 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11104 NULL_RTX, 0, OPTAB_WIDEN);
11105
11106 ix86_compare_op0 = tmp;
11107 ix86_compare_op1 = const0_rtx;
11108 ix86_expand_branch (code, label);
11109 return;
11110 }
11111
11112 /* Otherwise, if we are doing a less-than or greater-or-equal-than
11113 comparison, op1 is a constant and the low word is zero, then we can
11114 just examine the high word. */
11115
11116 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11117 switch (code)
11118 {
11119 case LT: case LTU: case GE: case GEU:
11120 ix86_compare_op0 = hi[0];
11121 ix86_compare_op1 = hi[1];
11122 ix86_expand_branch (code, label);
11123 return;
11124 default:
11125 break;
11126 }
11127
11128 /* Otherwise, we need two or three jumps. */
11129
11130 label2 = gen_label_rtx ();
11131
11132 code1 = code;
11133 code2 = swap_condition (code);
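/* The low halves carry no sign information, so the final low-half
compare below uses the unsigned variant of the condition. */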
11134 code3 = unsigned_condition (code);
11135
11136 switch (code)
11137 {
11138 case LT: case GT: case LTU: case GTU:
11139 break;
11140
11141 case LE: code1 = LT; code2 = GT; break;
11142 case GE: code1 = GT; code2 = LT; break;
11143 case LEU: code1 = LTU; code2 = GTU; break;
11144 case GEU: code1 = GTU; code2 = LTU; break;
11145
11146 case EQ: code1 = UNKNOWN; code2 = NE; break;
11147 case NE: code2 = UNKNOWN; break;
11148
11149 default:
11150 gcc_unreachable ();
11151 }
11152
11153 /*
11154 * a < b =>
11155 * if (hi(a) < hi(b)) goto true;
11156 * if (hi(a) > hi(b)) goto false;
11157 * if (lo(a) < lo(b)) goto true;
11158 * false:
11159 */
11160
11161 ix86_compare_op0 = hi[0];
11162 ix86_compare_op1 = hi[1];
11163
11164 if (code1 != UNKNOWN)
11165 ix86_expand_branch (code1, label);
11166 if (code2 != UNKNOWN)
11167 ix86_expand_branch (code2, label2);
11168
11169 ix86_compare_op0 = lo[0];
11170 ix86_compare_op1 = lo[1];
11171 ix86_expand_branch (code3, label);
11172
11173 if (code2 != UNKNOWN)
11174 emit_label (label2);
11175 return;
11176 }
11177
11178 default:
11179 gcc_unreachable ();
11180 }
11181 }
11182
11183 /* Split branch based on floating point condition. */
11184 void
11185 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11186 rtx target1, rtx target2, rtx tmp, rtx pushed)
11187 {
11188 rtx second, bypass;
11189 rtx label = NULL_RTX;
11190 rtx condition;
11191 int bypass_probability = -1, second_probability = -1, probability = -1;
11192 rtx i;
11193
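/* If the caller passed the branch target as TARGET2, reverse the
condition and swap the targets so that the emitted jump falls through
to TARGET2. */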
11194 if (target2 != pc_rtx)
11195 {
11196 rtx tmp = target2;
11197 code = reverse_condition_maybe_unordered (code);
11198 target2 = target1;
11199 target1 = tmp;
11200 }
11201
11202 condition = ix86_expand_fp_compare (code, op1, op2,
11203 tmp, &second, &bypass);
11204
11205 /* Remove pushed operand from stack. */
11206 if (pushed)
11207 ix86_free_from_memory (GET_MODE (pushed));
11208
11209 if (split_branch_probability >= 0)
11210 {
11211 /* Distribute the probabilities across the jumps.
11212 Assume that BYPASS and SECOND always test
11213 for UNORDERED. */
11214 probability = split_branch_probability;
11215
11216 /* A value of 1 is low enough that the probability does not need
11217 to be updated. Later we may run some experiments and see
11218 whether unordered values are more frequent in practice. */
11219 if (bypass)
11220 bypass_probability = 1;
11221 if (second)
11222 second_probability = 1;
11223 }
11224 if (bypass != NULL_RTX)
11225 {
11226 label = gen_label_rtx ();
11227 i = emit_jump_insn (gen_rtx_SET
11228 (VOIDmode, pc_rtx,
11229 gen_rtx_IF_THEN_ELSE (VOIDmode,
11230 bypass,
11231 gen_rtx_LABEL_REF (VOIDmode,
11232 label),
11233 pc_rtx)));
11234 if (bypass_probability >= 0)
11235 REG_NOTES (i)
11236 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11237 GEN_INT (bypass_probability),
11238 REG_NOTES (i));
11239 }
11240 i = emit_jump_insn (gen_rtx_SET
11241 (VOIDmode, pc_rtx,
11242 gen_rtx_IF_THEN_ELSE (VOIDmode,
11243 condition, target1, target2)));
11244 if (probability >= 0)
11245 REG_NOTES (i)
11246 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11247 GEN_INT (probability),
11248 REG_NOTES (i));
11249 if (second != NULL_RTX)
11250 {
11251 i = emit_jump_insn (gen_rtx_SET
11252 (VOIDmode, pc_rtx,
11253 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11254 target2)));
11255 if (second_probability >= 0)
11256 REG_NOTES (i)
11257 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11258 GEN_INT (second_probability),
11259 REG_NOTES (i));
11260 }
11261 if (label != NULL_RTX)
11262 emit_label (label);
11263 }
11264
11265 int
11266 ix86_expand_setcc (enum rtx_code code, rtx dest)
11267 {
11268 rtx ret, tmp, tmpreg, equiv;
11269 rtx second_test, bypass_test;
11270
11271 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11272 return 0; /* FAIL */
11273
11274 gcc_assert (GET_MODE (dest) == QImode);
11275
11276 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11277 PUT_MODE (ret, QImode);
11278
11279 tmp = dest;
11280 tmpreg = dest;
11281
11282 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11283 if (bypass_test || second_test)
11284 {
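/* The FP comparison was split into two flags tests. A bypass test,
when true, forces the overall result to 0, so AND in its inverse;
a second test, when true, forces the result to 1, so OR it in. */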
11285 rtx test = second_test;
11286 int bypass = 0;
11287 rtx tmp2 = gen_reg_rtx (QImode);
11288 if (bypass_test)
11289 {
11290 gcc_assert (!second_test);
11291 test = bypass_test;
11292 bypass = 1;
11293 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
11294 }
11295 PUT_MODE (test, QImode);
11296 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
11297
11298 if (bypass)
11299 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
11300 else
11301 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
11302 }
11303
11304 /* Attach a REG_EQUAL note describing the comparison result. */
11305 if (ix86_compare_op0 && ix86_compare_op1)
11306 {
11307 equiv = simplify_gen_relational (code, QImode,
11308 GET_MODE (ix86_compare_op0),
11309 ix86_compare_op0, ix86_compare_op1);
11310 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
11311 }
11312
11313 return 1; /* DONE */
11314 }
11315
11316 /* Expand a comparison setting or clearing the carry flag. Return true
11317 when successful, and set *POP to the flags comparison to use. */
11318 static bool
11319 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
11320 {
11321 enum machine_mode mode =
11322 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
11323
11324 /* Do not handle double-word compares, which go through a special path.
11325 FP compares are handled further below. */
11326 if (mode == (TARGET_64BIT ? TImode : DImode))
11327 return false;
11328 if (FLOAT_MODE_P (mode))
11329 {
11330 rtx second_test = NULL, bypass_test = NULL;
11331 rtx compare_op, compare_seq;
11332
11333 /* Shortcut: the following common codes never translate into carry flag compares. */
11334 if (code == EQ || code == NE || code == UNEQ || code == LTGT
11335 || code == ORDERED || code == UNORDERED)
11336 return false;
11337
11338 /* These comparisons require the zero flag; swap the operands so they don't. */
11339 if ((code == GT || code == UNLE || code == LE || code == UNGT)
11340 && !TARGET_IEEE_FP)
11341 {
11342 rtx tmp = op0;
11343 op0 = op1;
11344 op1 = tmp;
11345 code = swap_condition (code);
11346 }
11347
11348 /* Try to expand the comparison and verify that we end up with a carry flag
11349 based comparison. This fails to be true only when we decide to expand the
11350 comparison using arithmetic, which is not a common scenario. */
11351 start_sequence ();
11352 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11353 &second_test, &bypass_test);
11354 compare_seq = get_insns ();
11355 end_sequence ();
11356
11357 if (second_test || bypass_test)
11358 return false;
11359 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11360 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11361 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
11362 else
11363 code = GET_CODE (compare_op);
11364 if (code != LTU && code != GEU)
11365 return false;
11366 emit_insn (compare_seq);
11367 *pop = compare_op;
11368 return true;
11369 }
11370 if (!INTEGRAL_MODE_P (mode))
11371 return false;
11372 switch (code)
11373 {
11374 case LTU:
11375 case GEU:
11376 break;
11377
11378 /* Convert a==0 into (unsigned)a<1. */
11379 case EQ:
11380 case NE:
11381 if (op1 != const0_rtx)
11382 return false;
11383 op1 = const1_rtx;
11384 code = (code == EQ ? LTU : GEU);
11385 break;
11386
11387 /* Convert a>b into b<a or a>=b+1. */
11388 case GTU:
11389 case LEU:
11390 if (CONST_INT_P (op1))
11391 {
11392 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
11393 /* Bail out on overflow. We still can swap operands but that
11394 would force loading of the constant into register. */
11395 if (op1 == const0_rtx
11396 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
11397 return false;
11398 code = (code == GTU ? GEU : LTU);
11399 }
11400 else
11401 {
11402 rtx tmp = op1;
11403 op1 = op0;
11404 op0 = tmp;
11405 code = (code == GTU ? LTU : GEU);
11406 }
11407 break;
11408
11409 /* Convert a>=0 into (unsigned)a<0x80000000. */
11410 case LT:
11411 case GE:
11412 if (mode == DImode || op1 != const0_rtx)
11413 return false;
11414 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11415 code = (code == LT ? GEU : LTU);
11416 break;
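/* Convert a>-1 into (unsigned)a<0x80000000 and a<=-1 into
(unsigned)a>=0x80000000. */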
11417 case LE:
11418 case GT:
11419 if (mode == DImode || op1 != constm1_rtx)
11420 return false;
11421 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
11422 code = (code == LE ? GEU : LTU);
11423 break;
11424
11425 default:
11426 return false;
11427 }
11428 /* Swapping operands may cause constant to appear as first operand. */
11429 if (!nonimmediate_operand (op0, VOIDmode))
11430 {
11431 if (no_new_pseudos)
11432 return false;
11433 op0 = force_reg (mode, op0);
11434 }
11435 ix86_compare_op0 = op0;
11436 ix86_compare_op1 = op1;
11437 *pop = ix86_expand_compare (code, NULL, NULL);
11438 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
11439 return true;
11440 }
11441
11442 int
11443 ix86_expand_int_movcc (rtx operands[])
11444 {
11445 enum rtx_code code = GET_CODE (operands[1]), compare_code;
11446 rtx compare_seq, compare_op;
11447 rtx second_test, bypass_test;
11448 enum machine_mode mode = GET_MODE (operands[0]);
11449 bool sign_bit_compare_p = false;
11450
11451 start_sequence ();
11452 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
11453 compare_seq = get_insns ();
11454 end_sequence ();
11455
11456 compare_code = GET_CODE (compare_op);
11457
11458 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
11459 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
11460 sign_bit_compare_p = true;
11461
11462 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
11463 HImode insns, we'd be swallowed in word prefix ops. */
11464
11465 if ((mode != HImode || TARGET_FAST_PREFIX)
11466 && (mode != (TARGET_64BIT ? TImode : DImode))
11467 && CONST_INT_P (operands[2])
11468 && CONST_INT_P (operands[3]))
11469 {
11470 rtx out = operands[0];
11471 HOST_WIDE_INT ct = INTVAL (operands[2]);
11472 HOST_WIDE_INT cf = INTVAL (operands[3]);
11473 HOST_WIDE_INT diff;
11474
11475 diff = ct - cf;
11476 /* Sign bit compares are better done using shifts than by using
11477 sbb. */
11478 if (sign_bit_compare_p
11479 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
11480 ix86_compare_op1, &compare_op))
11481 {
11482 /* Detect overlap between destination and compare sources. */
11483 rtx tmp = out;
11484
11485 if (!sign_bit_compare_p)
11486 {
11487 bool fpcmp = false;
11488
11489 compare_code = GET_CODE (compare_op);
11490
11491 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
11492 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
11493 {
11494 fpcmp = true;
11495 compare_code = ix86_fp_compare_code_to_integer (compare_code);
11496 }
11497
11498 /* To simplify the rest of the code, restrict to the GEU case. */
11499 if (compare_code == LTU)
11500 {
11501 HOST_WIDE_INT tmp = ct;
11502 ct = cf;
11503 cf = tmp;
11504 compare_code = reverse_condition (compare_code);
11505 code = reverse_condition (code);
11506 }
11507 else
11508 {
11509 if (fpcmp)
11510 PUT_CODE (compare_op,
11511 reverse_condition_maybe_unordered
11512 (GET_CODE (compare_op)));
11513 else
11514 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
11515 }
11516 diff = ct - cf;
11517
11518 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
11519 || reg_overlap_mentioned_p (out, ix86_compare_op1))
11520 tmp = gen_reg_rtx (mode);
11521
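/* The movXicc_0_m1 patterns expand to sbb reg,reg, leaving -1 in TMP
when the carry flag is set and 0 otherwise. */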
11522 if (mode == DImode)
11523 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
11524 else
11525 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
11526 }
11527 else
11528 {
11529 if (code == GT || code == GE)
11530 code = reverse_condition (code);
11531 else
11532 {
11533 HOST_WIDE_INT tmp = ct;
11534 ct = cf;
11535 cf = tmp;
11536 diff = ct - cf;
11537 }
11538 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
11539 ix86_compare_op1, VOIDmode, 0, -1);
11540 }
11541
11542 if (diff == 1)
11543 {
11544 /*
11545 * cmpl op0,op1
11546 * sbbl dest,dest
11547 * [addl dest, ct]
11548 *
11549 * Size 5 - 8.
11550 */
11551 if (ct)
11552 tmp = expand_simple_binop (mode, PLUS,
11553 tmp, GEN_INT (ct),
11554 copy_rtx (tmp), 1, OPTAB_DIRECT);
11555 }
11556 else if (cf == -1)
11557 {
11558 /*
11559 * cmpl op0,op1
11560 * sbbl dest,dest
11561 * orl $ct, dest
11562 *
11563 * Size 8.
11564 */
11565 tmp = expand_simple_binop (mode, IOR,
11566 tmp, GEN_INT (ct),
11567 copy_rtx (tmp), 1, OPTAB_DIRECT);
11568 }
11569 else if (diff == -1 && ct)
11570 {
11571 /*
11572 * cmpl op0,op1
11573 * sbbl dest,dest
11574 * notl dest
11575 * [addl dest, cf]
11576 *
11577 * Size 8 - 11.
11578 */
11579 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11580 if (cf)
11581 tmp = expand_simple_binop (mode, PLUS,
11582 copy_rtx (tmp), GEN_INT (cf),
11583 copy_rtx (tmp), 1, OPTAB_DIRECT);
11584 }
11585 else
11586 {
11587 /*
11588 * cmpl op0,op1
11589 * sbbl dest,dest
11590 * [notl dest]
11591 * andl cf - ct, dest
11592 * [addl dest, ct]
11593 *
11594 * Size 8 - 11.
11595 */
11596
11597 if (cf == 0)
11598 {
11599 cf = ct;
11600 ct = 0;
11601 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
11602 }
11603
11604 tmp = expand_simple_binop (mode, AND,
11605 copy_rtx (tmp),
11606 gen_int_mode (cf - ct, mode),
11607 copy_rtx (tmp), 1, OPTAB_DIRECT);
11608 if (ct)
11609 tmp = expand_simple_binop (mode, PLUS,
11610 copy_rtx (tmp), GEN_INT (ct),
11611 copy_rtx (tmp), 1, OPTAB_DIRECT);
11612 }
11613
11614 if (!rtx_equal_p (tmp, out))
11615 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
11616
11617 return 1; /* DONE */
11618 }
11619
11620 if (diff < 0)
11621 {
11622 HOST_WIDE_INT tmp;
11623 tmp = ct, ct = cf, cf = tmp;
11624 diff = -diff;
11625 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11626 {
11627 /* We may be reversing an unordered compare to a normal compare, which
11628 is not valid in general (we may convert a non-trapping condition
11629 to a trapping one); however, on i386 we currently emit all
11630 comparisons unordered. */
11631 compare_code = reverse_condition_maybe_unordered (compare_code);
11632 code = reverse_condition_maybe_unordered (code);
11633 }
11634 else
11635 {
11636 compare_code = reverse_condition (compare_code);
11637 code = reverse_condition (code);
11638 }
11639 }
11640
11641 compare_code = UNKNOWN;
11642 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
11643 && CONST_INT_P (ix86_compare_op1))
11644 {
11645 if (ix86_compare_op1 == const0_rtx
11646 && (code == LT || code == GE))
11647 compare_code = code;
11648 else if (ix86_compare_op1 == constm1_rtx)
11649 {
11650 if (code == LE)
11651 compare_code = LT;
11652 else if (code == GT)
11653 compare_code = GE;
11654 }
11655 }
11656
11657 /* Optimize dest = (op0 < 0) ? -1 : cf. */
11658 if (compare_code != UNKNOWN
11659 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
11660 && (cf == -1 || ct == -1))
11661 {
11662 /* If lea code below could be used, only optimize
11663 if it results in a 2 insn sequence. */
11664
11665 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
11666 || diff == 3 || diff == 5 || diff == 9)
11667 || (compare_code == LT && ct == -1)
11668 || (compare_code == GE && cf == -1))
11669 {
11670 /*
11671 * notl op1 (if necessary)
11672 * sarl $31, op1
11673 * orl cf, op1
11674 */
11675 if (ct != -1)
11676 {
11677 cf = ct;
11678 ct = -1;
11679 code = reverse_condition (code);
11680 }
11681
11682 out = emit_store_flag (out, code, ix86_compare_op0,
11683 ix86_compare_op1, VOIDmode, 0, -1);
11684
11685 out = expand_simple_binop (mode, IOR,
11686 out, GEN_INT (cf),
11687 out, 1, OPTAB_DIRECT);
11688 if (out != operands[0])
11689 emit_move_insn (operands[0], out);
11690
11691 return 1; /* DONE */
11692 }
11693 }
11694
11695
11696 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
11697 || diff == 3 || diff == 5 || diff == 9)
11698 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
11699 && (mode != DImode
11700 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
11701 {
11702 /*
11703 * xorl dest,dest
11704 * cmpl op1,op2
11705 * setcc dest
11706 * lea cf(dest*(ct-cf)),dest
11707 *
11708 * Size 14.
11709 *
11710 * This also catches the degenerate setcc-only case.
11711 */
11712
11713 rtx tmp;
11714 int nops;
11715
11716 out = emit_store_flag (out, code, ix86_compare_op0,
11717 ix86_compare_op1, VOIDmode, 0, 1);
11718
11719 nops = 0;
11720 /* On x86_64 the lea instruction operates on Pmode, so we need
11721 the arithmetic done in the proper mode to match. */
11722 if (diff == 1)
11723 tmp = copy_rtx (out);
11724 else
11725 {
11726 rtx out1;
11727 out1 = copy_rtx (out);
11728 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
11729 nops++;
11730 if (diff & 1)
11731 {
11732 tmp = gen_rtx_PLUS (mode, tmp, out1);
11733 nops++;
11734 }
11735 }
11736 if (cf != 0)
11737 {
11738 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
11739 nops++;
11740 }
11741 if (!rtx_equal_p (tmp, out))
11742 {
11743 if (nops == 1)
11744 out = force_operand (tmp, copy_rtx (out));
11745 else
11746 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
11747 }
11748 if (!rtx_equal_p (out, operands[0]))
11749 emit_move_insn (operands[0], copy_rtx (out));
11750
11751 return 1; /* DONE */
11752 }
11753
11754 /*
11755 * General case: Jumpful:
11756 * xorl dest,dest cmpl op1, op2
11757 * cmpl op1, op2 movl ct, dest
11758 * setcc dest jcc 1f
11759 * decl dest movl cf, dest
11760 * andl (cf-ct),dest 1:
11761 * addl ct,dest
11762 *
11763 * Size 20. Size 14.
11764 *
11765 * This is reasonably steep, but branch mispredict costs are
11766 * high on modern cpus, so consider failing only if optimizing
11767 * for space.
11768 */
11769
11770 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11771 && BRANCH_COST >= 2)
11772 {
11773 if (cf == 0)
11774 {
11775 cf = ct;
11776 ct = 0;
11777 if (FLOAT_MODE_P (GET_MODE (ix86_compare_op0)))
11778 /* We may be reversing an unordered compare to a normal compare,
11779 which is not valid in general (we may convert a non-trapping
11780 condition to a trapping one); however, on i386 we currently
11781 emit all comparisons unordered. */
11782 code = reverse_condition_maybe_unordered (code);
11783 else
11784 {
11785 code = reverse_condition (code);
11786 if (compare_code != UNKNOWN)
11787 compare_code = reverse_condition (compare_code);
11788 }
11789 }
11790
11791 if (compare_code != UNKNOWN)
11792 {
11793 /* notl op1 (if needed)
11794 sarl $31, op1
11795 andl (cf-ct), op1
11796 addl ct, op1
11797
11798 For x < 0 (resp. x <= -1) there will be no notl,
11799 so if possible swap the constants to get rid of the
11800 complement.
11801 True/false will be -1/0 while code below (store flag
11802 followed by decrement) is 0/-1, so the constants need
11803 to be exchanged once more. */
11804
11805 if (compare_code == GE || !cf)
11806 {
11807 code = reverse_condition (code);
11808 compare_code = LT;
11809 }
11810 else
11811 {
11812 HOST_WIDE_INT tmp = cf;
11813 cf = ct;
11814 ct = tmp;
11815 }
11816
11817 out = emit_store_flag (out, code, ix86_compare_op0,
11818 ix86_compare_op1, VOIDmode, 0, -1);
11819 }
11820 else
11821 {
11822 out = emit_store_flag (out, code, ix86_compare_op0,
11823 ix86_compare_op1, VOIDmode, 0, 1);
11824
11825 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
11826 copy_rtx (out), 1, OPTAB_DIRECT);
11827 }
11828
11829 out = expand_simple_binop (mode, AND, copy_rtx (out),
11830 gen_int_mode (cf - ct, mode),
11831 copy_rtx (out), 1, OPTAB_DIRECT);
11832 if (ct)
11833 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
11834 copy_rtx (out), 1, OPTAB_DIRECT);
11835 if (!rtx_equal_p (out, operands[0]))
11836 emit_move_insn (operands[0], copy_rtx (out));
11837
11838 return 1; /* DONE */
11839 }
11840 }
11841
11842 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
11843 {
11844 /* Try a few things more with specific constants and a variable. */
11845
11846 optab op;
11847 rtx var, orig_out, out, tmp;
11848
11849 if (BRANCH_COST <= 2)
11850 return 0; /* FAIL */
11851
11852 /* If one of the two operands is an interesting constant, load a
11853 constant with the above and mask it in with a logical operation. */
11854
11855 if (CONST_INT_P (operands[2]))
11856 {
11857 var = operands[3];
11858 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
11859 operands[3] = constm1_rtx, op = and_optab;
11860 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
11861 operands[3] = const0_rtx, op = ior_optab;
11862 else
11863 return 0; /* FAIL */
11864 }
11865 else if (CONST_INT_P (operands[3]))
11866 {
11867 var = operands[2];
11868 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
11869 operands[2] = constm1_rtx, op = and_optab;
11870 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
11871 operands[2] = const0_rtx, op = ior_optab;
11872 else
11873 return 0; /* FAIL */
11874 }
11875 else
11876 return 0; /* FAIL */
11877
11878 orig_out = operands[0];
11879 tmp = gen_reg_rtx (mode);
11880 operands[0] = tmp;
11881
11882 /* Recurse to get the constant loaded. */
11883 if (ix86_expand_int_movcc (operands) == 0)
11884 return 0; /* FAIL */
11885
11886 /* Mask in the interesting variable. */
11887 out = expand_binop (mode, op, var, tmp, orig_out, 0,
11888 OPTAB_WIDEN);
11889 if (!rtx_equal_p (out, orig_out))
11890 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
11891
11892 return 1; /* DONE */
11893 }
11894
11895 /*
11896 * For comparison with above,
11897 *
11898 * movl cf,dest
11899 * movl ct,tmp
11900 * cmpl op1,op2
11901 * cmovcc tmp,dest
11902 *
11903 * Size 15.
11904 */
11905
11906 if (! nonimmediate_operand (operands[2], mode))
11907 operands[2] = force_reg (mode, operands[2]);
11908 if (! nonimmediate_operand (operands[3], mode))
11909 operands[3] = force_reg (mode, operands[3]);
11910
11911 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
11912 {
11913 rtx tmp = gen_reg_rtx (mode);
11914 emit_move_insn (tmp, operands[3]);
11915 operands[3] = tmp;
11916 }
11917 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
11918 {
11919 rtx tmp = gen_reg_rtx (mode);
11920 emit_move_insn (tmp, operands[2]);
11921 operands[2] = tmp;
11922 }
11923
11924 if (! register_operand (operands[2], VOIDmode)
11925 && (mode == QImode
11926 || ! register_operand (operands[3], VOIDmode)))
11927 operands[2] = force_reg (mode, operands[2]);
11928
11929 if (mode == QImode
11930 && ! register_operand (operands[3], VOIDmode))
11931 operands[3] = force_reg (mode, operands[3]);
11932
11933 emit_insn (compare_seq);
11934 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
11935 gen_rtx_IF_THEN_ELSE (mode,
11936 compare_op, operands[2],
11937 operands[3])));
11938 if (bypass_test)
11939 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11940 gen_rtx_IF_THEN_ELSE (mode,
11941 bypass_test,
11942 copy_rtx (operands[3]),
11943 copy_rtx (operands[0]))));
11944 if (second_test)
11945 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
11946 gen_rtx_IF_THEN_ELSE (mode,
11947 second_test,
11948 copy_rtx (operands[2]),
11949 copy_rtx (operands[0]))));
11950
11951 return 1; /* DONE */
11952 }
11953
11954 /* Swap, force into registers, or otherwise massage the two operands
11955 to an sse comparison with a mask result. Thus we differ a bit from
11956 ix86_prepare_fp_compare_args which expects to produce a flags result.
11957
11958 The DEST operand exists to help determine whether to commute commutative
11959 operators. The POP0/POP1 operands are updated in place. The new
11960 comparison code is returned, or UNKNOWN if not implementable. */
11961
11962 static enum rtx_code
11963 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
11964 rtx *pop0, rtx *pop1)
11965 {
11966 rtx tmp;
11967
11968 switch (code)
11969 {
11970 case LTGT:
11971 case UNEQ:
11972 /* We have no LTGT as an operator. We could implement it with
11973 NE & ORDERED, but this requires an extra temporary. It's
11974 not clear that it's worth it. */
11975 return UNKNOWN;
11976
11977 case LT:
11978 case LE:
11979 case UNGT:
11980 case UNGE:
11981 /* These are supported directly. */
11982 break;
11983
11984 case EQ:
11985 case NE:
11986 case UNORDERED:
11987 case ORDERED:
11988 /* For commutative operators, try to canonicalize the destination
11989 operand to be first in the comparison - this helps reload to
11990 avoid extra moves. */
11991 if (!dest || !rtx_equal_p (dest, *pop1))
11992 break;
11993 /* FALLTHRU */
11994
11995 case GE:
11996 case GT:
11997 case UNLE:
11998 case UNLT:
11999 /* These are not supported directly. Swap the comparison operands
12000 to transform into something that is supported. */
12001 tmp = *pop0;
12002 *pop0 = *pop1;
12003 *pop1 = tmp;
12004 code = swap_condition (code);
12005 break;
12006
12007 default:
12008 gcc_unreachable ();
12009 }
12010
12011 return code;
12012 }
12013
12014 /* Detect conditional moves that exactly match min/max operational
12015 semantics. Note that this is IEEE safe, as long as we don't
12016 interchange the operands.
12017
12018 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12019 and TRUE if the operation is successful and instructions are emitted. */
12020
12021 static bool
12022 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12023 rtx cmp_op1, rtx if_true, rtx if_false)
12024 {
12025 enum machine_mode mode;
12026 bool is_min;
12027 rtx tmp;
12028
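/* Only the LT form is matched directly. UNGE is the inverse of LT, so an
UNGE test selects the same value once the two arms are swapped. */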
12029 if (code == LT)
12030 ;
12031 else if (code == UNGE)
12032 {
12033 tmp = if_true;
12034 if_true = if_false;
12035 if_false = tmp;
12036 }
12037 else
12038 return false;
12039
12040 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12041 is_min = true;
12042 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12043 is_min = false;
12044 else
12045 return false;
12046
12047 mode = GET_MODE (dest);
12048
12049 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12050 but MODE may be a vector mode and thus not appropriate. */
12051 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12052 {
12053 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12054 rtvec v;
12055
12056 if_true = force_reg (mode, if_true);
12057 v = gen_rtvec (2, if_true, if_false);
12058 tmp = gen_rtx_UNSPEC (mode, v, u);
12059 }
12060 else
12061 {
12062 code = is_min ? SMIN : SMAX;
12063 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12064 }
12065
12066 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12067 return true;
12068 }
12069
12070 /* Expand an sse vector comparison. Return the register with the result. */
12071
12072 static rtx
12073 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12074 rtx op_true, rtx op_false)
12075 {
12076 enum machine_mode mode = GET_MODE (dest);
12077 rtx x;
12078
12079 cmp_op0 = force_reg (mode, cmp_op0);
12080 if (!nonimmediate_operand (cmp_op1, mode))
12081 cmp_op1 = force_reg (mode, cmp_op1);
12082
12083 if (optimize
12084 || reg_overlap_mentioned_p (dest, op_true)
12085 || reg_overlap_mentioned_p (dest, op_false))
12086 dest = gen_reg_rtx (mode);
12087
12088 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12089 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12090
12091 return dest;
12092 }
12093
12094 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12095 operations. This is used for both scalar and vector conditional moves. */
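/* CMP is expected to be a mask of all-ones elements wherever the condition
holds (as produced by ix86_expand_sse_cmp), so the result is formed as
(CMP & OP_TRUE) | (~CMP & OP_FALSE). */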
12096
12097 static void
12098 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12099 {
12100 enum machine_mode mode = GET_MODE (dest);
12101 rtx t2, t3, x;
12102
12103 if (op_false == CONST0_RTX (mode))
12104 {
12105 op_true = force_reg (mode, op_true);
12106 x = gen_rtx_AND (mode, cmp, op_true);
12107 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12108 }
12109 else if (op_true == CONST0_RTX (mode))
12110 {
12111 op_false = force_reg (mode, op_false);
12112 x = gen_rtx_NOT (mode, cmp);
12113 x = gen_rtx_AND (mode, x, op_false);
12114 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12115 }
12116 else
12117 {
12118 op_true = force_reg (mode, op_true);
12119 op_false = force_reg (mode, op_false);
12120
12121 t2 = gen_reg_rtx (mode);
12122 if (optimize)
12123 t3 = gen_reg_rtx (mode);
12124 else
12125 t3 = dest;
12126
12127 x = gen_rtx_AND (mode, op_true, cmp);
12128 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12129
12130 x = gen_rtx_NOT (mode, cmp);
12131 x = gen_rtx_AND (mode, x, op_false);
12132 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12133
12134 x = gen_rtx_IOR (mode, t3, t2);
12135 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12136 }
12137 }
12138
12139 /* Expand a floating-point conditional move. Return true if successful. */
12140
12141 int
12142 ix86_expand_fp_movcc (rtx operands[])
12143 {
12144 enum machine_mode mode = GET_MODE (operands[0]);
12145 enum rtx_code code = GET_CODE (operands[1]);
12146 rtx tmp, compare_op, second_test, bypass_test;
12147
12148 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12149 {
12150 enum machine_mode cmode;
12151
12152 /* Since we've no cmove for sse registers, don't force bad register
12153 allocation just to gain access to it. Deny movcc when the
12154 comparison mode doesn't match the move mode. */
12155 cmode = GET_MODE (ix86_compare_op0);
12156 if (cmode == VOIDmode)
12157 cmode = GET_MODE (ix86_compare_op1);
12158 if (cmode != mode)
12159 return 0;
12160
12161 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12162 &ix86_compare_op0,
12163 &ix86_compare_op1);
12164 if (code == UNKNOWN)
12165 return 0;
12166
12167 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12168 ix86_compare_op1, operands[2],
12169 operands[3]))
12170 return 1;
12171
12172 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12173 ix86_compare_op1, operands[2], operands[3]);
12174 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12175 return 1;
12176 }
12177
12178 /* The floating point conditional move instructions don't directly
12179 support conditions resulting from a signed integer comparison. */
12180
12181 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12182
12183 /* The floating point conditional move instructions don't directly
12184 support signed integer comparisons. */
12185
12186 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12187 {
12188 gcc_assert (!second_test && !bypass_test);
12189 tmp = gen_reg_rtx (QImode);
12190 ix86_expand_setcc (code, tmp);
12191 code = NE;
12192 ix86_compare_op0 = tmp;
12193 ix86_compare_op1 = const0_rtx;
12194 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12195 }
12196 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12197 {
12198 tmp = gen_reg_rtx (mode);
12199 emit_move_insn (tmp, operands[3]);
12200 operands[3] = tmp;
12201 }
12202 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12203 {
12204 tmp = gen_reg_rtx (mode);
12205 emit_move_insn (tmp, operands[2]);
12206 operands[2] = tmp;
12207 }
12208
12209 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12210 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12211 operands[2], operands[3])));
12212 if (bypass_test)
12213 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12214 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12215 operands[3], operands[0])));
12216 if (second_test)
12217 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12218 gen_rtx_IF_THEN_ELSE (mode, second_test,
12219 operands[2], operands[0])));
12220
12221 return 1;
12222 }
12223
12224 /* Expand a floating-point vector conditional move; a vcond operation
12225 rather than a movcc operation. */
12226
12227 bool
12228 ix86_expand_fp_vcond (rtx operands[])
12229 {
12230 enum rtx_code code = GET_CODE (operands[3]);
12231 rtx cmp;
12232
12233 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12234 &operands[4], &operands[5]);
12235 if (code == UNKNOWN)
12236 return false;
12237
12238 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12239 operands[5], operands[1], operands[2]))
12240 return true;
12241
12242 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12243 operands[1], operands[2]);
12244 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12245 return true;
12246 }
12247
12248 /* Expand a signed integral vector conditional move. */
12249
12250 bool
12251 ix86_expand_int_vcond (rtx operands[])
12252 {
12253 enum machine_mode mode = GET_MODE (operands[0]);
12254 enum rtx_code code = GET_CODE (operands[3]);
12255 bool negate = false;
12256 rtx x, cop0, cop1;
12257
12258 cop0 = operands[4];
12259 cop1 = operands[5];
12260
12261 /* Canonicalize the comparison to EQ, GT, GTU. */
12262 switch (code)
12263 {
12264 case EQ:
12265 case GT:
12266 case GTU:
12267 break;
12268
12269 case NE:
12270 case LE:
12271 case LEU:
12272 code = reverse_condition (code);
12273 negate = true;
12274 break;
12275
12276 case GE:
12277 case GEU:
12278 code = reverse_condition (code);
12279 negate = true;
12280 /* FALLTHRU */
12281
12282 case LT:
12283 case LTU:
12284 code = swap_condition (code);
12285 x = cop0, cop0 = cop1, cop1 = x;
12286 break;
12287
12288 default:
12289 gcc_unreachable ();
12290 }
12291
12292 /* Unsigned parallel compare is not supported by the hardware. Play some
12293 tricks to turn this into a signed comparison against 0. */
12294 if (code == GTU)
12295 {
12296 cop0 = force_reg (mode, cop0);
12297
12298 switch (mode)
12299 {
12300 case V4SImode:
12301 {
12302 rtx t1, t2, mask;
12303
12304 /* Perform a parallel modulo subtraction. */
12305 t1 = gen_reg_rtx (mode);
12306 emit_insn (gen_subv4si3 (t1, cop0, cop1));
12307
12308 /* Extract the original sign bit of op0. */
12309 mask = GEN_INT (-0x80000000);
12310 mask = gen_rtx_CONST_VECTOR (mode,
12311 gen_rtvec (4, mask, mask, mask, mask));
12312 mask = force_reg (mode, mask);
12313 t2 = gen_reg_rtx (mode);
12314 emit_insn (gen_andv4si3 (t2, cop0, mask));
12315
12316 /* XOR it back into the result of the subtraction. This results
12317 in the sign bit set iff we saw unsigned underflow. */
12318 x = gen_reg_rtx (mode);
12319 emit_insn (gen_xorv4si3 (x, t1, t2));
12320
12321 code = GT;
12322 }
12323 break;
12324
12325 case V16QImode:
12326 case V8HImode:
12327 /* Perform a parallel unsigned saturating subtraction. */
12328 x = gen_reg_rtx (mode);
12329 emit_insn (gen_rtx_SET (VOIDmode, x,
12330 gen_rtx_US_MINUS (mode, cop0, cop1)));
12331
12332 code = EQ;
12333 negate = !negate;
12334 break;
12335
12336 default:
12337 gcc_unreachable ();
12338 }
12339
12340 cop0 = x;
12341 cop1 = CONST0_RTX (mode);
12342 }
12343
12344 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
12345 operands[1+negate], operands[2-negate]);
12346
12347 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
12348 operands[2-negate]);
12349 return true;
12350 }
12351
12352 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
12353 true if we should do zero extension, else sign extension. HIGH_P is
12354 true if we want the N/2 high elements, else the low elements. */
12355
12356 void
12357 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
12358 {
12359 enum machine_mode imode = GET_MODE (operands[1]);
12360 rtx (*unpack)(rtx, rtx, rtx);
12361 rtx se, dest;
12362
12363 switch (imode)
12364 {
12365 case V16QImode:
12366 if (high_p)
12367 unpack = gen_vec_interleave_highv16qi;
12368 else
12369 unpack = gen_vec_interleave_lowv16qi;
12370 break;
12371 case V8HImode:
12372 if (high_p)
12373 unpack = gen_vec_interleave_highv8hi;
12374 else
12375 unpack = gen_vec_interleave_lowv8hi;
12376 break;
12377 case V4SImode:
12378 if (high_p)
12379 unpack = gen_vec_interleave_highv4si;
12380 else
12381 unpack = gen_vec_interleave_lowv4si;
12382 break;
12383 default:
12384 gcc_unreachable ();
12385 }
12386
12387 dest = gen_lowpart (imode, operands[0]);
12388
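/* The second interleave operand supplies the high half of each widened
element: zero for zero extension, or a mask of the sign bits (0 > x)
for sign extension. */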
12389 if (unsigned_p)
12390 se = force_reg (imode, CONST0_RTX (imode));
12391 else
12392 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
12393 operands[1], pc_rtx, pc_rtx);
12394
12395 emit_insn (unpack (dest, operands[1], se));
12396 }
12397
12398 /* Expand conditional increment or decrement using adc/sbb instructions.
12399 The default case using setcc followed by the conditional move can be
12400 done by generic code. */
12401 int
12402 ix86_expand_int_addcc (rtx operands[])
12403 {
12404 enum rtx_code code = GET_CODE (operands[1]);
12405 rtx compare_op;
12406 rtx val = const0_rtx;
12407 bool fpcmp = false;
12408 enum machine_mode mode = GET_MODE (operands[0]);
12409
12410 if (operands[3] != const1_rtx
12411 && operands[3] != constm1_rtx)
12412 return 0;
12413 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12414 ix86_compare_op1, &compare_op))
12415 return 0;
12416 code = GET_CODE (compare_op);
12417
12418 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12419 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12420 {
12421 fpcmp = true;
12422 code = ix86_fp_compare_code_to_integer (code);
12423 }
12424
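/* ix86_expand_carry_flag_compare gave us either LTU (carry set exactly
when the condition holds) or GEU. For GEU, reverse the flags test and
compensate with a -1 immediate so that the adc/sbb constructed below
still adds or subtracts exactly one when the original condition holds. */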
12425 if (code != LTU)
12426 {
12427 val = constm1_rtx;
12428 if (fpcmp)
12429 PUT_CODE (compare_op,
12430 reverse_condition_maybe_unordered
12431 (GET_CODE (compare_op)));
12432 else
12433 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12434 }
12435 PUT_MODE (compare_op, mode);
12436
12437 /* Construct either adc or sbb insn. */
12438 if ((code == LTU) == (operands[3] == constm1_rtx))
12439 {
12440 switch (GET_MODE (operands[0]))
12441 {
12442 case QImode:
12443 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
12444 break;
12445 case HImode:
12446 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
12447 break;
12448 case SImode:
12449 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
12450 break;
12451 case DImode:
12452 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12453 break;
12454 default:
12455 gcc_unreachable ();
12456 }
12457 }
12458 else
12459 {
12460 switch (GET_MODE (operands[0]))
12461 {
12462 case QImode:
12463 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
12464 break;
12465 case HImode:
12466 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
12467 break;
12468 case SImode:
12469 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
12470 break;
12471 case DImode:
12472 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
12473 break;
12474 default:
12475 gcc_unreachable ();
12476 }
12477 }
12478 return 1; /* DONE */
12479 }
12480
12481
12482 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
12483 works for floating point parameters and non-offsettable memories.
12484 For pushes, it returns just stack offsets; the values will be saved
12485 in the right order. At most three parts are generated. */
12486
12487 static int
12488 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
12489 {
12490 int size;
12491
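/* On 32-bit targets DImode and DFmode split into two SImode parts and
XFmode into three; on 64-bit targets TImode, XFmode and TFmode split
into two parts. */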
12492 if (!TARGET_64BIT)
12493 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
12494 else
12495 size = (GET_MODE_SIZE (mode) + 4) / 8;
12496
12497 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
12498 gcc_assert (size >= 2 && size <= 3);
12499
12500 /* Optimize constant pool references to immediates. This is used by fp
12501 moves, which force all constants to memory to allow combining. */
12502 if (MEM_P (operand) && MEM_READONLY_P (operand))
12503 {
12504 rtx tmp = maybe_get_pool_constant (operand);
12505 if (tmp)
12506 operand = tmp;
12507 }
12508
12509 if (MEM_P (operand) && !offsettable_memref_p (operand))
12510 {
12511 /* The only non-offsettable memories we handle are pushes. */
12512 int ok = push_operand (operand, VOIDmode);
12513
12514 gcc_assert (ok);
12515
12516 operand = copy_rtx (operand);
12517 PUT_MODE (operand, Pmode);
12518 parts[0] = parts[1] = parts[2] = operand;
12519 return size;
12520 }
12521
12522 if (GET_CODE (operand) == CONST_VECTOR)
12523 {
12524 enum machine_mode imode = int_mode_for_mode (mode);
12525 /* Caution: if we looked through a constant pool memory above,
12526 the operand may actually have a different mode now. That's
12527 ok, since we want to pun this all the way back to an integer. */
12528 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
12529 gcc_assert (operand != NULL);
12530 mode = imode;
12531 }
12532
12533 if (!TARGET_64BIT)
12534 {
12535 if (mode == DImode)
12536 split_di (&operand, 1, &parts[0], &parts[1]);
12537 else
12538 {
12539 if (REG_P (operand))
12540 {
12541 gcc_assert (reload_completed);
12542 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
12543 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
12544 if (size == 3)
12545 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
12546 }
12547 else if (offsettable_memref_p (operand))
12548 {
12549 operand = adjust_address (operand, SImode, 0);
12550 parts[0] = operand;
12551 parts[1] = adjust_address (operand, SImode, 4);
12552 if (size == 3)
12553 parts[2] = adjust_address (operand, SImode, 8);
12554 }
12555 else if (GET_CODE (operand) == CONST_DOUBLE)
12556 {
12557 REAL_VALUE_TYPE r;
12558 long l[4];
12559
12560 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12561 switch (mode)
12562 {
12563 case XFmode:
12564 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
12565 parts[2] = gen_int_mode (l[2], SImode);
12566 break;
12567 case DFmode:
12568 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
12569 break;
12570 default:
12571 gcc_unreachable ();
12572 }
12573 parts[1] = gen_int_mode (l[1], SImode);
12574 parts[0] = gen_int_mode (l[0], SImode);
12575 }
12576 else
12577 gcc_unreachable ();
12578 }
12579 }
12580 else
12581 {
12582 if (mode == TImode)
12583 split_ti (&operand, 1, &parts[0], &parts[1]);
12584 if (mode == XFmode || mode == TFmode)
12585 {
12586 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
12587 if (REG_P (operand))
12588 {
12589 gcc_assert (reload_completed);
12590 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
12591 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
12592 }
12593 else if (offsettable_memref_p (operand))
12594 {
12595 operand = adjust_address (operand, DImode, 0);
12596 parts[0] = operand;
12597 parts[1] = adjust_address (operand, upper_mode, 8);
12598 }
12599 else if (GET_CODE (operand) == CONST_DOUBLE)
12600 {
12601 REAL_VALUE_TYPE r;
12602 long l[4];
12603
12604 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
12605 real_to_target (l, &r, mode);
12606
12607 /* Do not use shift by 32 to avoid warning on 32bit systems. */
12608 if (HOST_BITS_PER_WIDE_INT >= 64)
12609 parts[0]
12610 = gen_int_mode
12611 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
12612 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
12613 DImode);
12614 else
12615 parts[0] = immed_double_const (l[0], l[1], DImode);
12616
12617 if (upper_mode == SImode)
12618 parts[1] = gen_int_mode (l[2], SImode);
12619 else if (HOST_BITS_PER_WIDE_INT >= 64)
12620 parts[1]
12621 = gen_int_mode
12622 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
12623 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
12624 DImode);
12625 else
12626 parts[1] = immed_double_const (l[2], l[3], DImode);
12627 }
12628 else
12629 gcc_unreachable ();
12630 }
12631 }
12632
12633 return size;
12634 }
12635
12636 /* Emit insns to perform a move or push of DI, DF, and XF values.
12637 The operands array is used as scratch storage: operands 2-4 receive
12638 the destination parts in the correct order and operands 5-7 the
12639 corresponding source parts. */
12640
12641 void
12642 ix86_split_long_move (rtx operands[])
12643 {
12644 rtx part[2][3];
12645 int nparts;
12646 int push = 0;
12647 int collisions = 0;
12648 enum machine_mode mode = GET_MODE (operands[0]);
12649
12650 /* The DFmode expanders may ask us to move a double.
12651 For a 64-bit target this is a single move. By hiding that fact
12652 here we simplify the i386.md splitters. */
12653 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
12654 {
12655 /* Optimize constant pool references to immediates. This is used by
12656 fp moves, which force all constants to memory to allow combining. */
12657
12658 if (MEM_P (operands[1])
12659 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
12660 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
12661 operands[1] = get_pool_constant (XEXP (operands[1], 0));
12662 if (push_operand (operands[0], VOIDmode))
12663 {
12664 operands[0] = copy_rtx (operands[0]);
12665 PUT_MODE (operands[0], Pmode);
12666 }
12667 else
12668 operands[0] = gen_lowpart (DImode, operands[0]);
12669 operands[1] = gen_lowpart (DImode, operands[1]);
12670 emit_move_insn (operands[0], operands[1]);
12671 return;
12672 }
12673
12674 /* The only non-offsettable memory we handle is push. */
12675 if (push_operand (operands[0], VOIDmode))
12676 push = 1;
12677 else
12678 gcc_assert (!MEM_P (operands[0])
12679 || offsettable_memref_p (operands[0]));
12680
12681 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
12682 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
12683
12684 /* When emitting a push, take care of source operands on the stack. */
12685 if (push && MEM_P (operands[1])
12686 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
12687 {
12688 if (nparts == 3)
12689 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
12690 XEXP (part[1][2], 0));
12691 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
12692 XEXP (part[1][1], 0));
12693 }
12694
12695 /* We need to do the copies in the right order in case an address register
12696 of the source overlaps the destination. */
12697 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
12698 {
12699 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
12700 collisions++;
12701 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12702 collisions++;
12703 if (nparts == 3
12704 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
12705 collisions++;
12706
12707 /* Collision in the middle part can be handled by reordering. */
12708 if (collisions == 1 && nparts == 3
12709 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
12710 {
12711 rtx tmp;
12712 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
12713 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
12714 }
12715
12716 /* If there are more collisions, we can't handle them by reordering.
12717 Do an lea to the last part and use only one colliding move. */
12718 else if (collisions > 1)
12719 {
12720 rtx base;
12721
12722 collisions = 1;
12723
12724 base = part[0][nparts - 1];
12725
12726 /* Handle the case when the last part isn't valid for lea.
12727 This happens in 64-bit mode when storing the 12-byte XFmode. */
12728 if (GET_MODE (base) != Pmode)
12729 base = gen_rtx_REG (Pmode, REGNO (base));
12730
12731 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
12732 part[1][0] = replace_equiv_address (part[1][0], base);
12733 part[1][1] = replace_equiv_address (part[1][1],
12734 plus_constant (base, UNITS_PER_WORD));
12735 if (nparts == 3)
12736 part[1][2] = replace_equiv_address (part[1][2],
12737 plus_constant (base, 8));
12738 }
12739 }
12740
12741 if (push)
12742 {
12743 if (!TARGET_64BIT)
12744 {
12745 if (nparts == 3)
12746 {
12747 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
12748 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
12749 emit_move_insn (part[0][2], part[1][2]);
12750 }
12751 }
12752 else
12753 {
12754 /* In 64-bit mode we don't have a 32-bit push available. In case this is
12755 a register, it is OK: we will just use the larger counterpart. We also
12756 retype memory; this comes from an attempt to avoid a REX prefix when
12757 moving the second half of a TFmode value. */
12758 if (GET_MODE (part[1][1]) == SImode)
12759 {
12760 switch (GET_CODE (part[1][1]))
12761 {
12762 case MEM:
12763 part[1][1] = adjust_address (part[1][1], DImode, 0);
12764 break;
12765
12766 case REG:
12767 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
12768 break;
12769
12770 default:
12771 gcc_unreachable ();
12772 }
12773
12774 if (GET_MODE (part[1][0]) == SImode)
12775 part[1][0] = part[1][1];
12776 }
12777 }
12778 emit_move_insn (part[0][1], part[1][1]);
12779 emit_move_insn (part[0][0], part[1][0]);
12780 return;
12781 }
12782
12783 /* Choose the correct order so we do not overwrite the source before it is copied. */
12784 if ((REG_P (part[0][0])
12785 && REG_P (part[1][1])
12786 && (REGNO (part[0][0]) == REGNO (part[1][1])
12787 || (nparts == 3
12788 && REGNO (part[0][0]) == REGNO (part[1][2]))))
12789 || (collisions > 0
12790 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
12791 {
12792 if (nparts == 3)
12793 {
12794 operands[2] = part[0][2];
12795 operands[3] = part[0][1];
12796 operands[4] = part[0][0];
12797 operands[5] = part[1][2];
12798 operands[6] = part[1][1];
12799 operands[7] = part[1][0];
12800 }
12801 else
12802 {
12803 operands[2] = part[0][1];
12804 operands[3] = part[0][0];
12805 operands[5] = part[1][1];
12806 operands[6] = part[1][0];
12807 }
12808 }
12809 else
12810 {
12811 if (nparts == 3)
12812 {
12813 operands[2] = part[0][0];
12814 operands[3] = part[0][1];
12815 operands[4] = part[0][2];
12816 operands[5] = part[1][0];
12817 operands[6] = part[1][1];
12818 operands[7] = part[1][2];
12819 }
12820 else
12821 {
12822 operands[2] = part[0][0];
12823 operands[3] = part[0][1];
12824 operands[5] = part[1][0];
12825 operands[6] = part[1][1];
12826 }
12827 }
12828
12829 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
12830 if (optimize_size)
12831 {
12832 if (CONST_INT_P (operands[5])
12833 && operands[5] != const0_rtx
12834 && REG_P (operands[2]))
12835 {
12836 if (CONST_INT_P (operands[6])
12837 && INTVAL (operands[6]) == INTVAL (operands[5]))
12838 operands[6] = operands[2];
12839
12840 if (nparts == 3
12841 && CONST_INT_P (operands[7])
12842 && INTVAL (operands[7]) == INTVAL (operands[5]))
12843 operands[7] = operands[2];
12844 }
12845
12846 if (nparts == 3
12847 && CONST_INT_P (operands[6])
12848 && operands[6] != const0_rtx
12849 && REG_P (operands[3])
12850 && CONST_INT_P (operands[7])
12851 && INTVAL (operands[7]) == INTVAL (operands[6]))
12852 operands[7] = operands[3];
12853 }
12854
12855 emit_move_insn (operands[2], operands[5]);
12856 emit_move_insn (operands[3], operands[6]);
12857 if (nparts == 3)
12858 emit_move_insn (operands[4], operands[7]);
12859
12860 return;
12861 }
12862
12863 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
12864 left shift by a constant, either using a single shift or
12865 a sequence of add instructions. */
12866
12867 static void
12868 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
12869 {
12870 if (count == 1)
12871 {
12872 emit_insn ((mode == DImode
12873 ? gen_addsi3
12874 : gen_adddi3) (operand, operand, operand));
12875 }
12876 else if (!optimize_size
12877 && count * ix86_cost->add <= ix86_cost->shift_const)
12878 {
12879 int i;
12880 for (i=0; i<count; i++)
12881 {
12882 emit_insn ((mode == DImode
12883 ? gen_addsi3
12884 : gen_adddi3) (operand, operand, operand));
12885 }
12886 }
12887 else
12888 emit_insn ((mode == DImode
12889 ? gen_ashlsi3
12890 : gen_ashldi3) (operand, operand, GEN_INT (count)));
12891 }
12892
12893 void
12894 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
12895 {
12896 rtx low[2], high[2];
12897 int count;
12898 const int single_width = mode == DImode ? 32 : 64;
12899
12900 if (CONST_INT_P (operands[2]))
12901 {
12902 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
12903 count = INTVAL (operands[2]) & (single_width * 2 - 1);
12904
12905 if (count >= single_width)
12906 {
12907 emit_move_insn (high[0], low[1]);
12908 emit_move_insn (low[0], const0_rtx);
12909
12910 if (count > single_width)
12911 ix86_expand_ashl_const (high[0], count - single_width, mode);
12912 }
12913 else
12914 {
12915 if (!rtx_equal_p (operands[0], operands[1]))
12916 emit_move_insn (operands[0], operands[1]);
12917 emit_insn ((mode == DImode
12918 ? gen_x86_shld_1
12919 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
12920 ix86_expand_ashl_const (low[0], count, mode);
12921 }
12922 return;
12923 }
12924
12925 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
12926
12927 if (operands[1] == const1_rtx)
12928 {
12929 /* Assuming we've chosen QImode-capable registers, 1 << N
12930 can be done with two 32/64-bit shifts, no branches, no cmoves. */
12931 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
12932 {
12933 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
12934
12935 ix86_expand_clear (low[0]);
12936 ix86_expand_clear (high[0]);
12937 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
12938
12939 d = gen_lowpart (QImode, low[0]);
12940 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12941 s = gen_rtx_EQ (QImode, flags, const0_rtx);
12942 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12943
12944 d = gen_lowpart (QImode, high[0]);
12945 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
12946 s = gen_rtx_NE (QImode, flags, const0_rtx);
12947 emit_insn (gen_rtx_SET (VOIDmode, d, s));
12948 }
12949
12950 /* Otherwise, we can get the same results by manually performing
12951 a bit extract operation on bit 5/6, and then performing the two
12952 shifts. The two methods of getting 0/1 into low/high are exactly
12953 the same size. Avoiding the shift in the bit extract case helps
12954 pentium4 a bit; no one else seems to care much either way. */
12955 else
12956 {
12957 rtx x;
12958
12959 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
12960 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
12961 else
12962 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
12963 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
12964
12965 emit_insn ((mode == DImode
12966 ? gen_lshrsi3
12967 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
12968 emit_insn ((mode == DImode
12969 ? gen_andsi3
12970 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
12971 emit_move_insn (low[0], high[0]);
12972 emit_insn ((mode == DImode
12973 ? gen_xorsi3
12974 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
12975 }
12976
12977 emit_insn ((mode == DImode
12978 ? gen_ashlsi3
12979 : gen_ashldi3) (low[0], low[0], operands[2]));
12980 emit_insn ((mode == DImode
12981 ? gen_ashlsi3
12982 : gen_ashldi3) (high[0], high[0], operands[2]));
12983 return;
12984 }
12985
12986 if (operands[1] == constm1_rtx)
12987 {
12988 /* For -1 << N, we can avoid the shld instruction, because we
12989 know that we're shifting 0...31/63 ones into a -1. */
12990 emit_move_insn (low[0], constm1_rtx);
12991 if (optimize_size)
12992 emit_move_insn (high[0], low[0]);
12993 else
12994 emit_move_insn (high[0], constm1_rtx);
12995 }
12996 else
12997 {
12998 if (!rtx_equal_p (operands[0], operands[1]))
12999 emit_move_insn (operands[0], operands[1]);
13000
13001 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13002 emit_insn ((mode == DImode
13003 ? gen_x86_shld_1
13004 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13005 }
13006
13007 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13008
13009 if (TARGET_CMOVE && scratch)
13010 {
13011 ix86_expand_clear (scratch);
13012 emit_insn ((mode == DImode
13013 ? gen_x86_shift_adj_1
13014 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13015 }
13016 else
13017 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13018 }
13019
13020 void
13021 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13022 {
13023 rtx low[2], high[2];
13024 int count;
13025 const int single_width = mode == DImode ? 32 : 64;
13026
13027 if (CONST_INT_P (operands[2]))
13028 {
13029 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13030 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13031
13032 if (count == single_width * 2 - 1)
13033 {
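/* Descriptive note: with a shift count of 2 * single_width - 1, both
   result halves reduce to the broadcast sign bit of the source, so no
   shrd/shift pair is needed.  */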
13034 emit_move_insn (high[0], high[1]);
13035 emit_insn ((mode == DImode
13036 ? gen_ashrsi3
13037 : gen_ashrdi3) (high[0], high[0],
13038 GEN_INT (single_width - 1)));
13039 emit_move_insn (low[0], high[0]);
13040
13041 }
13042 else if (count >= single_width)
13043 {
13044 emit_move_insn (low[0], high[1]);
13045 emit_move_insn (high[0], low[0]);
13046 emit_insn ((mode == DImode
13047 ? gen_ashrsi3
13048 : gen_ashrdi3) (high[0], high[0],
13049 GEN_INT (single_width - 1)));
13050 if (count > single_width)
13051 emit_insn ((mode == DImode
13052 ? gen_ashrsi3
13053 : gen_ashrdi3) (low[0], low[0],
13054 GEN_INT (count - single_width)));
13055 }
13056 else
13057 {
13058 if (!rtx_equal_p (operands[0], operands[1]))
13059 emit_move_insn (operands[0], operands[1]);
13060 emit_insn ((mode == DImode
13061 ? gen_x86_shrd_1
13062 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13063 emit_insn ((mode == DImode
13064 ? gen_ashrsi3
13065 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13066 }
13067 }
13068 else
13069 {
13070 if (!rtx_equal_p (operands[0], operands[1]))
13071 emit_move_insn (operands[0], operands[1]);
13072
13073 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13074
13075 emit_insn ((mode == DImode
13076 ? gen_x86_shrd_1
13077 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13078 emit_insn ((mode == DImode
13079 ? gen_ashrsi3
13080 : gen_ashrdi3) (high[0], high[0], operands[2]));
13081
13082 if (TARGET_CMOVE && scratch)
13083 {
13084 emit_move_insn (scratch, high[0]);
13085 emit_insn ((mode == DImode
13086 ? gen_ashrsi3
13087 : gen_ashrdi3) (scratch, scratch,
13088 GEN_INT (single_width - 1)));
13089 emit_insn ((mode == DImode
13090 ? gen_x86_shift_adj_1
13091 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13092 scratch));
13093 }
13094 else
13095 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13096 }
13097 }
13098
13099 void
13100 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13101 {
13102 rtx low[2], high[2];
13103 int count;
13104 const int single_width = mode == DImode ? 32 : 64;
13105
13106 if (CONST_INT_P (operands[2]))
13107 {
13108 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13109 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13110
13111 if (count >= single_width)
13112 {
13113 emit_move_insn (low[0], high[1]);
13114 ix86_expand_clear (high[0]);
13115
13116 if (count > single_width)
13117 emit_insn ((mode == DImode
13118 ? gen_lshrsi3
13119 : gen_lshrdi3) (low[0], low[0],
13120 GEN_INT (count - single_width)));
13121 }
13122 else
13123 {
13124 if (!rtx_equal_p (operands[0], operands[1]))
13125 emit_move_insn (operands[0], operands[1]);
13126 emit_insn ((mode == DImode
13127 ? gen_x86_shrd_1
13128 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13129 emit_insn ((mode == DImode
13130 ? gen_lshrsi3
13131 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13132 }
13133 }
13134 else
13135 {
13136 if (!rtx_equal_p (operands[0], operands[1]))
13137 emit_move_insn (operands[0], operands[1]);
13138
13139 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13140
13141 emit_insn ((mode == DImode
13142 ? gen_x86_shrd_1
13143 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13144 emit_insn ((mode == DImode
13145 ? gen_lshrsi3
13146 : gen_lshrdi3) (high[0], high[0], operands[2]));
13147
13148 /* Heh. By reversing the arguments, we can reuse this pattern. */
13149 if (TARGET_CMOVE && scratch)
13150 {
13151 ix86_expand_clear (scratch);
13152 emit_insn ((mode == DImode
13153 ? gen_x86_shift_adj_1
13154 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13155 scratch));
13156 }
13157 else
13158 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13159 }
13160 }
13161
13162 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13163 static void
13164 predict_jump (int prob)
13165 {
13166 rtx insn = get_last_insn ();
13167 gcc_assert (JUMP_P (insn));
13168 REG_NOTES (insn)
13169 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13170 GEN_INT (prob),
13171 REG_NOTES (insn));
13172 }
13173
13174 /* Helper function for the string operations below. Test VARIABLE whether
13175 it is aligned to VALUE bytes. If so, jump to the label. */
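/* Descriptive note: the test emitted below is (VARIABLE & VALUE) == 0,
   and the returned label is the branch target for that case; the insns
   the caller places before emit_label (label) therefore execute only
   when the tested bit is set.  */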
13176 static rtx
13177 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13178 {
13179 rtx label = gen_label_rtx ();
13180 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13181 if (GET_MODE (variable) == DImode)
13182 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13183 else
13184 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13185 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13186 1, label);
13187 if (epilogue)
13188 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13189 else
13190 predict_jump (REG_BR_PROB_BASE * 90 / 100);
13191 return label;
13192 }
13193
13194 /* Decrease COUNTREG by VALUE. */
13195 static void
13196 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
13197 {
13198 if (GET_MODE (countreg) == DImode)
13199 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
13200 else
13201 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
13202 }
13203
13204 /* Zero extend EXP, which may be SImode, to a Pmode register. */
13205 rtx
13206 ix86_zero_extend_to_Pmode (rtx exp)
13207 {
13208 rtx r;
13209 if (GET_MODE (exp) == VOIDmode)
13210 return force_reg (Pmode, exp);
13211 if (GET_MODE (exp) == Pmode)
13212 return copy_to_mode_reg (Pmode, exp);
13213 r = gen_reg_rtx (Pmode);
13214 emit_insn (gen_zero_extendsidi2 (r, exp));
13215 return r;
13216 }
13217
13218 /* Divide COUNTREG by SCALE. */
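/* For example (illustrative), a constant byte count of 37 scaled by 4
   yields 9, i.e. nine SImode chunks; the remaining 37 & 3 bytes are left
   for the epilogue code emitted elsewhere.  */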
13219 static rtx
13220 scale_counter (rtx countreg, int scale)
13221 {
13222 rtx sc;
13223 rtx piece_size_mask;
13224
13225 if (scale == 1)
13226 return countreg;
13227 if (CONST_INT_P (countreg))
13228 return GEN_INT (INTVAL (countreg) / scale);
13229 gcc_assert (REG_P (countreg));
13230
13231 piece_size_mask = GEN_INT (scale - 1);
13232 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
13233 GEN_INT (exact_log2 (scale)),
13234 NULL, 1, OPTAB_DIRECT);
13235 return sc;
13236 }
13237
13238 /* Return mode for the memcpy/memset loop counter. Prefer SImode over DImode
13239 for constant loop counts. */
13240
13241 static enum machine_mode
13242 counter_mode (rtx count_exp)
13243 {
13244 if (GET_MODE (count_exp) != VOIDmode)
13245 return GET_MODE (count_exp);
13246 if (GET_CODE (count_exp) != CONST_INT)
13247 return Pmode;
13248 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
13249 return DImode;
13250 return SImode;
13251 }
13252
13253 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
13254 to by SRCPTR to DESTPTR in chunks of MODE, unrolled UNROLL times; the
13255 overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
13256 equivalent loop to set memory to VALUE (supposed to be in MODE).
13257
13258 The size is rounded down to a whole number of chunks moved at once.
13259 SRCMEM and DESTMEM provide MEM rtx to feed proper aliasing info. */
13260
13261
13262 static void
13263 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
13264 rtx destptr, rtx srcptr, rtx value,
13265 rtx count, enum machine_mode mode, int unroll,
13266 int expected_size)
13267 {
13268 rtx out_label, top_label, iter, tmp;
13269 enum machine_mode iter_mode = counter_mode (count);
13270 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
13271 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
13272 rtx size;
13273 rtx x_addr;
13274 rtx y_addr;
13275 int i;
13276
13277 top_label = gen_label_rtx ();
13278 out_label = gen_label_rtx ();
13279 iter = gen_reg_rtx (iter_mode);
13280
13281 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
13282 NULL, 1, OPTAB_DIRECT);
13283 /* Those two should combine. */
13284 if (piece_size == const1_rtx)
13285 {
13286 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
13287 true, out_label);
13288 predict_jump (REG_BR_PROB_BASE * 10 / 100);
13289 }
13290 emit_move_insn (iter, const0_rtx);
13291
13292 emit_label (top_label);
13293
13294 tmp = convert_modes (Pmode, iter_mode, iter, true);
13295 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
13296 destmem = change_address (destmem, mode, x_addr);
13297
13298 if (srcmem)
13299 {
13300 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
13301 srcmem = change_address (srcmem, mode, y_addr);
13302
13303 /* When unrolling for chips that reorder memory reads and writes,
13304 we can save registers by using a single temporary.
13305 Also, using 4 temporaries is overkill in 32-bit mode. */
13306 if (!TARGET_64BIT && 0)
13307 {
13308 for (i = 0; i < unroll; i++)
13309 {
13310 if (i)
13311 {
13312 destmem =
13313 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13314 srcmem =
13315 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13316 }
13317 emit_move_insn (destmem, srcmem);
13318 }
13319 }
13320 else
13321 {
13322 rtx tmpreg[4];
13323 gcc_assert (unroll <= 4);
13324 for (i = 0; i < unroll; i++)
13325 {
13326 tmpreg[i] = gen_reg_rtx (mode);
13327 if (i)
13328 {
13329 srcmem =
13330 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
13331 }
13332 emit_move_insn (tmpreg[i], srcmem);
13333 }
13334 for (i = 0; i < unroll; i++)
13335 {
13336 if (i)
13337 {
13338 destmem =
13339 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13340 }
13341 emit_move_insn (destmem, tmpreg[i]);
13342 }
13343 }
13344 }
13345 else
13346 for (i = 0; i < unroll; i++)
13347 {
13348 if (i)
13349 destmem =
13350 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
13351 emit_move_insn (destmem, value);
13352 }
13353
13354 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
13355 true, OPTAB_LIB_WIDEN);
13356 if (tmp != iter)
13357 emit_move_insn (iter, tmp);
13358
13359 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
13360 true, top_label);
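/* Descriptive note: below, EXPECTED_SIZE is first scaled down to the
   expected number of loop iterations N; the loop-back branch is then
   predicted taken with probability roughly 1 - 1/N (rounded), expressed
   in REG_BR_PROB_BASE units.  */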
13361 if (expected_size != -1)
13362 {
13363 expected_size /= GET_MODE_SIZE (mode) * unroll;
13364 if (expected_size == 0)
13365 predict_jump (0);
13366 else if (expected_size > REG_BR_PROB_BASE)
13367 predict_jump (REG_BR_PROB_BASE - 1);
13368 else
13369 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
13370 }
13371 else
13372 predict_jump (REG_BR_PROB_BASE * 80 / 100);
13373 iter = ix86_zero_extend_to_Pmode (iter);
13374 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
13375 true, OPTAB_LIB_WIDEN);
13376 if (tmp != destptr)
13377 emit_move_insn (destptr, tmp);
13378 if (srcptr)
13379 {
13380 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
13381 true, OPTAB_LIB_WIDEN);
13382 if (tmp != srcptr)
13383 emit_move_insn (srcptr, tmp);
13384 }
13385 emit_label (out_label);
13386 }
13387
13388 /* Output "rep; mov" instruction.
13389 Arguments have the same meaning as for the previous function. */
13390 static void
13391 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
13392 rtx destptr, rtx srcptr,
13393 rtx count,
13394 enum machine_mode mode)
13395 {
13396 rtx destexp;
13397 rtx srcexp;
13398 rtx countreg;
13399
13400 /* If the size is known, it is shorter to use rep movs. */
13401 if (mode == QImode && CONST_INT_P (count)
13402 && !(INTVAL (count) & 3))
13403 mode = SImode;
13404
13405 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13406 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13407 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
13408 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
13409 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
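/* Descriptive note (reading of the pattern's operands): DESTEXP and
   SRCEXP built below express the final pointer values, i.e. pointer plus
   number of bytes moved, so the rep_mov pattern can expose the pointer
   updates and aliasing to the RTL optimizers.  */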
13410 if (mode != QImode)
13411 {
13412 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13413 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13414 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13415 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
13416 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13417 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
13418 }
13419 else
13420 {
13421 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13422 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
13423 }
13424 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
13425 destexp, srcexp));
13426 }
13427
13428 /* Output "rep; stos" instruction.
13429 Arguments have the same meaning as for the previous function. */
13430 static void
13431 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
13432 rtx count,
13433 enum machine_mode mode)
13434 {
13435 rtx destexp;
13436 rtx countreg;
13437
13438 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
13439 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
13440 value = force_reg (mode, gen_lowpart (mode, value));
13441 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
13442 if (mode != QImode)
13443 {
13444 destexp = gen_rtx_ASHIFT (Pmode, countreg,
13445 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
13446 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
13447 }
13448 else
13449 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
13450 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
13451 }
13452
13453 static void
13454 emit_strmov (rtx destmem, rtx srcmem,
13455 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
13456 {
13457 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
13458 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
13459 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13460 }
13461
13462 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
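/* Descriptive note: for a constant count the code below tests the low
   bits of COUNTVAL (16, 8, 4, 2, 1, as permitted by MAX_SIZE) and emits
   one or two moves per set bit, which copies exactly the remaining
   count & (max_size - 1) bytes.  */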
13463 static void
13464 expand_movmem_epilogue (rtx destmem, rtx srcmem,
13465 rtx destptr, rtx srcptr, rtx count, int max_size)
13466 {
13467 rtx src, dest;
13468 if (CONST_INT_P (count))
13469 {
13470 HOST_WIDE_INT countval = INTVAL (count);
13471 int offset = 0;
13472
13473 if ((countval & 0x10) && max_size > 16)
13474 {
13475 if (TARGET_64BIT)
13476 {
13477 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13478 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
13479 }
13480 else
13481 gcc_unreachable ();
13482 offset += 16;
13483 }
13484 if ((countval & 0x08) && max_size > 8)
13485 {
13486 if (TARGET_64BIT)
13487 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
13488 else
13489 {
13490 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13491 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
13492 }
13493 offset += 8;
13494 }
13495 if ((countval & 0x04) && max_size > 4)
13496 {
13497 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
13498 offset += 4;
13499 }
13500 if ((countval & 0x02) && max_size > 2)
13501 {
13502 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
13503 offset += 2;
13504 }
13505 if ((countval & 0x01) && max_size > 1)
13506 {
13507 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
13508 offset += 1;
13509 }
13510 return;
13511 }
13512 if (max_size > 8)
13513 {
13514 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
13515 count, 1, OPTAB_DIRECT);
13516 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
13517 count, QImode, 1, 4);
13518 return;
13519 }
13520
13521 /* When single-instruction string operations are available, we can cheaply
13522 increase dest and src pointers. Otherwise we save code size by maintaining
13523 an offset (zero is readily available from the preceding rep operation)
13524 and using x86 addressing modes. */
13525 if (TARGET_SINGLE_STRINGOP)
13526 {
13527 if (max_size > 4)
13528 {
13529 rtx label = ix86_expand_aligntest (count, 4, true);
13530 src = change_address (srcmem, SImode, srcptr);
13531 dest = change_address (destmem, SImode, destptr);
13532 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13533 emit_label (label);
13534 LABEL_NUSES (label) = 1;
13535 }
13536 if (max_size > 2)
13537 {
13538 rtx label = ix86_expand_aligntest (count, 2, true);
13539 src = change_address (srcmem, HImode, srcptr);
13540 dest = change_address (destmem, HImode, destptr);
13541 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13542 emit_label (label);
13543 LABEL_NUSES (label) = 1;
13544 }
13545 if (max_size > 1)
13546 {
13547 rtx label = ix86_expand_aligntest (count, 1, true);
13548 src = change_address (srcmem, QImode, srcptr);
13549 dest = change_address (destmem, QImode, destptr);
13550 emit_insn (gen_strmov (destptr, dest, srcptr, src));
13551 emit_label (label);
13552 LABEL_NUSES (label) = 1;
13553 }
13554 }
13555 else
13556 {
13557 rtx offset = force_reg (Pmode, const0_rtx);
13558 rtx tmp;
13559
13560 if (max_size > 4)
13561 {
13562 rtx label = ix86_expand_aligntest (count, 4, true);
13563 src = change_address (srcmem, SImode, srcptr);
13564 dest = change_address (destmem, SImode, destptr);
13565 emit_move_insn (dest, src);
13566 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
13567 true, OPTAB_LIB_WIDEN);
13568 if (tmp != offset)
13569 emit_move_insn (offset, tmp);
13570 emit_label (label);
13571 LABEL_NUSES (label) = 1;
13572 }
13573 if (max_size > 2)
13574 {
13575 rtx label = ix86_expand_aligntest (count, 2, true);
13576 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13577 src = change_address (srcmem, HImode, tmp);
13578 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13579 dest = change_address (destmem, HImode, tmp);
13580 emit_move_insn (dest, src);
13581 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
13582 true, OPTAB_LIB_WIDEN);
13583 if (tmp != offset)
13584 emit_move_insn (offset, tmp);
13585 emit_label (label);
13586 LABEL_NUSES (label) = 1;
13587 }
13588 if (max_size > 1)
13589 {
13590 rtx label = ix86_expand_aligntest (count, 1, true);
13591 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
13592 src = change_address (srcmem, QImode, tmp);
13593 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
13594 dest = change_address (destmem, QImode, tmp);
13595 emit_move_insn (dest, src);
13596 emit_label (label);
13597 LABEL_NUSES (label) = 1;
13598 }
13599 }
13600 }
13601
13602 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13603 static void
13604 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
13605 rtx count, int max_size)
13606 {
13607 count =
13608 expand_simple_binop (counter_mode (count), AND, count,
13609 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
13610 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
13611 gen_lowpart (QImode, value), count, QImode,
13612 1, max_size / 2);
13613 }
13614
13615 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
13616 static void
13617 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
13618 {
13619 rtx dest;
13620
13621 if (CONST_INT_P (count))
13622 {
13623 HOST_WIDE_INT countval = INTVAL (count);
13624 int offset = 0;
13625
13626 if ((countval & 0x10) && max_size > 16)
13627 {
13628 if (TARGET_64BIT)
13629 {
13630 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13631 emit_insn (gen_strset (destptr, dest, value));
13632 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
13633 emit_insn (gen_strset (destptr, dest, value));
13634 }
13635 else
13636 gcc_unreachable ();
13637 offset += 16;
13638 }
13639 if ((countval & 0x08) && max_size > 8)
13640 {
13641 if (TARGET_64BIT)
13642 {
13643 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
13644 emit_insn (gen_strset (destptr, dest, value));
13645 }
13646 else
13647 {
13648 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13649 emit_insn (gen_strset (destptr, dest, value));
13650 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
13651 emit_insn (gen_strset (destptr, dest, value));
13652 }
13653 offset += 8;
13654 }
13655 if ((countval & 0x04) && max_size > 4)
13656 {
13657 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
13658 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13659 offset += 4;
13660 }
13661 if ((countval & 0x02) && max_size > 2)
13662 {
13663 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
13664 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13665 offset += 2;
13666 }
13667 if ((countval & 0x01) && max_size > 1)
13668 {
13669 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
13670 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13671 offset += 1;
13672 }
13673 return;
13674 }
13675 if (max_size > 32)
13676 {
13677 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
13678 return;
13679 }
13680 if (max_size > 16)
13681 {
13682 rtx label = ix86_expand_aligntest (count, 16, true);
13683 if (TARGET_64BIT)
13684 {
13685 dest = change_address (destmem, DImode, destptr);
13686 emit_insn (gen_strset (destptr, dest, value));
13687 emit_insn (gen_strset (destptr, dest, value));
13688 }
13689 else
13690 {
13691 dest = change_address (destmem, SImode, destptr);
13692 emit_insn (gen_strset (destptr, dest, value));
13693 emit_insn (gen_strset (destptr, dest, value));
13694 emit_insn (gen_strset (destptr, dest, value));
13695 emit_insn (gen_strset (destptr, dest, value));
13696 }
13697 emit_label (label);
13698 LABEL_NUSES (label) = 1;
13699 }
13700 if (max_size > 8)
13701 {
13702 rtx label = ix86_expand_aligntest (count, 8, true);
13703 if (TARGET_64BIT)
13704 {
13705 dest = change_address (destmem, DImode, destptr);
13706 emit_insn (gen_strset (destptr, dest, value));
13707 }
13708 else
13709 {
13710 dest = change_address (destmem, SImode, destptr);
13711 emit_insn (gen_strset (destptr, dest, value));
13712 emit_insn (gen_strset (destptr, dest, value));
13713 }
13714 emit_label (label);
13715 LABEL_NUSES (label) = 1;
13716 }
13717 if (max_size > 4)
13718 {
13719 rtx label = ix86_expand_aligntest (count, 4, true);
13720 dest = change_address (destmem, SImode, destptr);
13721 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
13722 emit_label (label);
13723 LABEL_NUSES (label) = 1;
13724 }
13725 if (max_size > 2)
13726 {
13727 rtx label = ix86_expand_aligntest (count, 2, true);
13728 dest = change_address (destmem, HImode, destptr);
13729 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
13730 emit_label (label);
13731 LABEL_NUSES (label) = 1;
13732 }
13733 if (max_size > 1)
13734 {
13735 rtx label = ix86_expand_aligntest (count, 1, true);
13736 dest = change_address (destmem, QImode, destptr);
13737 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
13738 emit_label (label);
13739 LABEL_NUSES (label) = 1;
13740 }
13741 }
13742
13743 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN,
13744 to DESIRED_ALIGNMENT. */
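/* Descriptive note: each test below checks one low bit of the
   destination address; when the bit is set, one piece of that size is
   copied and COUNT is decreased accordingly, so after all tests the
   destination is aligned to DESIRED_ALIGNMENT.  */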
13745 static void
13746 expand_movmem_prologue (rtx destmem, rtx srcmem,
13747 rtx destptr, rtx srcptr, rtx count,
13748 int align, int desired_alignment)
13749 {
13750 if (align <= 1 && desired_alignment > 1)
13751 {
13752 rtx label = ix86_expand_aligntest (destptr, 1, false);
13753 srcmem = change_address (srcmem, QImode, srcptr);
13754 destmem = change_address (destmem, QImode, destptr);
13755 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13756 ix86_adjust_counter (count, 1);
13757 emit_label (label);
13758 LABEL_NUSES (label) = 1;
13759 }
13760 if (align <= 2 && desired_alignment > 2)
13761 {
13762 rtx label = ix86_expand_aligntest (destptr, 2, false);
13763 srcmem = change_address (srcmem, HImode, srcptr);
13764 destmem = change_address (destmem, HImode, destptr);
13765 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13766 ix86_adjust_counter (count, 2);
13767 emit_label (label);
13768 LABEL_NUSES (label) = 1;
13769 }
13770 if (align <= 4 && desired_alignment > 4)
13771 {
13772 rtx label = ix86_expand_aligntest (destptr, 4, false);
13773 srcmem = change_address (srcmem, SImode, srcptr);
13774 destmem = change_address (destmem, SImode, destptr);
13775 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
13776 ix86_adjust_counter (count, 4);
13777 emit_label (label);
13778 LABEL_NUSES (label) = 1;
13779 }
13780 gcc_assert (desired_alignment <= 8);
13781 }
13782
13783 /* Set enough bytes at DEST to align DEST, known to be aligned by ALIGN,
13784 to DESIRED_ALIGNMENT. */
13785 static void
13786 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
13787 int align, int desired_alignment)
13788 {
13789 if (align <= 1 && desired_alignment > 1)
13790 {
13791 rtx label = ix86_expand_aligntest (destptr, 1, false);
13792 destmem = change_address (destmem, QImode, destptr);
13793 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
13794 ix86_adjust_counter (count, 1);
13795 emit_label (label);
13796 LABEL_NUSES (label) = 1;
13797 }
13798 if (align <= 2 && desired_alignment > 2)
13799 {
13800 rtx label = ix86_expand_aligntest (destptr, 2, false);
13801 destmem = change_address (destmem, HImode, destptr);
13802 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
13803 ix86_adjust_counter (count, 2);
13804 emit_label (label);
13805 LABEL_NUSES (label) = 1;
13806 }
13807 if (align <= 4 && desired_alignment > 4)
13808 {
13809 rtx label = ix86_expand_aligntest (destptr, 4, false);
13810 destmem = change_address (destmem, SImode, destptr);
13811 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
13812 ix86_adjust_counter (count, 4);
13813 emit_label (label);
13814 LABEL_NUSES (label) = 1;
13815 }
13816 gcc_assert (desired_alignment <= 8);
13817 }
13818
13819 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
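/* Descriptive note: the per-CPU stringop_algs tables in the processor
   cost structures list (max, alg) pairs; for a known expected size the
   loop below picks the algorithm of the first entry whose max covers
   that size, falling back to unknown_size otherwise.  */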
13820 static enum stringop_alg
13821 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
13822 int *dynamic_check)
13823 {
13824 const struct stringop_algs * algs;
13825
13826 *dynamic_check = -1;
13827 if (memset)
13828 algs = &ix86_cost->memset[TARGET_64BIT != 0];
13829 else
13830 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
13831 if (stringop_alg != no_stringop)
13832 return stringop_alg;
13833 /* rep; movq or rep; movl is the smallest variant. */
13834 else if (optimize_size)
13835 {
13836 if (!count || (count & 3))
13837 return rep_prefix_1_byte;
13838 else
13839 return rep_prefix_4_byte;
13840 }
13841 /* Very tiny blocks are best handled via the loop; REP is expensive
13842 to set up. */
13843 else if (expected_size != -1 && expected_size < 4)
13844 return loop_1_byte;
13845 else if (expected_size != -1)
13846 {
13847 unsigned int i;
13848 enum stringop_alg alg = libcall;
13849 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13850 {
13851 gcc_assert (algs->size[i].max);
13852 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
13853 {
13854 if (algs->size[i].alg != libcall)
13855 alg = algs->size[i].alg;
13856 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
13857 last non-libcall inline algorithm. */
13858 if (TARGET_INLINE_ALL_STRINGOPS)
13859 {
13860 /* When the current size is best copied by a libcall, but we are
13861 still forced to inline, run the heuristic below that will pick
13862 code for medium-sized blocks. */
13863 if (alg != libcall)
13864 return alg;
13865 break;
13866 }
13867 else
13868 return algs->size[i].alg;
13869 }
13870 }
13871 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
13872 }
13873 /* When asked to inline the call anyway, try to pick a meaningful choice.
13874 We look for the maximal size of block that is faster to copy by hand and
13875 take blocks of at most that size, guessing that the average size will
13876 be roughly half of the block.
13877
13878 If this turns out to be bad, we might simply specify the preferred
13879 choice in ix86_costs. */
13880 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13881 && algs->unknown_size == libcall)
13882 {
13883 int max = -1;
13884 enum stringop_alg alg;
13885 int i;
13886
13887 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
13888 if (algs->size[i].alg != libcall && algs->size[i].alg)
13889 max = algs->size[i].max;
13890 if (max == -1)
13891 max = 4096;
13892 alg = decide_alg (count, max / 2, memset, dynamic_check);
13893 gcc_assert (*dynamic_check == -1);
13894 gcc_assert (alg != libcall);
13895 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
13896 *dynamic_check = max;
13897 return alg;
13898 }
13899 return algs->unknown_size;
13900 }
13901
13902 /* Decide on alignment. We know that the operand is already aligned to ALIGN
13903 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
13904 static int
13905 decide_alignment (int align,
13906 enum stringop_alg alg,
13907 int expected_size)
13908 {
13909 int desired_align = 0;
13910 switch (alg)
13911 {
13912 case no_stringop:
13913 gcc_unreachable ();
13914 case loop:
13915 case unrolled_loop:
13916 desired_align = GET_MODE_SIZE (Pmode);
13917 break;
13918 case rep_prefix_8_byte:
13919 desired_align = 8;
13920 break;
13921 case rep_prefix_4_byte:
13922 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13923 copying whole cachelines at once. */
13924 if (TARGET_PENTIUMPRO)
13925 desired_align = 8;
13926 else
13927 desired_align = 4;
13928 break;
13929 case rep_prefix_1_byte:
13930 /* PentiumPro has special logic triggering for 8-byte-aligned blocks,
13931 copying whole cachelines at once. */
13932 if (TARGET_PENTIUMPRO)
13933 desired_align = 8;
13934 else
13935 desired_align = 1;
13936 break;
13937 case loop_1_byte:
13938 desired_align = 1;
13939 break;
13940 case libcall:
13941 return 0;
13942 }
13943
13944 if (optimize_size)
13945 desired_align = 1;
13946 if (desired_align < align)
13947 desired_align = align;
13948 if (expected_size != -1 && expected_size < 4)
13949 desired_align = align;
13950 return desired_align;
13951 }
13952
13953 /* Return the smallest power of 2 greater than VAL. */
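/* E.g. smallest_pow2_greater_than (4) == 8 and
   smallest_pow2_greater_than (7) == 8; the result is strictly greater
   than VAL.  */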
13954 static int
13955 smallest_pow2_greater_than (int val)
13956 {
13957 int ret = 1;
13958 while (ret <= val)
13959 ret <<= 1;
13960 return ret;
13961 }
13962
13963 /* Expand string move (memcpy) operation. Use i386 string operations when
13964 profitable. ix86_expand_setmem contains similar code. The code depends upon
13965 architecture, block size and alignment, but always has the same
13966 overall structure:
13967
13968 1) Prologue guard: Conditional that jumps up to epilogues for small
13969 blocks that can be handled by the epilogue alone. This is faster but
13970 also needed for correctness, since the prologue assumes the block is
13971 larger than the desired alignment.
13972
13973 Optional dynamic check for size and libcall for large
13974 blocks is emitted here too, with -minline-stringops-dynamically.
13975
13976 2) Prologue: copy the first few bytes in order to get the destination
13977 aligned to DESIRED_ALIGN. It is emitted only when ALIGN is less than
13978 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
13979 We emit either a jump tree on power of two sized blocks, or a byte loop.
13980
13981 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
13982 with specified algorithm.
13983
13984 4) Epilogue: code copying the tail of the block that is too small to be
13985 handled by the main body (or up to the size guarded by the prologue guard). */
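/* Illustrative example (assuming no alignment prologue is needed): with
   SIZE_NEEDED == 16 the main body handles count & ~15 bytes and the
   epilogue copies the remaining count & 15 bytes, while the prologue
   guard branches straight to the epilogue for counts below the
   threshold derived from SIZE_NEEDED and the alignment.  */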
13986
13987 int
13988 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
13989 rtx expected_align_exp, rtx expected_size_exp)
13990 {
13991 rtx destreg;
13992 rtx srcreg;
13993 rtx label = NULL;
13994 rtx tmp;
13995 rtx jump_around_label = NULL;
13996 HOST_WIDE_INT align = 1;
13997 unsigned HOST_WIDE_INT count = 0;
13998 HOST_WIDE_INT expected_size = -1;
13999 int size_needed = 0, epilogue_size_needed;
14000 int desired_align = 0;
14001 enum stringop_alg alg;
14002 int dynamic_check;
14003
14004 if (CONST_INT_P (align_exp))
14005 align = INTVAL (align_exp);
14006 /* i386 can do misaligned access at reasonably increased cost. */
14007 if (CONST_INT_P (expected_align_exp)
14008 && INTVAL (expected_align_exp) > align)
14009 align = INTVAL (expected_align_exp);
14010 if (CONST_INT_P (count_exp))
14011 count = expected_size = INTVAL (count_exp);
14012 if (CONST_INT_P (expected_size_exp) && count == 0)
14013 expected_size = INTVAL (expected_size_exp);
14014
14015 /* Step 0: Decide on preferred algorithm, desired alignment and
14016 size of chunks to be copied by main loop. */
14017
14018 alg = decide_alg (count, expected_size, false, &dynamic_check);
14019 desired_align = decide_alignment (align, alg, expected_size);
14020
14021 if (!TARGET_ALIGN_STRINGOPS)
14022 align = desired_align;
14023
14024 if (alg == libcall)
14025 return 0;
14026 gcc_assert (alg != no_stringop);
14027 if (!count)
14028 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14029 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14030 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14031 switch (alg)
14032 {
14033 case libcall:
14034 case no_stringop:
14035 gcc_unreachable ();
14036 case loop:
14037 size_needed = GET_MODE_SIZE (Pmode);
14038 break;
14039 case unrolled_loop:
14040 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14041 break;
14042 case rep_prefix_8_byte:
14043 size_needed = 8;
14044 break;
14045 case rep_prefix_4_byte:
14046 size_needed = 4;
14047 break;
14048 case rep_prefix_1_byte:
14049 case loop_1_byte:
14050 size_needed = 1;
14051 break;
14052 }
14053
14054 epilogue_size_needed = size_needed;
14055
14056 /* Step 1: Prologue guard. */
14057
14058 /* Alignment code needs count to be in register. */
14059 if (CONST_INT_P (count_exp) && desired_align > align)
14060 {
14061 enum machine_mode mode = SImode;
14062 if (TARGET_64BIT && (count & ~0xffffffff))
14063 mode = DImode;
14064 count_exp = force_reg (mode, count_exp);
14065 }
14066 gcc_assert (desired_align >= 1 && align >= 1);
14067
14068 /* Ensure that alignment prologue won't copy past end of block. */
14069 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14070 {
14071 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14072 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14073 Make sure it is power of 2. */
14074 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14075
14076 label = gen_label_rtx ();
14077 emit_cmp_and_jump_insns (count_exp,
14078 GEN_INT (epilogue_size_needed),
14079 LTU, 0, counter_mode (count_exp), 1, label);
14080 if (GET_CODE (count_exp) == CONST_INT)
14081 ;
14082 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14083 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14084 else
14085 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14086 }
14087 /* Emit code to decide on runtime whether library call or inline should be
14088 used. */
14089 if (dynamic_check != -1)
14090 {
14091 rtx hot_label = gen_label_rtx ();
14092 jump_around_label = gen_label_rtx ();
14093 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14094 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14095 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14096 emit_block_move_via_libcall (dst, src, count_exp, false);
14097 emit_jump (jump_around_label);
14098 emit_label (hot_label);
14099 }
14100
14101 /* Step 2: Alignment prologue. */
14102
14103 if (desired_align > align)
14104 {
14105 /* Except for the first move in the epilogue, we no longer know
14106 the constant offset in aliasing info. It doesn't seem worth
14107 the pain to maintain it for the first move, so throw away
14108 the info early. */
14109 src = change_address (src, BLKmode, srcreg);
14110 dst = change_address (dst, BLKmode, destreg);
14111 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14112 desired_align);
14113 }
14114 if (label && size_needed == 1)
14115 {
14116 emit_label (label);
14117 LABEL_NUSES (label) = 1;
14118 label = NULL;
14119 }
14120
14121 /* Step 3: Main loop. */
14122
14123 switch (alg)
14124 {
14125 case libcall:
14126 case no_stringop:
14127 gcc_unreachable ();
14128 case loop_1_byte:
14129 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14130 count_exp, QImode, 1, expected_size);
14131 break;
14132 case loop:
14133 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14134 count_exp, Pmode, 1, expected_size);
14135 break;
14136 case unrolled_loop:
14137 /* Unroll only by a factor of 2 in 32-bit mode, since we don't have enough
14138 registers for 4 temporaries anyway. */
14139 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14140 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14141 expected_size);
14142 break;
14143 case rep_prefix_8_byte:
14144 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14145 DImode);
14146 break;
14147 case rep_prefix_4_byte:
14148 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14149 SImode);
14150 break;
14151 case rep_prefix_1_byte:
14152 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14153 QImode);
14154 break;
14155 }
14156 /* Adjust properly the offset of src and dest memory for aliasing. */
14157 if (CONST_INT_P (count_exp))
14158 {
14159 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14160 (count / size_needed) * size_needed);
14161 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14162 (count / size_needed) * size_needed);
14163 }
14164 else
14165 {
14166 src = change_address (src, BLKmode, srcreg);
14167 dst = change_address (dst, BLKmode, destreg);
14168 }
14169
14170 /* Step 4: Epilogue to copy the remaining bytes. */
14171
14172 if (label)
14173 {
14174 /* When the main loop is done, COUNT_EXP might hold original count,
14175 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14176 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14177 bytes. Compensate if needed. */
14178
14179 if (size_needed < epilogue_size_needed)
14180 {
14181 tmp =
14182 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14183 GEN_INT (size_needed - 1), count_exp, 1,
14184 OPTAB_DIRECT);
14185 if (tmp != count_exp)
14186 emit_move_insn (count_exp, tmp);
14187 }
14188 emit_label (label);
14189 LABEL_NUSES (label) = 1;
14190 }
14191
14192 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14193 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
14194 epilogue_size_needed);
14195 if (jump_around_label)
14196 emit_label (jump_around_label);
14197 return 1;
14198 }
14199
14200 /* Helper function for memset. For QImode value 0xXY produce
14201 0xXYXYXYXY of the width specified by MODE. This is essentially
14202 a * 0x10101010, but we can do slightly better than
14203 synth_mult by unwinding the sequence by hand on CPUs with
14204 slow multiply. */
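/* Worked example (illustrative): for VAL == 0xAB and SImode, the
   shift/or variant computes 0xAB -> 0xABAB -> 0xABABABAB, while the
   multiply variant forms 0x01010101 and multiplies by it; the choice
   depends on the relative mult_init/mult_bit versus shift/add costs.  */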
14205 static rtx
14206 promote_duplicated_reg (enum machine_mode mode, rtx val)
14207 {
14208 enum machine_mode valmode = GET_MODE (val);
14209 rtx tmp;
14210 int nops = mode == DImode ? 3 : 2;
14211
14212 gcc_assert (mode == SImode || mode == DImode);
14213 if (val == const0_rtx)
14214 return copy_to_mode_reg (mode, const0_rtx);
14215 if (CONST_INT_P (val))
14216 {
14217 HOST_WIDE_INT v = INTVAL (val) & 255;
14218
14219 v |= v << 8;
14220 v |= v << 16;
14221 if (mode == DImode)
14222 v |= (v << 16) << 16;
14223 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
14224 }
14225
14226 if (valmode == VOIDmode)
14227 valmode = QImode;
14228 if (valmode != QImode)
14229 val = gen_lowpart (QImode, val);
14230 if (mode == QImode)
14231 return val;
14232 if (!TARGET_PARTIAL_REG_STALL)
14233 nops--;
14234 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
14235 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
14236 <= (ix86_cost->shift_const + ix86_cost->add) * nops
14237 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
14238 {
14239 rtx reg = convert_modes (mode, QImode, val, true);
14240 tmp = promote_duplicated_reg (mode, const1_rtx);
14241 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
14242 OPTAB_DIRECT);
14243 }
14244 else
14245 {
14246 rtx reg = convert_modes (mode, QImode, val, true);
14247
14248 if (!TARGET_PARTIAL_REG_STALL)
14249 if (mode == SImode)
14250 emit_insn (gen_movsi_insv_1 (reg, reg));
14251 else
14252 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
14253 else
14254 {
14255 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
14256 NULL, 1, OPTAB_DIRECT);
14257 reg =
14258 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14259 }
14260 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
14261 NULL, 1, OPTAB_DIRECT);
14262 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14263 if (mode == SImode)
14264 return reg;
14265 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
14266 NULL, 1, OPTAB_DIRECT);
14267 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
14268 return reg;
14269 }
14270 }
14271
14272 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
14273 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
14274 alignment from ALIGN to DESIRED_ALIGN. */
14275 static rtx
14276 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
14277 {
14278 rtx promoted_val;
14279
14280 if (TARGET_64BIT
14281 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
14282 promoted_val = promote_duplicated_reg (DImode, val);
14283 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
14284 promoted_val = promote_duplicated_reg (SImode, val);
14285 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
14286 promoted_val = promote_duplicated_reg (HImode, val);
14287 else
14288 promoted_val = val;
14289
14290 return promoted_val;
14291 }
14292
14293 /* Expand string set operation (memset/bzero). Use i386 string operations
14294 when profitable. See the ix86_expand_movmem comment for an explanation of
14295 the individual steps performed. */
14296 int
14297 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
14298 rtx expected_align_exp, rtx expected_size_exp)
14299 {
14300 rtx destreg;
14301 rtx label = NULL;
14302 rtx tmp;
14303 rtx jump_around_label = NULL;
14304 HOST_WIDE_INT align = 1;
14305 unsigned HOST_WIDE_INT count = 0;
14306 HOST_WIDE_INT expected_size = -1;
14307 int size_needed = 0, epilogue_size_needed;
14308 int desired_align = 0;
14309 enum stringop_alg alg;
14310 rtx promoted_val = NULL;
14311 bool force_loopy_epilogue = false;
14312 int dynamic_check;
14313
14314 if (CONST_INT_P (align_exp))
14315 align = INTVAL (align_exp);
14316 /* i386 can do misaligned access at reasonably increased cost. */
14317 if (CONST_INT_P (expected_align_exp)
14318 && INTVAL (expected_align_exp) > align)
14319 align = INTVAL (expected_align_exp);
14320 if (CONST_INT_P (count_exp))
14321 count = expected_size = INTVAL (count_exp);
14322 if (CONST_INT_P (expected_size_exp) && count == 0)
14323 expected_size = INTVAL (expected_size_exp);
14324
14325 /* Step 0: Decide on preferred algorithm, desired alignment and
14326 size of chunks to be copied by main loop. */
14327
14328 alg = decide_alg (count, expected_size, true, &dynamic_check);
14329 desired_align = decide_alignment (align, alg, expected_size);
14330
14331 if (!TARGET_ALIGN_STRINGOPS)
14332 align = desired_align;
14333
14334 if (alg == libcall)
14335 return 0;
14336 gcc_assert (alg != no_stringop);
14337 if (!count)
14338 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
14339 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14340 switch (alg)
14341 {
14342 case libcall:
14343 case no_stringop:
14344 gcc_unreachable ();
14345 case loop:
14346 size_needed = GET_MODE_SIZE (Pmode);
14347 break;
14348 case unrolled_loop:
14349 size_needed = GET_MODE_SIZE (Pmode) * 4;
14350 break;
14351 case rep_prefix_8_byte:
14352 size_needed = 8;
14353 break;
14354 case rep_prefix_4_byte:
14355 size_needed = 4;
14356 break;
14357 case rep_prefix_1_byte:
14358 case loop_1_byte:
14359 size_needed = 1;
14360 break;
14361 }
14362 epilogue_size_needed = size_needed;
14363
14364 /* Step 1: Prologue guard. */
14365
14366 /* Alignment code needs count to be in register. */
14367 if (CONST_INT_P (count_exp) && desired_align > align)
14368 {
14369 enum machine_mode mode = SImode;
14370 if (TARGET_64BIT && (count & ~0xffffffff))
14371 mode = DImode;
14372 count_exp = force_reg (mode, count_exp);
14373 }
14374 /* Do the cheap promotion to allow better CSE across the
14375 main loop and epilogue (i.e. one load of the big constant in
14376 front of all the code). */
14377 if (CONST_INT_P (val_exp))
14378 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14379 desired_align, align);
14380 /* Ensure that alignment prologue won't copy past end of block. */
14381 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14382 {
14383 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14384 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14385 Make sure it is power of 2. */
14386 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14387
14388 /* To improve performance of small blocks, we jump around the VAL
14389 promoting code. This means that if the promoted VAL is not constant,
14390 we might not use it in the epilogue and have to use the byte
14391 loop variant. */
14392 if (epilogue_size_needed > 2 && !promoted_val)
14393 force_loopy_epilogue = true;
14394 label = gen_label_rtx ();
14395 emit_cmp_and_jump_insns (count_exp,
14396 GEN_INT (epilogue_size_needed),
14397 LTU, 0, counter_mode (count_exp), 1, label);
14398 if (GET_CODE (count_exp) == CONST_INT)
14399 ;
14400 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
14401 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14402 else
14403 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14404 }
14405 if (dynamic_check != -1)
14406 {
14407 rtx hot_label = gen_label_rtx ();
14408 jump_around_label = gen_label_rtx ();
14409 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14410 LEU, 0, counter_mode (count_exp), 1, hot_label);
14411 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14412 set_storage_via_libcall (dst, count_exp, val_exp, false);
14413 emit_jump (jump_around_label);
14414 emit_label (hot_label);
14415 }
14416
14417 /* Step 2: Alignment prologue. */
14418
14419 /* Do the expensive promotion once we branched off the small blocks. */
14420 if (!promoted_val)
14421 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
14422 desired_align, align);
14423 gcc_assert (desired_align >= 1 && align >= 1);
14424
14425 if (desired_align > align)
14426 {
14427 /* Except for the first move in the epilogue, we no longer know
14428 the constant offset in aliasing info. It doesn't seem worth
14429 the pain to maintain it for the first move, so throw away
14430 the info early. */
14431 dst = change_address (dst, BLKmode, destreg);
14432 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
14433 desired_align);
14434 }
14435 if (label && size_needed == 1)
14436 {
14437 emit_label (label);
14438 LABEL_NUSES (label) = 1;
14439 label = NULL;
14440 }
14441
14442 /* Step 3: Main loop. */
14443
14444 switch (alg)
14445 {
14446 case libcall:
14447 case no_stringop:
14448 gcc_unreachable ();
14449 case loop_1_byte:
14450 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14451 count_exp, QImode, 1, expected_size);
14452 break;
14453 case loop:
14454 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14455 count_exp, Pmode, 1, expected_size);
14456 break;
14457 case unrolled_loop:
14458 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
14459 count_exp, Pmode, 4, expected_size);
14460 break;
14461 case rep_prefix_8_byte:
14462 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14463 DImode);
14464 break;
14465 case rep_prefix_4_byte:
14466 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14467 SImode);
14468 break;
14469 case rep_prefix_1_byte:
14470 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
14471 QImode);
14472 break;
14473 }
14474 /* Adjust properly the offset of src and dest memory for aliasing. */
14475 if (CONST_INT_P (count_exp))
14476 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14477 (count / size_needed) * size_needed);
14478 else
14479 dst = change_address (dst, BLKmode, destreg);
14480
14481 /* Step 4: Epilogue to copy the remaining bytes. */
14482
14483 if (label)
14484 {
14485 /* When the main loop is done, COUNT_EXP might hold original count,
14486 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14487 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14488 bytes. Compensate if needed. */
14489
14490 if (size_needed < desired_align - align)
14491 {
14492 tmp =
14493 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14494 GEN_INT (size_needed - 1), count_exp, 1,
14495 OPTAB_DIRECT);
14496 size_needed = desired_align - align + 1;
14497 if (tmp != count_exp)
14498 emit_move_insn (count_exp, tmp);
14499 }
14500 emit_label (label);
14501 LABEL_NUSES (label) = 1;
14502 }
14503 if (count_exp != const0_rtx && epilogue_size_needed > 1)
14504 {
14505 if (force_loopy_epilogue)
14506 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
14507 size_needed);
14508 else
14509 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
14510 size_needed);
14511 }
14512 if (jump_around_label)
14513 emit_label (jump_around_label);
14514 return 1;
14515 }
14516
14517 /* Expand the appropriate insns for doing strlen if not just doing
14518 repnz; scasb
14519
14520 out = result, initialized with the start address
14521 align_rtx = alignment of the address.
14522 scratch = scratch register, initialized with the start address when
14523 not aligned, otherwise undefined
14524
14525 This is just the body. It needs the initializations mentioned above and
14526 some address computing at the end. These things are done in i386.md. */
14527
14528 static void
14529 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
14530 {
14531 int align;
14532 rtx tmp;
14533 rtx align_2_label = NULL_RTX;
14534 rtx align_3_label = NULL_RTX;
14535 rtx align_4_label = gen_label_rtx ();
14536 rtx end_0_label = gen_label_rtx ();
14537 rtx mem;
14538 rtx tmpreg = gen_reg_rtx (SImode);
14539 rtx scratch = gen_reg_rtx (SImode);
14540 rtx cmp;
14541
14542 align = 0;
14543 if (CONST_INT_P (align_rtx))
14544 align = INTVAL (align_rtx);
14545
14546 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
14547
14548 /* Is there a known alignment and is it less than 4? */
14549 if (align < 4)
14550 {
14551 rtx scratch1 = gen_reg_rtx (Pmode);
14552 emit_move_insn (scratch1, out);
14553 /* Is there a known alignment and is it not 2? */
14554 if (align != 2)
14555 {
14556 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
14557 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
14558
14559 /* Leave just the 3 lower bits. */
14560 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
14561 NULL_RTX, 0, OPTAB_WIDEN);
14562
14563 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14564 Pmode, 1, align_4_label);
14565 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
14566 Pmode, 1, align_2_label);
14567 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
14568 Pmode, 1, align_3_label);
14569 }
14570 else
14571 {
14572 /* Since the alignment is 2, we have to check 2 or 0 bytes;
14573 check if it is aligned to a 4-byte boundary. */
14574
14575 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
14576 NULL_RTX, 0, OPTAB_WIDEN);
14577
14578 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
14579 Pmode, 1, align_4_label);
14580 }
14581
14582 mem = change_address (src, QImode, out);
14583
14584 /* Now compare the bytes. */
14585
14586 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
14587 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
14588 QImode, 1, end_0_label);
14589
14590 /* Increment the address. */
14591 if (TARGET_64BIT)
14592 emit_insn (gen_adddi3 (out, out, const1_rtx));
14593 else
14594 emit_insn (gen_addsi3 (out, out, const1_rtx));
14595
14596 /* Not needed with an alignment of 2 */
14597 if (align != 2)
14598 {
14599 emit_label (align_2_label);
14600
14601 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14602 end_0_label);
14603
14604 if (TARGET_64BIT)
14605 emit_insn (gen_adddi3 (out, out, const1_rtx));
14606 else
14607 emit_insn (gen_addsi3 (out, out, const1_rtx));
14608
14609 emit_label (align_3_label);
14610 }
14611
14612 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
14613 end_0_label);
14614
14615 if (TARGET_64BIT)
14616 emit_insn (gen_adddi3 (out, out, const1_rtx));
14617 else
14618 emit_insn (gen_addsi3 (out, out, const1_rtx));
14619 }
14620
14621 /* Generate a loop to check 4 bytes at a time. It is not a good idea to
14622 align this loop: doing so only makes the code bigger and does not help
14623 to speed it up. */
14624 emit_label (align_4_label);
14625
14626 mem = change_address (src, SImode, out);
14627 emit_move_insn (scratch, mem);
14628 if (TARGET_64BIT)
14629 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
14630 else
14631 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
14632
14633 /* This formula yields a nonzero result iff one of the bytes is zero.
14634 This saves three branches inside loop and many cycles. */
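  /* Worked example (illustrative values): for scratch == 0x41420043,
     scratch - 0x01010101 == 0x4040ff42 and ~scratch == 0xbebdffbc, so
     (scratch - 0x01010101) & ~scratch & 0x80808080 == 0x00008000 != 0,
     flagging the zero byte; for scratch == 0x41424344, which has no zero
     byte, the result is 0.  */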
14635
14636 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
14637 emit_insn (gen_one_cmplsi2 (scratch, scratch));
14638 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
14639 emit_insn (gen_andsi3 (tmpreg, tmpreg,
14640 gen_int_mode (0x80808080, SImode)));
14641 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
14642 align_4_label);
14643
14644 if (TARGET_CMOVE)
14645 {
14646 rtx reg = gen_reg_rtx (SImode);
14647 rtx reg2 = gen_reg_rtx (Pmode);
14648 emit_move_insn (reg, tmpreg);
14649 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
14650
14651 /* If zero is not in the first two bytes, move two bytes forward. */
14652 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14653 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14654 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14655 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
14656 gen_rtx_IF_THEN_ELSE (SImode, tmp,
14657 reg,
14658 tmpreg)));
14659 /* Emit lea manually to avoid clobbering of flags. */
14660 emit_insn (gen_rtx_SET (SImode, reg2,
14661 gen_rtx_PLUS (Pmode, out, const2_rtx)));
14662
14663 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14664 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
14665 emit_insn (gen_rtx_SET (VOIDmode, out,
14666 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
14667 reg2,
14668 out)));
14669
14670 }
14671 else
14672 {
14673 rtx end_2_label = gen_label_rtx ();
14674 /* Is zero in the first two bytes? */
14675
14676 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
14677 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
14678 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
14679 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
14680 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
14681 pc_rtx);
14682 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
14683 JUMP_LABEL (tmp) = end_2_label;
14684
14685 /* Not in the first two. Move two bytes forward. */
14686 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
14687 if (TARGET_64BIT)
14688 emit_insn (gen_adddi3 (out, out, const2_rtx));
14689 else
14690 emit_insn (gen_addsi3 (out, out, const2_rtx));
14691
14692 emit_label (end_2_label);
14693
14694 }
14695
14696 /* Avoid branch in fixing the byte. */
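  /* A sketch of what happens here: OUT currently points 4 bytes (or, after
     the two-byte adjustment above, 6 bytes) past the start of the byte pair
     containing the zero, and bit 7 of the low byte of TMPREG is set exactly
     when the zero is the first byte of that pair.  Doubling that byte moves
     the bit into the carry flag, so the borrowing subtraction below takes
     3 plus the carry off OUT, leaving it pointing at the zero byte.  */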
14697 tmpreg = gen_lowpart (QImode, tmpreg);
14698 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
14699 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
14700 if (TARGET_64BIT)
14701 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
14702 else
14703 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
14704
14705 emit_label (end_0_label);
14706 }
14707
14708 /* Expand strlen. */
14709
14710 int
14711 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
14712 {
14713 rtx addr, scratch1, scratch2, scratch3, scratch4;
14714
14715 /* The generic case of the strlen expander is long. Avoid expanding
14716 it unless TARGET_INLINE_ALL_STRINGOPS. */
14717
14718 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14719 && !TARGET_INLINE_ALL_STRINGOPS
14720 && !optimize_size
14721 && (!CONST_INT_P (align) || INTVAL (align) < 4))
14722 return 0;
14723
14724 addr = force_reg (Pmode, XEXP (src, 0));
14725 scratch1 = gen_reg_rtx (Pmode);
14726
14727 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
14728 && !optimize_size)
14729 {
14730 /* Well it seems that some optimizer does not combine a call like
14731 foo(strlen(bar), strlen(bar));
14732 when the move and the subtraction are done here. It does calculate
14733 the length just once when these instructions are done inside
14734 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
14735 often used and I use one fewer register for the lifetime of
14736 output_strlen_unroll() this is better. */
14737
14738 emit_move_insn (out, addr);
14739
14740 ix86_expand_strlensi_unroll_1 (out, src, align);
14741
14742 /* strlensi_unroll_1 returns the address of the zero at the end of
14743 the string, like memchr(), so compute the length by subtracting
14744 the start address. */
14745 if (TARGET_64BIT)
14746 emit_insn (gen_subdi3 (out, out, addr));
14747 else
14748 emit_insn (gen_subsi3 (out, out, addr));
14749 }
14750 else
14751 {
14752 rtx unspec;
14753 scratch2 = gen_reg_rtx (Pmode);
14754 scratch3 = gen_reg_rtx (Pmode);
14755 scratch4 = force_reg (Pmode, constm1_rtx);
14756
14757 emit_move_insn (scratch3, addr);
14758 eoschar = force_reg (QImode, eoschar);
14759
14760 src = replace_equiv_address_nv (src, scratch3);
14761
14762 /* If .md starts supporting :P, this can be done in .md. */
14763 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
14764 scratch4), UNSPEC_SCAS);
14765 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
14766 if (TARGET_64BIT)
14767 {
14768 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
14769 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
14770 }
14771 else
14772 {
14773 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
14774 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
14775 }
14776 }
14777 return 1;
14778 }
14779
14780 /* For a given symbol (function), construct code to compute the address of its
14781 PLT entry in the large x86-64 PIC model. */
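/* A sketch of the resulting sequence (illustrative assembly; the actual
   register choice is left to the register allocator):
       movabs $symbol@PLTOFF, %tmp
       add    <GOT base register>, %tmp
   where the GOT base register is whatever pic_offset_table_rtx refers to
   in the large PIC model.  */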
14782 rtx
14783 construct_plt_address (rtx symbol)
14784 {
14785 rtx tmp = gen_reg_rtx (Pmode);
14786 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
14787
14788 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
14789 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
14790
14791 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
14792 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
14793 return tmp;
14794 }
14795
14796 void
14797 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
14798 rtx callarg2 ATTRIBUTE_UNUSED,
14799 rtx pop, int sibcall)
14800 {
14801 rtx use = NULL, call;
14802
14803 if (pop == const0_rtx)
14804 pop = NULL;
14805 gcc_assert (!TARGET_64BIT || !pop);
14806
14807 if (TARGET_MACHO && !TARGET_64BIT)
14808 {
14809 #if TARGET_MACHO
14810 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
14811 fnaddr = machopic_indirect_call_target (fnaddr);
14812 #endif
14813 }
14814 else
14815 {
14816 /* Static functions and indirect calls don't need the pic register. */
14817 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
14818 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14819 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
14820 use_reg (&use, pic_offset_table_rtx);
14821 }
14822
14823 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
14824 {
14825 rtx al = gen_rtx_REG (QImode, 0);
14826 emit_move_insn (al, callarg2);
14827 use_reg (&use, al);
14828 }
14829
14830 if (ix86_cmodel == CM_LARGE_PIC
14831 && GET_CODE (fnaddr) == MEM
14832 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
14833 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
14834 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
14835 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
14836 {
14837 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14838 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14839 }
14840 if (sibcall && TARGET_64BIT
14841 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
14842 {
14843 rtx addr;
14844 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
14845 fnaddr = gen_rtx_REG (Pmode, R11_REG);
14846 emit_move_insn (fnaddr, addr);
14847 fnaddr = gen_rtx_MEM (QImode, fnaddr);
14848 }
14849
14850 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
14851 if (retval)
14852 call = gen_rtx_SET (VOIDmode, retval, call);
14853 if (pop)
14854 {
14855 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
14856 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
14857 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
14858 }
14859
14860 call = emit_call_insn (call);
14861 if (use)
14862 CALL_INSN_FUNCTION_USAGE (call) = use;
14863 }
14864
14865 \f
14866 /* Clear stack slot assignments remembered from previous functions.
14867 This is called from INIT_EXPANDERS once before RTL is emitted for each
14868 function. */
14869
14870 static struct machine_function *
14871 ix86_init_machine_status (void)
14872 {
14873 struct machine_function *f;
14874
14875 f = ggc_alloc_cleared (sizeof (struct machine_function));
14876 f->use_fast_prologue_epilogue_nregs = -1;
14877 f->tls_descriptor_call_expanded_p = 0;
14878
14879 return f;
14880 }
14881
14882 /* Return a MEM corresponding to a stack slot with mode MODE.
14883 Allocate a new slot if necessary.
14884
14885 The RTL for a function can have several slots available: N is
14886 which slot to use. */
14887
14888 rtx
14889 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
14890 {
14891 struct stack_local_entry *s;
14892
14893 gcc_assert (n < MAX_386_STACK_LOCALS);
14894
14895 for (s = ix86_stack_locals; s; s = s->next)
14896 if (s->mode == mode && s->n == n)
14897 return copy_rtx (s->rtl);
14898
14899 s = (struct stack_local_entry *)
14900 ggc_alloc (sizeof (struct stack_local_entry));
14901 s->n = n;
14902 s->mode = mode;
14903 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
14904
14905 s->next = ix86_stack_locals;
14906 ix86_stack_locals = s;
14907 return s->rtl;
14908 }
14909
14910 /* Construct the SYMBOL_REF for the tls_get_addr function. */
14911
14912 static GTY(()) rtx ix86_tls_symbol;
14913 rtx
14914 ix86_tls_get_addr (void)
14915 {
14916
14917 if (!ix86_tls_symbol)
14918 {
14919 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
14920 (TARGET_ANY_GNU_TLS
14921 && !TARGET_64BIT)
14922 ? "___tls_get_addr"
14923 : "__tls_get_addr");
14924 }
14925
14926 return ix86_tls_symbol;
14927 }
14928
14929 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
14930
14931 static GTY(()) rtx ix86_tls_module_base_symbol;
14932 rtx
14933 ix86_tls_module_base (void)
14934 {
14935
14936 if (!ix86_tls_module_base_symbol)
14937 {
14938 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
14939 "_TLS_MODULE_BASE_");
14940 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
14941 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
14942 }
14943
14944 return ix86_tls_module_base_symbol;
14945 }
14946 \f
14947 /* Calculate the length of the memory address in the instruction
14948 encoding. Does not include the one-byte modrm, opcode, or prefix. */
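/* For example (counting only these extra address bytes): (%eax) needs 0,
   (%esp) or (%ebp) needs 1 (a SIB byte or a zero disp8, respectively), an
   absolute disp32 with no base or index needs 4, 8(%eax) needs 1, and
   16(%eax,%ebx,4) needs 1 + 1 = 2.  */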
14949
14950 int
14951 memory_address_length (rtx addr)
14952 {
14953 struct ix86_address parts;
14954 rtx base, index, disp;
14955 int len;
14956 int ok;
14957
14958 if (GET_CODE (addr) == PRE_DEC
14959 || GET_CODE (addr) == POST_INC
14960 || GET_CODE (addr) == PRE_MODIFY
14961 || GET_CODE (addr) == POST_MODIFY)
14962 return 0;
14963
14964 ok = ix86_decompose_address (addr, &parts);
14965 gcc_assert (ok);
14966
14967 if (parts.base && GET_CODE (parts.base) == SUBREG)
14968 parts.base = SUBREG_REG (parts.base);
14969 if (parts.index && GET_CODE (parts.index) == SUBREG)
14970 parts.index = SUBREG_REG (parts.index);
14971
14972 base = parts.base;
14973 index = parts.index;
14974 disp = parts.disp;
14975 len = 0;
14976
14977 /* Rule of thumb:
14978 - esp as the base always wants an index,
14979 - ebp as the base always wants a displacement. */
14980
14981 /* Register Indirect. */
14982 if (base && !index && !disp)
14983 {
14984 /* esp (for its index) and ebp (for its displacement) need
14985 the two-byte modrm form. */
14986 if (addr == stack_pointer_rtx
14987 || addr == arg_pointer_rtx
14988 || addr == frame_pointer_rtx
14989 || addr == hard_frame_pointer_rtx)
14990 len = 1;
14991 }
14992
14993 /* Direct Addressing. */
14994 else if (disp && !base && !index)
14995 len = 4;
14996
14997 else
14998 {
14999 /* Find the length of the displacement constant. */
15000 if (disp)
15001 {
15002 if (base && satisfies_constraint_K (disp))
15003 len = 1;
15004 else
15005 len = 4;
15006 }
15007 /* ebp always wants a displacement. */
15008 else if (base == hard_frame_pointer_rtx)
15009 len = 1;
15010
15011 /* An index requires the two-byte modrm form.... */
15012 if (index
15013 /* ...like esp, which always wants an index. */
15014 || base == stack_pointer_rtx
15015 || base == arg_pointer_rtx
15016 || base == frame_pointer_rtx)
15017 len += 1;
15018 }
15019
15020 return len;
15021 }
15022
15023 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15024 is set, expect that the insn has an 8-bit immediate alternative. */
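/* For example, "addl $12, %eax" can use a 1-byte immediate when SHORTFORM
   is set (12 satisfies constraint K, a signed 8-bit constant), while
   "addl $300, %eax" needs the full 4-byte SImode immediate.  */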
15025 int
15026 ix86_attr_length_immediate_default (rtx insn, int shortform)
15027 {
15028 int len = 0;
15029 int i;
15030 extract_insn_cached (insn);
15031 for (i = recog_data.n_operands - 1; i >= 0; --i)
15032 if (CONSTANT_P (recog_data.operand[i]))
15033 {
15034 gcc_assert (!len);
15035 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15036 len = 1;
15037 else
15038 {
15039 switch (get_attr_mode (insn))
15040 {
15041 case MODE_QI:
15042 len+=1;
15043 break;
15044 case MODE_HI:
15045 len+=2;
15046 break;
15047 case MODE_SI:
15048 len+=4;
15049 break;
15050 /* Immediates for DImode instructions are encoded as 32-bit sign-extended values. */
15051 case MODE_DI:
15052 len+=4;
15053 break;
15054 default:
15055 fatal_insn ("unknown insn mode", insn);
15056 }
15057 }
15058 }
15059 return len;
15060 }
15061 /* Compute default value for "length_address" attribute. */
15062 int
15063 ix86_attr_length_address_default (rtx insn)
15064 {
15065 int i;
15066
15067 if (get_attr_type (insn) == TYPE_LEA)
15068 {
15069 rtx set = PATTERN (insn);
15070
15071 if (GET_CODE (set) == PARALLEL)
15072 set = XVECEXP (set, 0, 0);
15073
15074 gcc_assert (GET_CODE (set) == SET);
15075
15076 return memory_address_length (SET_SRC (set));
15077 }
15078
15079 extract_insn_cached (insn);
15080 for (i = recog_data.n_operands - 1; i >= 0; --i)
15081 if (MEM_P (recog_data.operand[i]))
15082 {
15083 return memory_address_length (XEXP (recog_data.operand[i], 0));
15085 }
15086 return 0;
15087 }
15088 \f
15089 /* Return the maximum number of instructions a cpu can issue. */
15090
15091 static int
15092 ix86_issue_rate (void)
15093 {
15094 switch (ix86_tune)
15095 {
15096 case PROCESSOR_PENTIUM:
15097 case PROCESSOR_K6:
15098 return 2;
15099
15100 case PROCESSOR_PENTIUMPRO:
15101 case PROCESSOR_PENTIUM4:
15102 case PROCESSOR_ATHLON:
15103 case PROCESSOR_K8:
15104 case PROCESSOR_AMDFAM10:
15105 case PROCESSOR_NOCONA:
15106 case PROCESSOR_GENERIC32:
15107 case PROCESSOR_GENERIC64:
15108 return 3;
15109
15110 case PROCESSOR_CORE2:
15111 return 4;
15112
15113 default:
15114 return 1;
15115 }
15116 }
15117
15118 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15119 by DEP_INSN and nothing else set by DEP_INSN. */
15120
15121 static int
15122 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15123 {
15124 rtx set, set2;
15125
15126 /* Simplify the test for uninteresting insns. */
15127 if (insn_type != TYPE_SETCC
15128 && insn_type != TYPE_ICMOV
15129 && insn_type != TYPE_FCMOV
15130 && insn_type != TYPE_IBR)
15131 return 0;
15132
15133 if ((set = single_set (dep_insn)) != 0)
15134 {
15135 set = SET_DEST (set);
15136 set2 = NULL_RTX;
15137 }
15138 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15139 && XVECLEN (PATTERN (dep_insn), 0) == 2
15140 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15141 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15142 {
15143 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15144 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15145 }
15146 else
15147 return 0;
15148
15149 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15150 return 0;
15151
15152 /* This test is true if the dependent insn reads the flags but
15153 not any other potentially set register. */
15154 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15155 return 0;
15156
15157 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15158 return 0;
15159
15160 return 1;
15161 }
15162
15163 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15164 address with operands set by DEP_INSN. */
15165
15166 static int
15167 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15168 {
15169 rtx addr;
15170
15171 if (insn_type == TYPE_LEA
15172 && TARGET_PENTIUM)
15173 {
15174 addr = PATTERN (insn);
15175
15176 if (GET_CODE (addr) == PARALLEL)
15177 addr = XVECEXP (addr, 0, 0);
15178
15179 gcc_assert (GET_CODE (addr) == SET);
15180
15181 addr = SET_SRC (addr);
15182 }
15183 else
15184 {
15185 int i;
15186 extract_insn_cached (insn);
15187 for (i = recog_data.n_operands - 1; i >= 0; --i)
15188 if (MEM_P (recog_data.operand[i]))
15189 {
15190 addr = XEXP (recog_data.operand[i], 0);
15191 goto found;
15192 }
15193 return 0;
15194 found:;
15195 }
15196
15197 return modified_in_p (addr, dep_insn);
15198 }
15199
15200 static int
15201 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
15202 {
15203 enum attr_type insn_type, dep_insn_type;
15204 enum attr_memory memory;
15205 rtx set, set2;
15206 int dep_insn_code_number;
15207
15208 /* Anti and output dependencies have zero cost on all CPUs. */
15209 if (REG_NOTE_KIND (link) != 0)
15210 return 0;
15211
15212 dep_insn_code_number = recog_memoized (dep_insn);
15213
15214 /* If we can't recognize the insns, we can't really do anything. */
15215 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
15216 return cost;
15217
15218 insn_type = get_attr_type (insn);
15219 dep_insn_type = get_attr_type (dep_insn);
15220
15221 switch (ix86_tune)
15222 {
15223 case PROCESSOR_PENTIUM:
15224 /* Address Generation Interlock adds a cycle of latency. */
15225 if (ix86_agi_dependent (insn, dep_insn, insn_type))
15226 cost += 1;
15227
15228 /* ??? Compares pair with jump/setcc. */
15229 if (ix86_flags_dependent (insn, dep_insn, insn_type))
15230 cost = 0;
15231
15232 /* Floating point stores require value to be ready one cycle earlier. */
15233 if (insn_type == TYPE_FMOV
15234 && get_attr_memory (insn) == MEMORY_STORE
15235 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15236 cost += 1;
15237 break;
15238
15239 case PROCESSOR_PENTIUMPRO:
15240 memory = get_attr_memory (insn);
15241
15242 /* INT->FP conversion is expensive. */
15243 if (get_attr_fp_int_src (dep_insn))
15244 cost += 5;
15245
15246 /* There is one cycle extra latency between an FP op and a store. */
15247 if (insn_type == TYPE_FMOV
15248 && (set = single_set (dep_insn)) != NULL_RTX
15249 && (set2 = single_set (insn)) != NULL_RTX
15250 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
15251 && MEM_P (SET_DEST (set2)))
15252 cost += 1;
15253
15254 /* Show the ability of the reorder buffer to hide the latency of a load
15255 by executing it in parallel with the previous instruction when that
15256 instruction is not needed to compute the address. */
15257 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15258 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15259 {
15260 /* Claim that moves take one cycle, as the core can issue one load
15261 at a time and the next load can start a cycle later. */
15262 if (dep_insn_type == TYPE_IMOV
15263 || dep_insn_type == TYPE_FMOV)
15264 cost = 1;
15265 else if (cost > 1)
15266 cost--;
15267 }
15268 break;
15269
15270 case PROCESSOR_K6:
15271 memory = get_attr_memory (insn);
15272
15273 /* The esp dependency is resolved before the instruction is really
15274 finished. */
15275 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
15276 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
15277 return 1;
15278
15279 /* INT->FP conversion is expensive. */
15280 if (get_attr_fp_int_src (dep_insn))
15281 cost += 5;
15282
15283 /* Show the ability of the reorder buffer to hide the latency of a load
15284 by executing it in parallel with the previous instruction when that
15285 instruction is not needed to compute the address. */
15286 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15287 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15288 {
15289 /* Claim that moves take one cycle, as the core can issue one load
15290 at a time and the next load can start a cycle later. */
15291 if (dep_insn_type == TYPE_IMOV
15292 || dep_insn_type == TYPE_FMOV)
15293 cost = 1;
15294 else if (cost > 2)
15295 cost -= 2;
15296 else
15297 cost = 1;
15298 }
15299 break;
15300
15301 case PROCESSOR_ATHLON:
15302 case PROCESSOR_K8:
15303 case PROCESSOR_AMDFAM10:
15304 case PROCESSOR_GENERIC32:
15305 case PROCESSOR_GENERIC64:
15306 memory = get_attr_memory (insn);
15307
15308 /* Show the ability of the reorder buffer to hide the latency of a load
15309 by executing it in parallel with the previous instruction when that
15310 instruction is not needed to compute the address. */
15311 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
15312 && !ix86_agi_dependent (insn, dep_insn, insn_type))
15313 {
15314 enum attr_unit unit = get_attr_unit (insn);
15315 int loadcost = 3;
15316
15317 /* Because of the difference between the length of integer and
15318 floating unit pipeline preparation stages, the memory operands
15319 for floating point are cheaper.
15320
15321 ??? For Athlon the difference is most probably 2. */
15322 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
15323 loadcost = 3;
15324 else
15325 loadcost = TARGET_ATHLON ? 2 : 0;
15326
15327 if (cost >= loadcost)
15328 cost -= loadcost;
15329 else
15330 cost = 0;
15331 }
15332
15333 default:
15334 break;
15335 }
15336
15337 return cost;
15338 }
15339
15340 /* How many alternative schedules to try. This should be as wide as the
15341 scheduling freedom in the DFA, but no wider. Making this value too
15342 large results in extra work for the scheduler. */
15343
15344 static int
15345 ia32_multipass_dfa_lookahead (void)
15346 {
15347 if (ix86_tune == PROCESSOR_PENTIUM)
15348 return 2;
15349
15350 if (ix86_tune == PROCESSOR_PENTIUMPRO
15351 || ix86_tune == PROCESSOR_K6)
15352 return 1;
15353
15354 else
15355 return 0;
15356 }
15357
15358 \f
15359 /* Compute the alignment given to a constant that is being placed in memory.
15360 EXP is the constant and ALIGN is the alignment that the object would
15361 ordinarily have.
15362 The value of this function is used instead of that alignment to align
15363 the object. */
15364
15365 int
15366 ix86_constant_alignment (tree exp, int align)
15367 {
15368 if (TREE_CODE (exp) == REAL_CST)
15369 {
15370 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
15371 return 64;
15372 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
15373 return 128;
15374 }
15375 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
15376 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
15377 return BITS_PER_WORD;
15378
15379 return align;
15380 }
15381
15382 /* Compute the alignment for a static variable.
15383 TYPE is the data type, and ALIGN is the alignment that
15384 the object would ordinarily have. The value of this function is used
15385 instead of that alignment to align the object. */
15386
15387 int
15388 ix86_data_alignment (tree type, int align)
15389 {
15390 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
15391
15392 if (AGGREGATE_TYPE_P (type)
15393 && TYPE_SIZE (type)
15394 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15395 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
15396 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
15397 && align < max_align)
15398 align = max_align;
15399
15400 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15401 to a 16-byte boundary. */
15402 if (TARGET_64BIT)
15403 {
15404 if (AGGREGATE_TYPE_P (type)
15405 && TYPE_SIZE (type)
15406 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15407 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
15408 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15409 return 128;
15410 }
15411
15412 if (TREE_CODE (type) == ARRAY_TYPE)
15413 {
15414 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15415 return 64;
15416 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15417 return 128;
15418 }
15419 else if (TREE_CODE (type) == COMPLEX_TYPE)
15420 {
15421
15422 if (TYPE_MODE (type) == DCmode && align < 64)
15423 return 64;
15424 if (TYPE_MODE (type) == XCmode && align < 128)
15425 return 128;
15426 }
15427 else if ((TREE_CODE (type) == RECORD_TYPE
15428 || TREE_CODE (type) == UNION_TYPE
15429 || TREE_CODE (type) == QUAL_UNION_TYPE)
15430 && TYPE_FIELDS (type))
15431 {
15432 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15433 return 64;
15434 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15435 return 128;
15436 }
15437 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15438 || TREE_CODE (type) == INTEGER_TYPE)
15439 {
15440 if (TYPE_MODE (type) == DFmode && align < 64)
15441 return 64;
15442 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15443 return 128;
15444 }
15445
15446 return align;
15447 }
15448
15449 /* Compute the alignment for a local variable.
15450 TYPE is the data type, and ALIGN is the alignment that
15451 the object would ordinarily have. The value of this macro is used
15452 instead of that alignment to align the object. */
15453
15454 int
15455 ix86_local_alignment (tree type, int align)
15456 {
15457 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
15458 to a 16-byte boundary. */
15459 if (TARGET_64BIT)
15460 {
15461 if (AGGREGATE_TYPE_P (type)
15462 && TYPE_SIZE (type)
15463 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
15464 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
15465 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
15466 return 128;
15467 }
15468 if (TREE_CODE (type) == ARRAY_TYPE)
15469 {
15470 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
15471 return 64;
15472 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
15473 return 128;
15474 }
15475 else if (TREE_CODE (type) == COMPLEX_TYPE)
15476 {
15477 if (TYPE_MODE (type) == DCmode && align < 64)
15478 return 64;
15479 if (TYPE_MODE (type) == XCmode && align < 128)
15480 return 128;
15481 }
15482 else if ((TREE_CODE (type) == RECORD_TYPE
15483 || TREE_CODE (type) == UNION_TYPE
15484 || TREE_CODE (type) == QUAL_UNION_TYPE)
15485 && TYPE_FIELDS (type))
15486 {
15487 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
15488 return 64;
15489 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
15490 return 128;
15491 }
15492 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
15493 || TREE_CODE (type) == INTEGER_TYPE)
15494 {
15495
15496 if (TYPE_MODE (type) == DFmode && align < 64)
15497 return 64;
15498 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
15499 return 128;
15500 }
15501 return align;
15502 }
15503 \f
15504 /* Emit RTL insns to initialize the variable parts of a trampoline.
15505 FNADDR is an RTX for the address of the function's pure code.
15506 CXT is an RTX for the static chain value for the function. */
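/* The generated trampoline looks roughly like this (byte layout sketch;
   the 64-bit case picks between a movl and a movabs for FNADDR below):

   32-bit:  b9 <cxt:4>         movl  $cxt, %ecx
            e9 <disp:4>        jmp   fnaddr      (disp == fnaddr - (tramp + 10))

   64-bit:  41 bb <fnaddr:4>   movl  $fnaddr, %r11d   (or 49 bb <fnaddr:8>)
            49 ba <cxt:8>      movabs $cxt, %r10
            49 ff e3           jmp   *%r11  */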
15507 void
15508 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
15509 {
15510 if (!TARGET_64BIT)
15511 {
15512 /* Compute offset from the end of the jmp to the target function. */
15513 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
15514 plus_constant (tramp, 10),
15515 NULL_RTX, 1, OPTAB_DIRECT);
15516 emit_move_insn (gen_rtx_MEM (QImode, tramp),
15517 gen_int_mode (0xb9, QImode));
15518 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
15519 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
15520 gen_int_mode (0xe9, QImode));
15521 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
15522 }
15523 else
15524 {
15525 int offset = 0;
15526 /* Try to load the address using the shorter movl instead of movabs.
15527 We may want to support movq for kernel mode, but the kernel does not
15528 use trampolines at the moment. */
15529 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
15530 {
15531 fnaddr = copy_to_mode_reg (DImode, fnaddr);
15532 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15533 gen_int_mode (0xbb41, HImode));
15534 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
15535 gen_lowpart (SImode, fnaddr));
15536 offset += 6;
15537 }
15538 else
15539 {
15540 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15541 gen_int_mode (0xbb49, HImode));
15542 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15543 fnaddr);
15544 offset += 10;
15545 }
15546 /* Load static chain using movabs to r10. */
15547 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15548 gen_int_mode (0xba49, HImode));
15549 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
15550 cxt);
15551 offset += 10;
15552 /* Jump to r11. */
15553 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
15554 gen_int_mode (0xff49, HImode));
15555 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
15556 gen_int_mode (0xe3, QImode));
15557 offset += 3;
15558 gcc_assert (offset <= TRAMPOLINE_SIZE);
15559 }
15560
15561 #ifdef ENABLE_EXECUTE_STACK
15562 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
15563 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
15564 #endif
15565 }
15566 \f
15567 /* Codes for all the SSE/MMX builtins. */
15568 enum ix86_builtins
15569 {
15570 IX86_BUILTIN_ADDPS,
15571 IX86_BUILTIN_ADDSS,
15572 IX86_BUILTIN_DIVPS,
15573 IX86_BUILTIN_DIVSS,
15574 IX86_BUILTIN_MULPS,
15575 IX86_BUILTIN_MULSS,
15576 IX86_BUILTIN_SUBPS,
15577 IX86_BUILTIN_SUBSS,
15578
15579 IX86_BUILTIN_CMPEQPS,
15580 IX86_BUILTIN_CMPLTPS,
15581 IX86_BUILTIN_CMPLEPS,
15582 IX86_BUILTIN_CMPGTPS,
15583 IX86_BUILTIN_CMPGEPS,
15584 IX86_BUILTIN_CMPNEQPS,
15585 IX86_BUILTIN_CMPNLTPS,
15586 IX86_BUILTIN_CMPNLEPS,
15587 IX86_BUILTIN_CMPNGTPS,
15588 IX86_BUILTIN_CMPNGEPS,
15589 IX86_BUILTIN_CMPORDPS,
15590 IX86_BUILTIN_CMPUNORDPS,
15591 IX86_BUILTIN_CMPEQSS,
15592 IX86_BUILTIN_CMPLTSS,
15593 IX86_BUILTIN_CMPLESS,
15594 IX86_BUILTIN_CMPNEQSS,
15595 IX86_BUILTIN_CMPNLTSS,
15596 IX86_BUILTIN_CMPNLESS,
15597 IX86_BUILTIN_CMPNGTSS,
15598 IX86_BUILTIN_CMPNGESS,
15599 IX86_BUILTIN_CMPORDSS,
15600 IX86_BUILTIN_CMPUNORDSS,
15601
15602 IX86_BUILTIN_COMIEQSS,
15603 IX86_BUILTIN_COMILTSS,
15604 IX86_BUILTIN_COMILESS,
15605 IX86_BUILTIN_COMIGTSS,
15606 IX86_BUILTIN_COMIGESS,
15607 IX86_BUILTIN_COMINEQSS,
15608 IX86_BUILTIN_UCOMIEQSS,
15609 IX86_BUILTIN_UCOMILTSS,
15610 IX86_BUILTIN_UCOMILESS,
15611 IX86_BUILTIN_UCOMIGTSS,
15612 IX86_BUILTIN_UCOMIGESS,
15613 IX86_BUILTIN_UCOMINEQSS,
15614
15615 IX86_BUILTIN_CVTPI2PS,
15616 IX86_BUILTIN_CVTPS2PI,
15617 IX86_BUILTIN_CVTSI2SS,
15618 IX86_BUILTIN_CVTSI642SS,
15619 IX86_BUILTIN_CVTSS2SI,
15620 IX86_BUILTIN_CVTSS2SI64,
15621 IX86_BUILTIN_CVTTPS2PI,
15622 IX86_BUILTIN_CVTTSS2SI,
15623 IX86_BUILTIN_CVTTSS2SI64,
15624
15625 IX86_BUILTIN_MAXPS,
15626 IX86_BUILTIN_MAXSS,
15627 IX86_BUILTIN_MINPS,
15628 IX86_BUILTIN_MINSS,
15629
15630 IX86_BUILTIN_LOADUPS,
15631 IX86_BUILTIN_STOREUPS,
15632 IX86_BUILTIN_MOVSS,
15633
15634 IX86_BUILTIN_MOVHLPS,
15635 IX86_BUILTIN_MOVLHPS,
15636 IX86_BUILTIN_LOADHPS,
15637 IX86_BUILTIN_LOADLPS,
15638 IX86_BUILTIN_STOREHPS,
15639 IX86_BUILTIN_STORELPS,
15640
15641 IX86_BUILTIN_MASKMOVQ,
15642 IX86_BUILTIN_MOVMSKPS,
15643 IX86_BUILTIN_PMOVMSKB,
15644
15645 IX86_BUILTIN_MOVNTPS,
15646 IX86_BUILTIN_MOVNTQ,
15647
15648 IX86_BUILTIN_LOADDQU,
15649 IX86_BUILTIN_STOREDQU,
15650
15651 IX86_BUILTIN_PACKSSWB,
15652 IX86_BUILTIN_PACKSSDW,
15653 IX86_BUILTIN_PACKUSWB,
15654
15655 IX86_BUILTIN_PADDB,
15656 IX86_BUILTIN_PADDW,
15657 IX86_BUILTIN_PADDD,
15658 IX86_BUILTIN_PADDQ,
15659 IX86_BUILTIN_PADDSB,
15660 IX86_BUILTIN_PADDSW,
15661 IX86_BUILTIN_PADDUSB,
15662 IX86_BUILTIN_PADDUSW,
15663 IX86_BUILTIN_PSUBB,
15664 IX86_BUILTIN_PSUBW,
15665 IX86_BUILTIN_PSUBD,
15666 IX86_BUILTIN_PSUBQ,
15667 IX86_BUILTIN_PSUBSB,
15668 IX86_BUILTIN_PSUBSW,
15669 IX86_BUILTIN_PSUBUSB,
15670 IX86_BUILTIN_PSUBUSW,
15671
15672 IX86_BUILTIN_PAND,
15673 IX86_BUILTIN_PANDN,
15674 IX86_BUILTIN_POR,
15675 IX86_BUILTIN_PXOR,
15676
15677 IX86_BUILTIN_PAVGB,
15678 IX86_BUILTIN_PAVGW,
15679
15680 IX86_BUILTIN_PCMPEQB,
15681 IX86_BUILTIN_PCMPEQW,
15682 IX86_BUILTIN_PCMPEQD,
15683 IX86_BUILTIN_PCMPGTB,
15684 IX86_BUILTIN_PCMPGTW,
15685 IX86_BUILTIN_PCMPGTD,
15686
15687 IX86_BUILTIN_PMADDWD,
15688
15689 IX86_BUILTIN_PMAXSW,
15690 IX86_BUILTIN_PMAXUB,
15691 IX86_BUILTIN_PMINSW,
15692 IX86_BUILTIN_PMINUB,
15693
15694 IX86_BUILTIN_PMULHUW,
15695 IX86_BUILTIN_PMULHW,
15696 IX86_BUILTIN_PMULLW,
15697
15698 IX86_BUILTIN_PSADBW,
15699 IX86_BUILTIN_PSHUFW,
15700
15701 IX86_BUILTIN_PSLLW,
15702 IX86_BUILTIN_PSLLD,
15703 IX86_BUILTIN_PSLLQ,
15704 IX86_BUILTIN_PSRAW,
15705 IX86_BUILTIN_PSRAD,
15706 IX86_BUILTIN_PSRLW,
15707 IX86_BUILTIN_PSRLD,
15708 IX86_BUILTIN_PSRLQ,
15709 IX86_BUILTIN_PSLLWI,
15710 IX86_BUILTIN_PSLLDI,
15711 IX86_BUILTIN_PSLLQI,
15712 IX86_BUILTIN_PSRAWI,
15713 IX86_BUILTIN_PSRADI,
15714 IX86_BUILTIN_PSRLWI,
15715 IX86_BUILTIN_PSRLDI,
15716 IX86_BUILTIN_PSRLQI,
15717
15718 IX86_BUILTIN_PUNPCKHBW,
15719 IX86_BUILTIN_PUNPCKHWD,
15720 IX86_BUILTIN_PUNPCKHDQ,
15721 IX86_BUILTIN_PUNPCKLBW,
15722 IX86_BUILTIN_PUNPCKLWD,
15723 IX86_BUILTIN_PUNPCKLDQ,
15724
15725 IX86_BUILTIN_SHUFPS,
15726
15727 IX86_BUILTIN_RCPPS,
15728 IX86_BUILTIN_RCPSS,
15729 IX86_BUILTIN_RSQRTPS,
15730 IX86_BUILTIN_RSQRTSS,
15731 IX86_BUILTIN_SQRTPS,
15732 IX86_BUILTIN_SQRTSS,
15733
15734 IX86_BUILTIN_UNPCKHPS,
15735 IX86_BUILTIN_UNPCKLPS,
15736
15737 IX86_BUILTIN_ANDPS,
15738 IX86_BUILTIN_ANDNPS,
15739 IX86_BUILTIN_ORPS,
15740 IX86_BUILTIN_XORPS,
15741
15742 IX86_BUILTIN_EMMS,
15743 IX86_BUILTIN_LDMXCSR,
15744 IX86_BUILTIN_STMXCSR,
15745 IX86_BUILTIN_SFENCE,
15746
15747 /* 3DNow! Original */
15748 IX86_BUILTIN_FEMMS,
15749 IX86_BUILTIN_PAVGUSB,
15750 IX86_BUILTIN_PF2ID,
15751 IX86_BUILTIN_PFACC,
15752 IX86_BUILTIN_PFADD,
15753 IX86_BUILTIN_PFCMPEQ,
15754 IX86_BUILTIN_PFCMPGE,
15755 IX86_BUILTIN_PFCMPGT,
15756 IX86_BUILTIN_PFMAX,
15757 IX86_BUILTIN_PFMIN,
15758 IX86_BUILTIN_PFMUL,
15759 IX86_BUILTIN_PFRCP,
15760 IX86_BUILTIN_PFRCPIT1,
15761 IX86_BUILTIN_PFRCPIT2,
15762 IX86_BUILTIN_PFRSQIT1,
15763 IX86_BUILTIN_PFRSQRT,
15764 IX86_BUILTIN_PFSUB,
15765 IX86_BUILTIN_PFSUBR,
15766 IX86_BUILTIN_PI2FD,
15767 IX86_BUILTIN_PMULHRW,
15768
15769 /* 3DNow! Athlon Extensions */
15770 IX86_BUILTIN_PF2IW,
15771 IX86_BUILTIN_PFNACC,
15772 IX86_BUILTIN_PFPNACC,
15773 IX86_BUILTIN_PI2FW,
15774 IX86_BUILTIN_PSWAPDSI,
15775 IX86_BUILTIN_PSWAPDSF,
15776
15777 /* SSE2 */
15778 IX86_BUILTIN_ADDPD,
15779 IX86_BUILTIN_ADDSD,
15780 IX86_BUILTIN_DIVPD,
15781 IX86_BUILTIN_DIVSD,
15782 IX86_BUILTIN_MULPD,
15783 IX86_BUILTIN_MULSD,
15784 IX86_BUILTIN_SUBPD,
15785 IX86_BUILTIN_SUBSD,
15786
15787 IX86_BUILTIN_CMPEQPD,
15788 IX86_BUILTIN_CMPLTPD,
15789 IX86_BUILTIN_CMPLEPD,
15790 IX86_BUILTIN_CMPGTPD,
15791 IX86_BUILTIN_CMPGEPD,
15792 IX86_BUILTIN_CMPNEQPD,
15793 IX86_BUILTIN_CMPNLTPD,
15794 IX86_BUILTIN_CMPNLEPD,
15795 IX86_BUILTIN_CMPNGTPD,
15796 IX86_BUILTIN_CMPNGEPD,
15797 IX86_BUILTIN_CMPORDPD,
15798 IX86_BUILTIN_CMPUNORDPD,
15799 IX86_BUILTIN_CMPEQSD,
15800 IX86_BUILTIN_CMPLTSD,
15801 IX86_BUILTIN_CMPLESD,
15802 IX86_BUILTIN_CMPNEQSD,
15803 IX86_BUILTIN_CMPNLTSD,
15804 IX86_BUILTIN_CMPNLESD,
15805 IX86_BUILTIN_CMPORDSD,
15806 IX86_BUILTIN_CMPUNORDSD,
15807
15808 IX86_BUILTIN_COMIEQSD,
15809 IX86_BUILTIN_COMILTSD,
15810 IX86_BUILTIN_COMILESD,
15811 IX86_BUILTIN_COMIGTSD,
15812 IX86_BUILTIN_COMIGESD,
15813 IX86_BUILTIN_COMINEQSD,
15814 IX86_BUILTIN_UCOMIEQSD,
15815 IX86_BUILTIN_UCOMILTSD,
15816 IX86_BUILTIN_UCOMILESD,
15817 IX86_BUILTIN_UCOMIGTSD,
15818 IX86_BUILTIN_UCOMIGESD,
15819 IX86_BUILTIN_UCOMINEQSD,
15820
15821 IX86_BUILTIN_MAXPD,
15822 IX86_BUILTIN_MAXSD,
15823 IX86_BUILTIN_MINPD,
15824 IX86_BUILTIN_MINSD,
15825
15826 IX86_BUILTIN_ANDPD,
15827 IX86_BUILTIN_ANDNPD,
15828 IX86_BUILTIN_ORPD,
15829 IX86_BUILTIN_XORPD,
15830
15831 IX86_BUILTIN_SQRTPD,
15832 IX86_BUILTIN_SQRTSD,
15833
15834 IX86_BUILTIN_UNPCKHPD,
15835 IX86_BUILTIN_UNPCKLPD,
15836
15837 IX86_BUILTIN_SHUFPD,
15838
15839 IX86_BUILTIN_LOADUPD,
15840 IX86_BUILTIN_STOREUPD,
15841 IX86_BUILTIN_MOVSD,
15842
15843 IX86_BUILTIN_LOADHPD,
15844 IX86_BUILTIN_LOADLPD,
15845
15846 IX86_BUILTIN_CVTDQ2PD,
15847 IX86_BUILTIN_CVTDQ2PS,
15848
15849 IX86_BUILTIN_CVTPD2DQ,
15850 IX86_BUILTIN_CVTPD2PI,
15851 IX86_BUILTIN_CVTPD2PS,
15852 IX86_BUILTIN_CVTTPD2DQ,
15853 IX86_BUILTIN_CVTTPD2PI,
15854
15855 IX86_BUILTIN_CVTPI2PD,
15856 IX86_BUILTIN_CVTSI2SD,
15857 IX86_BUILTIN_CVTSI642SD,
15858
15859 IX86_BUILTIN_CVTSD2SI,
15860 IX86_BUILTIN_CVTSD2SI64,
15861 IX86_BUILTIN_CVTSD2SS,
15862 IX86_BUILTIN_CVTSS2SD,
15863 IX86_BUILTIN_CVTTSD2SI,
15864 IX86_BUILTIN_CVTTSD2SI64,
15865
15866 IX86_BUILTIN_CVTPS2DQ,
15867 IX86_BUILTIN_CVTPS2PD,
15868 IX86_BUILTIN_CVTTPS2DQ,
15869
15870 IX86_BUILTIN_MOVNTI,
15871 IX86_BUILTIN_MOVNTPD,
15872 IX86_BUILTIN_MOVNTDQ,
15873
15874 /* SSE2 MMX */
15875 IX86_BUILTIN_MASKMOVDQU,
15876 IX86_BUILTIN_MOVMSKPD,
15877 IX86_BUILTIN_PMOVMSKB128,
15878
15879 IX86_BUILTIN_PACKSSWB128,
15880 IX86_BUILTIN_PACKSSDW128,
15881 IX86_BUILTIN_PACKUSWB128,
15882
15883 IX86_BUILTIN_PADDB128,
15884 IX86_BUILTIN_PADDW128,
15885 IX86_BUILTIN_PADDD128,
15886 IX86_BUILTIN_PADDQ128,
15887 IX86_BUILTIN_PADDSB128,
15888 IX86_BUILTIN_PADDSW128,
15889 IX86_BUILTIN_PADDUSB128,
15890 IX86_BUILTIN_PADDUSW128,
15891 IX86_BUILTIN_PSUBB128,
15892 IX86_BUILTIN_PSUBW128,
15893 IX86_BUILTIN_PSUBD128,
15894 IX86_BUILTIN_PSUBQ128,
15895 IX86_BUILTIN_PSUBSB128,
15896 IX86_BUILTIN_PSUBSW128,
15897 IX86_BUILTIN_PSUBUSB128,
15898 IX86_BUILTIN_PSUBUSW128,
15899
15900 IX86_BUILTIN_PAND128,
15901 IX86_BUILTIN_PANDN128,
15902 IX86_BUILTIN_POR128,
15903 IX86_BUILTIN_PXOR128,
15904
15905 IX86_BUILTIN_PAVGB128,
15906 IX86_BUILTIN_PAVGW128,
15907
15908 IX86_BUILTIN_PCMPEQB128,
15909 IX86_BUILTIN_PCMPEQW128,
15910 IX86_BUILTIN_PCMPEQD128,
15911 IX86_BUILTIN_PCMPGTB128,
15912 IX86_BUILTIN_PCMPGTW128,
15913 IX86_BUILTIN_PCMPGTD128,
15914
15915 IX86_BUILTIN_PMADDWD128,
15916
15917 IX86_BUILTIN_PMAXSW128,
15918 IX86_BUILTIN_PMAXUB128,
15919 IX86_BUILTIN_PMINSW128,
15920 IX86_BUILTIN_PMINUB128,
15921
15922 IX86_BUILTIN_PMULUDQ,
15923 IX86_BUILTIN_PMULUDQ128,
15924 IX86_BUILTIN_PMULHUW128,
15925 IX86_BUILTIN_PMULHW128,
15926 IX86_BUILTIN_PMULLW128,
15927
15928 IX86_BUILTIN_PSADBW128,
15929 IX86_BUILTIN_PSHUFHW,
15930 IX86_BUILTIN_PSHUFLW,
15931 IX86_BUILTIN_PSHUFD,
15932
15933 IX86_BUILTIN_PSLLDQI128,
15934 IX86_BUILTIN_PSLLWI128,
15935 IX86_BUILTIN_PSLLDI128,
15936 IX86_BUILTIN_PSLLQI128,
15937 IX86_BUILTIN_PSRAWI128,
15938 IX86_BUILTIN_PSRADI128,
15939 IX86_BUILTIN_PSRLDQI128,
15940 IX86_BUILTIN_PSRLWI128,
15941 IX86_BUILTIN_PSRLDI128,
15942 IX86_BUILTIN_PSRLQI128,
15943
15944 IX86_BUILTIN_PSLLDQ128,
15945 IX86_BUILTIN_PSLLW128,
15946 IX86_BUILTIN_PSLLD128,
15947 IX86_BUILTIN_PSLLQ128,
15948 IX86_BUILTIN_PSRAW128,
15949 IX86_BUILTIN_PSRAD128,
15950 IX86_BUILTIN_PSRLW128,
15951 IX86_BUILTIN_PSRLD128,
15952 IX86_BUILTIN_PSRLQ128,
15953
15954 IX86_BUILTIN_PUNPCKHBW128,
15955 IX86_BUILTIN_PUNPCKHWD128,
15956 IX86_BUILTIN_PUNPCKHDQ128,
15957 IX86_BUILTIN_PUNPCKHQDQ128,
15958 IX86_BUILTIN_PUNPCKLBW128,
15959 IX86_BUILTIN_PUNPCKLWD128,
15960 IX86_BUILTIN_PUNPCKLDQ128,
15961 IX86_BUILTIN_PUNPCKLQDQ128,
15962
15963 IX86_BUILTIN_CLFLUSH,
15964 IX86_BUILTIN_MFENCE,
15965 IX86_BUILTIN_LFENCE,
15966
15967 /* Prescott New Instructions. */
15968 IX86_BUILTIN_ADDSUBPS,
15969 IX86_BUILTIN_HADDPS,
15970 IX86_BUILTIN_HSUBPS,
15971 IX86_BUILTIN_MOVSHDUP,
15972 IX86_BUILTIN_MOVSLDUP,
15973 IX86_BUILTIN_ADDSUBPD,
15974 IX86_BUILTIN_HADDPD,
15975 IX86_BUILTIN_HSUBPD,
15976 IX86_BUILTIN_LDDQU,
15977
15978 IX86_BUILTIN_MONITOR,
15979 IX86_BUILTIN_MWAIT,
15980
15981 /* SSSE3. */
15982 IX86_BUILTIN_PHADDW,
15983 IX86_BUILTIN_PHADDD,
15984 IX86_BUILTIN_PHADDSW,
15985 IX86_BUILTIN_PHSUBW,
15986 IX86_BUILTIN_PHSUBD,
15987 IX86_BUILTIN_PHSUBSW,
15988 IX86_BUILTIN_PMADDUBSW,
15989 IX86_BUILTIN_PMULHRSW,
15990 IX86_BUILTIN_PSHUFB,
15991 IX86_BUILTIN_PSIGNB,
15992 IX86_BUILTIN_PSIGNW,
15993 IX86_BUILTIN_PSIGND,
15994 IX86_BUILTIN_PALIGNR,
15995 IX86_BUILTIN_PABSB,
15996 IX86_BUILTIN_PABSW,
15997 IX86_BUILTIN_PABSD,
15998
15999 IX86_BUILTIN_PHADDW128,
16000 IX86_BUILTIN_PHADDD128,
16001 IX86_BUILTIN_PHADDSW128,
16002 IX86_BUILTIN_PHSUBW128,
16003 IX86_BUILTIN_PHSUBD128,
16004 IX86_BUILTIN_PHSUBSW128,
16005 IX86_BUILTIN_PMADDUBSW128,
16006 IX86_BUILTIN_PMULHRSW128,
16007 IX86_BUILTIN_PSHUFB128,
16008 IX86_BUILTIN_PSIGNB128,
16009 IX86_BUILTIN_PSIGNW128,
16010 IX86_BUILTIN_PSIGND128,
16011 IX86_BUILTIN_PALIGNR128,
16012 IX86_BUILTIN_PABSB128,
16013 IX86_BUILTIN_PABSW128,
16014 IX86_BUILTIN_PABSD128,
16015
16016 /* AMDFAM10 - SSE4A New Instructions. */
16017 IX86_BUILTIN_MOVNTSD,
16018 IX86_BUILTIN_MOVNTSS,
16019 IX86_BUILTIN_EXTRQI,
16020 IX86_BUILTIN_EXTRQ,
16021 IX86_BUILTIN_INSERTQI,
16022 IX86_BUILTIN_INSERTQ,
16023
16024 IX86_BUILTIN_VEC_INIT_V2SI,
16025 IX86_BUILTIN_VEC_INIT_V4HI,
16026 IX86_BUILTIN_VEC_INIT_V8QI,
16027 IX86_BUILTIN_VEC_EXT_V2DF,
16028 IX86_BUILTIN_VEC_EXT_V2DI,
16029 IX86_BUILTIN_VEC_EXT_V4SF,
16030 IX86_BUILTIN_VEC_EXT_V4SI,
16031 IX86_BUILTIN_VEC_EXT_V8HI,
16032 IX86_BUILTIN_VEC_EXT_V2SI,
16033 IX86_BUILTIN_VEC_EXT_V4HI,
16034 IX86_BUILTIN_VEC_SET_V8HI,
16035 IX86_BUILTIN_VEC_SET_V4HI,
16036
16037 IX86_BUILTIN_MAX
16038 };
16039
16040 /* Table for the ix86 builtin decls. */
16041 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16042
16043 /* Add an ix86 target builtin function with CODE, NAME and TYPE. Do so
16044 * only if target_flags includes one of the bits in MASK. Stores the
16045 * function decl in the ix86_builtins array.
16046 * Returns the function decl, or NULL_TREE if the builtin was not added. */
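/* A hypothetical call (the function type node would be built beforehand,
   as ix86_init_mmx_sse_builtins does):
     def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss",
                  v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
   With -msse enabled this registers the builtin and stores its decl in
   ix86_builtins[IX86_BUILTIN_RSQRTSS]; otherwise NULL_TREE is returned.  */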
16047
16048 static inline tree
16049 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16050 {
16051 tree decl = NULL_TREE;
16052
16053 if (mask & target_flags
16054 && (!(mask & MASK_64BIT) || TARGET_64BIT))
16055 {
16056 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16057 NULL, NULL_TREE);
16058 ix86_builtins[(int) code] = decl;
16059 }
16060
16061 return decl;
16062 }
16063
16064 /* Like def_builtin, but also marks the function decl "const". */
16065
16066 static inline tree
16067 def_builtin_const (int mask, const char *name, tree type,
16068 enum ix86_builtins code)
16069 {
16070 tree decl = def_builtin (mask, name, type, code);
16071 if (decl)
16072 TREE_READONLY (decl) = 1;
16073 return decl;
16074 }
16075
16076 /* Bits for builtin_description.flag. */
16077
16078 /* Set when we don't support the comparison natively, and should
16079 swap_comparison in order to support it. */
16080 #define BUILTIN_DESC_SWAP_OPERANDS 1
16081
16082 struct builtin_description
16083 {
16084 const unsigned int mask;
16085 const enum insn_code icode;
16086 const char *const name;
16087 const enum ix86_builtins code;
16088 const enum rtx_code comparison;
16089 const unsigned int flag;
16090 };
16091
16092 static const struct builtin_description bdesc_comi[] =
16093 {
16094 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16095 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
16096 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
16097 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
16098 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
16099 { MASK_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
16100 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
16101 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
16102 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
16103 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
16104 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
16105 { MASK_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
16106 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
16107 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
16108 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
16109 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
16110 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
16111 { MASK_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
16112 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
16113 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
16114 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
16115 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
16116 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
16117 { MASK_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
16118 };
16119
16120 static const struct builtin_description bdesc_2arg[] =
16121 {
16122 /* SSE */
16123 { MASK_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, 0, 0 },
16124 { MASK_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, 0, 0 },
16125 { MASK_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, 0, 0 },
16126 { MASK_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, 0, 0 },
16127 { MASK_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, 0, 0 },
16128 { MASK_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, 0, 0 },
16129 { MASK_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, 0, 0 },
16130 { MASK_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, 0, 0 },
16131
16132 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
16133 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
16134 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
16135 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT,
16136 BUILTIN_DESC_SWAP_OPERANDS },
16137 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE,
16138 BUILTIN_DESC_SWAP_OPERANDS },
16139 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
16140 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
16141 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
16142 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
16143 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE,
16144 BUILTIN_DESC_SWAP_OPERANDS },
16145 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT,
16146 BUILTIN_DESC_SWAP_OPERANDS },
16147 { MASK_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
16148 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
16149 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
16150 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
16151 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
16152 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
16153 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
16154 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
16155 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE,
16156 BUILTIN_DESC_SWAP_OPERANDS },
16157 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT,
16158 BUILTIN_DESC_SWAP_OPERANDS },
16159 { MASK_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, UNORDERED, 0 },
16160
16161 { MASK_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, 0, 0 },
16162 { MASK_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, 0, 0 },
16163 { MASK_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, 0, 0 },
16164 { MASK_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, 0, 0 },
16165
16166 { MASK_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, 0, 0 },
16167 { MASK_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, 0, 0 },
16168 { MASK_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, 0, 0 },
16169 { MASK_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, 0, 0 },
16170
16171 { MASK_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, 0, 0 },
16172 { MASK_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, 0, 0 },
16173 { MASK_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, 0, 0 },
16174 { MASK_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, 0, 0 },
16175 { MASK_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, 0, 0 },
16176
16177 /* MMX */
16178 { MASK_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, 0, 0 },
16179 { MASK_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, 0, 0 },
16180 { MASK_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, 0, 0 },
16181 { MASK_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, 0, 0 },
16182 { MASK_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, 0, 0 },
16183 { MASK_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, 0, 0 },
16184 { MASK_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, 0, 0 },
16185 { MASK_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, 0, 0 },
16186
16187 { MASK_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, 0, 0 },
16188 { MASK_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, 0, 0 },
16189 { MASK_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, 0, 0 },
16190 { MASK_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, 0, 0 },
16191 { MASK_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, 0, 0 },
16192 { MASK_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, 0, 0 },
16193 { MASK_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, 0, 0 },
16194 { MASK_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, 0, 0 },
16195
16196 { MASK_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, 0, 0 },
16197 { MASK_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, 0, 0 },
16198 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, 0, 0 },
16199
16200 { MASK_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, 0, 0 },
16201 { MASK_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, 0, 0 },
16202 { MASK_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, 0, 0 },
16203 { MASK_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, 0, 0 },
16204
16205 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, 0, 0 },
16206 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, 0, 0 },
16207
16208 { MASK_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, 0, 0 },
16209 { MASK_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, 0, 0 },
16210 { MASK_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, 0, 0 },
16211 { MASK_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, 0, 0 },
16212 { MASK_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, 0, 0 },
16213 { MASK_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, 0, 0 },
16214
16215 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, 0, 0 },
16216 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, 0, 0 },
16217 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, 0, 0 },
16218 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, 0, 0 },
16219
16220 { MASK_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, 0, 0 },
16221 { MASK_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, 0, 0 },
16222 { MASK_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, 0, 0 },
16223 { MASK_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, 0, 0 },
16224 { MASK_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, 0, 0 },
16225 { MASK_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, 0, 0 },
16226
16227 /* Special. */
16228 { MASK_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, 0, 0 },
16229 { MASK_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, 0, 0 },
16230 { MASK_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, 0, 0 },
16231
16232 { MASK_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, 0, 0 },
16233 { MASK_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, 0, 0 },
16234 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, 0, 0 },
16235
16236 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, 0, 0 },
16237 { MASK_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, 0, 0 },
16238 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, 0, 0 },
16239 { MASK_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, 0, 0 },
16240 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, 0, 0 },
16241 { MASK_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, 0, 0 },
16242
16243 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, 0, 0 },
16244 { MASK_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, 0, 0 },
16245 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, 0, 0 },
16246 { MASK_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, 0, 0 },
16247 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, 0, 0 },
16248 { MASK_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, 0, 0 },
16249
16250 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, 0, 0 },
16251 { MASK_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, 0, 0 },
16252 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, 0, 0 },
16253 { MASK_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, 0, 0 },
16254
16255 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, 0, 0 },
16256 { MASK_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, 0, 0 },
16257
16258 /* SSE2 */
16259 { MASK_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, 0, 0 },
16260 { MASK_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, 0, 0 },
16261 { MASK_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, 0, 0 },
16262 { MASK_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, 0, 0 },
16263 { MASK_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, 0, 0 },
16264 { MASK_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, 0, 0 },
16265 { MASK_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, 0, 0 },
16266 { MASK_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, 0, 0 },
16267
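      /* Packed double compares.  Every predicate uses the same maskcmp
         pattern; SSE2 cmppd has no direct GT/GE encoding, so those entries
         carry BUILTIN_DESC_SWAP_OPERANDS and are emitted as LT/LE with the
         operands exchanged (see ix86_expand_sse_compare below).  */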
16268 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
16269 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
16270 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
16271 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT,
16272 BUILTIN_DESC_SWAP_OPERANDS },
16273 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE,
16274 BUILTIN_DESC_SWAP_OPERANDS },
16275 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
16276 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
16277 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
16278 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
16279 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE,
16280 BUILTIN_DESC_SWAP_OPERANDS },
16281 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT,
16282 BUILTIN_DESC_SWAP_OPERANDS },
16283 { MASK_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
16284 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
16285 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
16286 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
16287 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
16288 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
16289 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
16290 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
16291 { MASK_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
16292
16293 { MASK_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, 0, 0 },
16294 { MASK_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, 0, 0 },
16295 { MASK_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, 0, 0 },
16296 { MASK_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, 0, 0 },
16297
16298 { MASK_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, 0, 0 },
16299 { MASK_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, 0, 0 },
16300 { MASK_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, 0, 0 },
16301 { MASK_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, 0, 0 },
16302
16303 { MASK_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, 0, 0 },
16304 { MASK_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, 0, 0 },
16305 { MASK_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, 0, 0 },
16306
16307 /* SSE2 MMX */
16308 { MASK_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, 0, 0 },
16309 { MASK_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, 0, 0 },
16310 { MASK_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, 0, 0 },
16311 { MASK_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, 0, 0 },
16312 { MASK_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, 0, 0 },
16313 { MASK_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, 0, 0 },
16314 { MASK_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, 0, 0 },
16315 { MASK_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, 0, 0 },
16316
16317   { MASK_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, 0, 0 },
16318   { MASK_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, 0, 0 },
16319   { MASK_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, 0, 0 },
16320   { MASK_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, 0, 0 },
16321   { MASK_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, 0, 0 },
16322   { MASK_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, 0, 0 },
16323   { MASK_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, 0, 0 },
16324   { MASK_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, 0, 0 },
16325
16326 { MASK_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, 0, 0 },
16327 { MASK_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, 0, 0 },
16328
16329 { MASK_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, 0, 0 },
16330 { MASK_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, 0, 0 },
16331 { MASK_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, 0, 0 },
16332 { MASK_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, 0, 0 },
16333
16334 { MASK_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, 0, 0 },
16335 { MASK_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, 0, 0 },
16336
16337 { MASK_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, 0, 0 },
16338 { MASK_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, 0, 0 },
16339 { MASK_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, 0, 0 },
16340 { MASK_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, 0, 0 },
16341 { MASK_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, 0, 0 },
16342 { MASK_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, 0, 0 },
16343
16344 { MASK_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, 0, 0 },
16345 { MASK_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, 0, 0 },
16346 { MASK_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, 0, 0 },
16347 { MASK_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, 0, 0 },
16348
16349 { MASK_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, 0, 0 },
16350 { MASK_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, 0, 0 },
16351 { MASK_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, 0, 0 },
16352 { MASK_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, 0, 0 },
16353 { MASK_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, 0, 0 },
16354 { MASK_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, 0, 0 },
16355 { MASK_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, 0, 0 },
16356 { MASK_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, 0, 0 },
16357
16358 { MASK_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, 0, 0 },
16359 { MASK_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, 0, 0 },
16360 { MASK_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, 0, 0 },
16361
16362 { MASK_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, 0, 0 },
16363 { MASK_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, 0, 0 },
16364
16365 { MASK_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, 0, 0 },
16366 { MASK_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, 0, 0 },
16367
16368 { MASK_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, 0, 0 },
16369 { MASK_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, 0, 0 },
16370 { MASK_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, 0, 0 },
16371
16372 { MASK_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, 0, 0 },
16373 { MASK_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, 0, 0 },
16374 { MASK_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, 0, 0 },
16375
16376 { MASK_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, 0, 0 },
16377 { MASK_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, 0, 0 },
16378
16379 { MASK_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, 0, 0 },
16380
16381 { MASK_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, 0, 0 },
16382 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, 0, 0 },
16383 { MASK_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, 0, 0 },
16384 { MASK_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, 0, 0 },
16385
16386   /* SSE3 */
16387 { MASK_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, 0, 0 },
16388 { MASK_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, 0, 0 },
16389 { MASK_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, 0, 0 },
16390 { MASK_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, 0, 0 },
16391 { MASK_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, 0, 0 },
16392 { MASK_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, 0, 0 },
16393
16394 /* SSSE3 */
16395 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, 0, 0 },
16396 { MASK_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, 0, 0 },
16397 { MASK_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, 0, 0 },
16398 { MASK_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, 0, 0 },
16399 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, 0, 0 },
16400 { MASK_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, 0, 0 },
16401 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, 0, 0 },
16402 { MASK_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, 0, 0 },
16403 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, 0, 0 },
16404 { MASK_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, 0, 0 },
16405 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, 0, 0 },
16406 { MASK_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, 0, 0 },
16407 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, 0, 0 },
16408 { MASK_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, 0, 0 },
16409 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, 0, 0 },
16410 { MASK_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, 0, 0 },
16411 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, 0, 0 },
16412 { MASK_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, 0, 0 },
16413 { MASK_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, 0, 0 },
16414 { MASK_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, 0, 0 },
16415 { MASK_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, 0, 0 },
16416 { MASK_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, 0, 0 },
16417 { MASK_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, 0, 0 },
16418 { MASK_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, 0, 0 }
16419 };
16420
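/* Builtins that take a single vector argument.  These are registered by the
   one-operand loop in ix86_init_mmx_sse_builtins below and are expanded
   through ix86_expand_unop_builtin.  */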
16421 static const struct builtin_description bdesc_1arg[] =
16422 {
16423 { MASK_SSE | MASK_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, 0, 0 },
16424 { MASK_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, 0, 0 },
16425
16426 { MASK_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, 0, 0 },
16427 { MASK_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, 0, 0 },
16428 { MASK_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, 0, 0 },
16429
16430 { MASK_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, 0, 0 },
16431 { MASK_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, 0, 0 },
16432 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, 0, 0 },
16433 { MASK_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, 0, 0 },
16434 { MASK_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, 0, 0 },
16435 { MASK_SSE | MASK_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, 0, 0 },
16436
16437 { MASK_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, 0, 0 },
16438 { MASK_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, 0, 0 },
16439
16440 { MASK_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, 0, 0 },
16441
16442 { MASK_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, 0, 0 },
16443 { MASK_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, 0, 0 },
16444
16445 { MASK_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, 0, 0 },
16446 { MASK_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, 0, 0 },
16447 { MASK_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, 0, 0 },
16448 { MASK_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, 0, 0 },
16449 { MASK_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, 0, 0 },
16450
16451 { MASK_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, 0, 0 },
16452
16453 { MASK_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, 0, 0 },
16454 { MASK_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, 0, 0 },
16455 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, 0, 0 },
16456 { MASK_SSE2 | MASK_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, 0, 0 },
16457
16458 { MASK_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, 0, 0 },
16459 { MASK_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, 0, 0 },
16460 { MASK_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, 0, 0 },
16461
16462 /* SSE3 */
16463 { MASK_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, 0, 0 },
16464 { MASK_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, 0, 0 },
16465
16466 /* SSSE3 */
16467 { MASK_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, 0, 0 },
16468 { MASK_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, 0, 0 },
16469 { MASK_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, 0, 0 },
16470 { MASK_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, 0, 0 },
16471 { MASK_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, 0, 0 },
16472 { MASK_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, 0, 0 },
16473 };
16474
16475 /* Set up all the MMX/SSE builtins. This is not called if TARGET_MMX
16476 is zero. Otherwise, if TARGET_SSE is not set, only expand the MMX
16477 builtins. */
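/* For reference: the intrinsics in GCC's <mmintrin.h>, <xmmintrin.h> and
   <emmintrin.h> headers are thin wrappers around these builtins; e.g.
   _mm_add_pd expands to __builtin_ia32_addpd, which the bdesc_2arg table
   above ties to the addv2df3 insn pattern.  */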
16478 static void
16479 ix86_init_mmx_sse_builtins (void)
16480 {
16481 const struct builtin_description * d;
16482 size_t i;
16483
16484 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
16485 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
16486 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
16487 tree V2DI_type_node
16488 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
16489 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
16490 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
16491 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
16492 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
16493 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
16494 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
16495
16496 tree pchar_type_node = build_pointer_type (char_type_node);
16497 tree pcchar_type_node = build_pointer_type (
16498 build_type_variant (char_type_node, 1, 0));
16499 tree pfloat_type_node = build_pointer_type (float_type_node);
16500 tree pcfloat_type_node = build_pointer_type (
16501 build_type_variant (float_type_node, 1, 0));
16502 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
16503 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
16504 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
16505
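  /* Each *_ftype_* tree below is a builtin prototype built with
     build_function_type_list: the first argument is the return type, the
     rest are the parameter types, terminated by NULL_TREE.  For example,
     v4sf_ftype_v4sf_v4sf reads "V4SF f (V4SF, V4SF)".  */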
16506 /* Comparisons. */
16507 tree int_ftype_v4sf_v4sf
16508 = build_function_type_list (integer_type_node,
16509 V4SF_type_node, V4SF_type_node, NULL_TREE);
16510 tree v4si_ftype_v4sf_v4sf
16511 = build_function_type_list (V4SI_type_node,
16512 V4SF_type_node, V4SF_type_node, NULL_TREE);
16513 /* MMX/SSE/integer conversions. */
16514 tree int_ftype_v4sf
16515 = build_function_type_list (integer_type_node,
16516 V4SF_type_node, NULL_TREE);
16517 tree int64_ftype_v4sf
16518 = build_function_type_list (long_long_integer_type_node,
16519 V4SF_type_node, NULL_TREE);
16520 tree int_ftype_v8qi
16521 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
16522 tree v4sf_ftype_v4sf_int
16523 = build_function_type_list (V4SF_type_node,
16524 V4SF_type_node, integer_type_node, NULL_TREE);
16525 tree v4sf_ftype_v4sf_int64
16526 = build_function_type_list (V4SF_type_node,
16527 V4SF_type_node, long_long_integer_type_node,
16528 NULL_TREE);
16529 tree v4sf_ftype_v4sf_v2si
16530 = build_function_type_list (V4SF_type_node,
16531 V4SF_type_node, V2SI_type_node, NULL_TREE);
16532
16533 /* Miscellaneous. */
16534 tree v8qi_ftype_v4hi_v4hi
16535 = build_function_type_list (V8QI_type_node,
16536 V4HI_type_node, V4HI_type_node, NULL_TREE);
16537 tree v4hi_ftype_v2si_v2si
16538 = build_function_type_list (V4HI_type_node,
16539 V2SI_type_node, V2SI_type_node, NULL_TREE);
16540 tree v4sf_ftype_v4sf_v4sf_int
16541 = build_function_type_list (V4SF_type_node,
16542 V4SF_type_node, V4SF_type_node,
16543 integer_type_node, NULL_TREE);
16544 tree v2si_ftype_v4hi_v4hi
16545 = build_function_type_list (V2SI_type_node,
16546 V4HI_type_node, V4HI_type_node, NULL_TREE);
16547 tree v4hi_ftype_v4hi_int
16548 = build_function_type_list (V4HI_type_node,
16549 V4HI_type_node, integer_type_node, NULL_TREE);
16550 tree v4hi_ftype_v4hi_di
16551 = build_function_type_list (V4HI_type_node,
16552 V4HI_type_node, long_long_unsigned_type_node,
16553 NULL_TREE);
16554 tree v2si_ftype_v2si_di
16555 = build_function_type_list (V2SI_type_node,
16556 V2SI_type_node, long_long_unsigned_type_node,
16557 NULL_TREE);
16558 tree void_ftype_void
16559 = build_function_type (void_type_node, void_list_node);
16560 tree void_ftype_unsigned
16561 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
16562 tree void_ftype_unsigned_unsigned
16563 = build_function_type_list (void_type_node, unsigned_type_node,
16564 unsigned_type_node, NULL_TREE);
16565 tree void_ftype_pcvoid_unsigned_unsigned
16566 = build_function_type_list (void_type_node, const_ptr_type_node,
16567 unsigned_type_node, unsigned_type_node,
16568 NULL_TREE);
16569 tree unsigned_ftype_void
16570 = build_function_type (unsigned_type_node, void_list_node);
16571 tree v2si_ftype_v4sf
16572 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
16573 /* Loads/stores. */
16574 tree void_ftype_v8qi_v8qi_pchar
16575 = build_function_type_list (void_type_node,
16576 V8QI_type_node, V8QI_type_node,
16577 pchar_type_node, NULL_TREE);
16578 tree v4sf_ftype_pcfloat
16579 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
16580 /* @@@ the type is bogus */
16581 tree v4sf_ftype_v4sf_pv2si
16582 = build_function_type_list (V4SF_type_node,
16583 V4SF_type_node, pv2si_type_node, NULL_TREE);
16584 tree void_ftype_pv2si_v4sf
16585 = build_function_type_list (void_type_node,
16586 pv2si_type_node, V4SF_type_node, NULL_TREE);
16587 tree void_ftype_pfloat_v4sf
16588 = build_function_type_list (void_type_node,
16589 pfloat_type_node, V4SF_type_node, NULL_TREE);
16590 tree void_ftype_pdi_di
16591 = build_function_type_list (void_type_node,
16592 pdi_type_node, long_long_unsigned_type_node,
16593 NULL_TREE);
16594 tree void_ftype_pv2di_v2di
16595 = build_function_type_list (void_type_node,
16596 pv2di_type_node, V2DI_type_node, NULL_TREE);
16597 /* Normal vector unops. */
16598 tree v4sf_ftype_v4sf
16599 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
16600 tree v16qi_ftype_v16qi
16601 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
16602 tree v8hi_ftype_v8hi
16603 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
16604 tree v4si_ftype_v4si
16605 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
16606 tree v8qi_ftype_v8qi
16607 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
16608 tree v4hi_ftype_v4hi
16609 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
16610
16611 /* Normal vector binops. */
16612 tree v4sf_ftype_v4sf_v4sf
16613 = build_function_type_list (V4SF_type_node,
16614 V4SF_type_node, V4SF_type_node, NULL_TREE);
16615 tree v8qi_ftype_v8qi_v8qi
16616 = build_function_type_list (V8QI_type_node,
16617 V8QI_type_node, V8QI_type_node, NULL_TREE);
16618 tree v4hi_ftype_v4hi_v4hi
16619 = build_function_type_list (V4HI_type_node,
16620 V4HI_type_node, V4HI_type_node, NULL_TREE);
16621 tree v2si_ftype_v2si_v2si
16622 = build_function_type_list (V2SI_type_node,
16623 V2SI_type_node, V2SI_type_node, NULL_TREE);
16624 tree di_ftype_di_di
16625 = build_function_type_list (long_long_unsigned_type_node,
16626 long_long_unsigned_type_node,
16627 long_long_unsigned_type_node, NULL_TREE);
16628
16629 tree di_ftype_di_di_int
16630 = build_function_type_list (long_long_unsigned_type_node,
16631 long_long_unsigned_type_node,
16632 long_long_unsigned_type_node,
16633 integer_type_node, NULL_TREE);
16634
16635 tree v2si_ftype_v2sf
16636 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
16637 tree v2sf_ftype_v2si
16638 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
16639 tree v2si_ftype_v2si
16640 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
16641 tree v2sf_ftype_v2sf
16642 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
16643 tree v2sf_ftype_v2sf_v2sf
16644 = build_function_type_list (V2SF_type_node,
16645 V2SF_type_node, V2SF_type_node, NULL_TREE);
16646 tree v2si_ftype_v2sf_v2sf
16647 = build_function_type_list (V2SI_type_node,
16648 V2SF_type_node, V2SF_type_node, NULL_TREE);
16649 tree pint_type_node = build_pointer_type (integer_type_node);
16650 tree pdouble_type_node = build_pointer_type (double_type_node);
16651 tree pcdouble_type_node = build_pointer_type (
16652 build_type_variant (double_type_node, 1, 0));
16653 tree int_ftype_v2df_v2df
16654 = build_function_type_list (integer_type_node,
16655 V2DF_type_node, V2DF_type_node, NULL_TREE);
16656
16657 tree void_ftype_pcvoid
16658 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
16659 tree v4sf_ftype_v4si
16660 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
16661 tree v4si_ftype_v4sf
16662 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
16663 tree v2df_ftype_v4si
16664 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
16665 tree v4si_ftype_v2df
16666 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
16667 tree v2si_ftype_v2df
16668 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
16669 tree v4sf_ftype_v2df
16670 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
16671 tree v2df_ftype_v2si
16672 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
16673 tree v2df_ftype_v4sf
16674 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
16675 tree int_ftype_v2df
16676 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
16677 tree int64_ftype_v2df
16678 = build_function_type_list (long_long_integer_type_node,
16679 V2DF_type_node, NULL_TREE);
16680 tree v2df_ftype_v2df_int
16681 = build_function_type_list (V2DF_type_node,
16682 V2DF_type_node, integer_type_node, NULL_TREE);
16683 tree v2df_ftype_v2df_int64
16684 = build_function_type_list (V2DF_type_node,
16685 V2DF_type_node, long_long_integer_type_node,
16686 NULL_TREE);
16687 tree v4sf_ftype_v4sf_v2df
16688 = build_function_type_list (V4SF_type_node,
16689 V4SF_type_node, V2DF_type_node, NULL_TREE);
16690 tree v2df_ftype_v2df_v4sf
16691 = build_function_type_list (V2DF_type_node,
16692 V2DF_type_node, V4SF_type_node, NULL_TREE);
16693 tree v2df_ftype_v2df_v2df_int
16694 = build_function_type_list (V2DF_type_node,
16695 V2DF_type_node, V2DF_type_node,
16696 integer_type_node,
16697 NULL_TREE);
16698 tree v2df_ftype_v2df_pcdouble
16699 = build_function_type_list (V2DF_type_node,
16700 V2DF_type_node, pcdouble_type_node, NULL_TREE);
16701 tree void_ftype_pdouble_v2df
16702 = build_function_type_list (void_type_node,
16703 pdouble_type_node, V2DF_type_node, NULL_TREE);
16704 tree void_ftype_pint_int
16705 = build_function_type_list (void_type_node,
16706 pint_type_node, integer_type_node, NULL_TREE);
16707 tree void_ftype_v16qi_v16qi_pchar
16708 = build_function_type_list (void_type_node,
16709 V16QI_type_node, V16QI_type_node,
16710 pchar_type_node, NULL_TREE);
16711 tree v2df_ftype_pcdouble
16712 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
16713 tree v2df_ftype_v2df_v2df
16714 = build_function_type_list (V2DF_type_node,
16715 V2DF_type_node, V2DF_type_node, NULL_TREE);
16716 tree v16qi_ftype_v16qi_v16qi
16717 = build_function_type_list (V16QI_type_node,
16718 V16QI_type_node, V16QI_type_node, NULL_TREE);
16719 tree v8hi_ftype_v8hi_v8hi
16720 = build_function_type_list (V8HI_type_node,
16721 V8HI_type_node, V8HI_type_node, NULL_TREE);
16722 tree v4si_ftype_v4si_v4si
16723 = build_function_type_list (V4SI_type_node,
16724 V4SI_type_node, V4SI_type_node, NULL_TREE);
16725 tree v2di_ftype_v2di_v2di
16726 = build_function_type_list (V2DI_type_node,
16727 V2DI_type_node, V2DI_type_node, NULL_TREE);
16728 tree v2di_ftype_v2df_v2df
16729 = build_function_type_list (V2DI_type_node,
16730 V2DF_type_node, V2DF_type_node, NULL_TREE);
16731 tree v2df_ftype_v2df
16732 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
16733 tree v2di_ftype_v2di_int
16734 = build_function_type_list (V2DI_type_node,
16735 V2DI_type_node, integer_type_node, NULL_TREE);
16736 tree v2di_ftype_v2di_v2di_int
16737 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16738 V2DI_type_node, integer_type_node, NULL_TREE);
16739 tree v4si_ftype_v4si_int
16740 = build_function_type_list (V4SI_type_node,
16741 V4SI_type_node, integer_type_node, NULL_TREE);
16742 tree v8hi_ftype_v8hi_int
16743 = build_function_type_list (V8HI_type_node,
16744 V8HI_type_node, integer_type_node, NULL_TREE);
16745 tree v4si_ftype_v8hi_v8hi
16746 = build_function_type_list (V4SI_type_node,
16747 V8HI_type_node, V8HI_type_node, NULL_TREE);
16748 tree di_ftype_v8qi_v8qi
16749 = build_function_type_list (long_long_unsigned_type_node,
16750 V8QI_type_node, V8QI_type_node, NULL_TREE);
16751 tree di_ftype_v2si_v2si
16752 = build_function_type_list (long_long_unsigned_type_node,
16753 V2SI_type_node, V2SI_type_node, NULL_TREE);
16754 tree v2di_ftype_v16qi_v16qi
16755 = build_function_type_list (V2DI_type_node,
16756 V16QI_type_node, V16QI_type_node, NULL_TREE);
16757 tree v2di_ftype_v4si_v4si
16758 = build_function_type_list (V2DI_type_node,
16759 V4SI_type_node, V4SI_type_node, NULL_TREE);
16760 tree int_ftype_v16qi
16761 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
16762 tree v16qi_ftype_pcchar
16763 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
16764 tree void_ftype_pchar_v16qi
16765 = build_function_type_list (void_type_node,
16766 pchar_type_node, V16QI_type_node, NULL_TREE);
16767
16768 tree v2di_ftype_v2di_unsigned_unsigned
16769 = build_function_type_list (V2DI_type_node, V2DI_type_node,
16770 unsigned_type_node, unsigned_type_node,
16771 NULL_TREE);
16772 tree v2di_ftype_v2di_v2di_unsigned_unsigned
16773 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
16774 unsigned_type_node, unsigned_type_node,
16775 NULL_TREE);
16776 tree v2di_ftype_v2di_v16qi
16777 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
16778 NULL_TREE);
16779
16780 tree float80_type;
16781 tree float128_type;
16782 tree ftype;
16783
16784 /* The __float80 type. */
16785 if (TYPE_MODE (long_double_type_node) == XFmode)
16786 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
16787 "__float80");
16788 else
16789 {
16790       /* long double does not have XFmode here, so build a distinct
             80-bit type for __float80.  */
16791 float80_type = make_node (REAL_TYPE);
16792 TYPE_PRECISION (float80_type) = 80;
16793 layout_type (float80_type);
16794 (*lang_hooks.types.register_builtin_type) (float80_type, "__float80");
16795 }
16796
16797 if (TARGET_64BIT)
16798 {
16799 float128_type = make_node (REAL_TYPE);
16800 TYPE_PRECISION (float128_type) = 128;
16801 layout_type (float128_type);
16802 (*lang_hooks.types.register_builtin_type) (float128_type, "__float128");
16803 }
16804
16805 /* Add all builtins that are more or less simple operations on two
16806 operands. */
16807 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
16808 {
16809       /* Pick the prototype from the mode of operand 1; the result
16810          (operand 0) can have a different mode for mask-generating
               compares.  */
16811 enum machine_mode mode;
16812 tree type;
16813
16814 if (d->name == 0)
16815 continue;
16816 mode = insn_data[d->icode].operand[1].mode;
16817
16818 switch (mode)
16819 {
16820 case V16QImode:
16821 type = v16qi_ftype_v16qi_v16qi;
16822 break;
16823 case V8HImode:
16824 type = v8hi_ftype_v8hi_v8hi;
16825 break;
16826 case V4SImode:
16827 type = v4si_ftype_v4si_v4si;
16828 break;
16829 case V2DImode:
16830 type = v2di_ftype_v2di_v2di;
16831 break;
16832 case V2DFmode:
16833 type = v2df_ftype_v2df_v2df;
16834 break;
16835 case V4SFmode:
16836 type = v4sf_ftype_v4sf_v4sf;
16837 break;
16838 case V8QImode:
16839 type = v8qi_ftype_v8qi_v8qi;
16840 break;
16841 case V4HImode:
16842 type = v4hi_ftype_v4hi_v4hi;
16843 break;
16844 case V2SImode:
16845 type = v2si_ftype_v2si_v2si;
16846 break;
16847 case DImode:
16848 type = di_ftype_di_di;
16849 break;
16850
16851 default:
16852 gcc_unreachable ();
16853 }
16854
16855       /* Override for comparisons: the mask-generating compare patterns
               return an integer-vector mask, so give their builtins v4si/v2di
               result types.  */
16856 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
16857 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
16858 type = v4si_ftype_v4sf_v4sf;
16859
16860 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
16861 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
16862 type = v2di_ftype_v2df_v2df;
16863
16864 def_builtin (d->mask, d->name, type, d->code);
16865 }
16866
16867 /* Add all builtins that are more or less simple operations on 1 operand. */
16868 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
16869 {
16870 enum machine_mode mode;
16871 tree type;
16872
16873 if (d->name == 0)
16874 continue;
16875 mode = insn_data[d->icode].operand[1].mode;
16876
16877 switch (mode)
16878 {
16879 case V16QImode:
16880 type = v16qi_ftype_v16qi;
16881 break;
16882 case V8HImode:
16883 type = v8hi_ftype_v8hi;
16884 break;
16885 case V4SImode:
16886 type = v4si_ftype_v4si;
16887 break;
16888 case V2DFmode:
16889 type = v2df_ftype_v2df;
16890 break;
16891 case V4SFmode:
16892 type = v4sf_ftype_v4sf;
16893 break;
16894 case V8QImode:
16895 type = v8qi_ftype_v8qi;
16896 break;
16897 case V4HImode:
16898 type = v4hi_ftype_v4hi;
16899 break;
16900 case V2SImode:
16901 type = v2si_ftype_v2si;
16902 break;
16903
16904 default:
16905           gcc_unreachable ();
16906 }
16907
16908 def_builtin (d->mask, d->name, type, d->code);
16909 }
16910
16911 /* Add the remaining MMX insns with somewhat more complicated types. */
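  /* def_builtin (defined earlier in this file) is expected to ignore a
     request whose ISA mask bits are not enabled for the current target, so
     registering e.g. SSE4A builtins below is harmless on other targets.  */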
16912 def_builtin (MASK_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
16913 def_builtin (MASK_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
16914 def_builtin (MASK_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
16915 def_builtin (MASK_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
16916
16917 def_builtin (MASK_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
16918 def_builtin (MASK_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
16919 def_builtin (MASK_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
16920
16921 def_builtin (MASK_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
16922 def_builtin (MASK_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
16923
16924 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
16925 def_builtin (MASK_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
16926
16927 /* comi/ucomi insns. */
16928 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
16929 if (d->mask == MASK_SSE2)
16930 def_builtin (d->mask, d->name, int_ftype_v2df_v2df, d->code);
16931 else
16932 def_builtin (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
16933
16934 def_builtin (MASK_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
16935 def_builtin (MASK_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
16936 def_builtin (MASK_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
16937
16938 def_builtin (MASK_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
16939 def_builtin (MASK_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
16940 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
16941 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
16942 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
16943 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
16944 def_builtin_const (MASK_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
16945 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
16946 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
16947 def_builtin_const (MASK_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
16948 def_builtin_const (MASK_SSE | MASK_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
16949
16950 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
16951
16952 def_builtin (MASK_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
16953 def_builtin (MASK_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
16954
16955 def_builtin (MASK_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
16956 def_builtin (MASK_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
16957 def_builtin (MASK_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
16958 def_builtin (MASK_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
16959
16960 def_builtin (MASK_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
16961 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
16962 def_builtin (MASK_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
16963 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
16964
16965 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
16966
16967 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
16968
16969 def_builtin (MASK_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
16970 def_builtin (MASK_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
16971 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
16972 def_builtin (MASK_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
16973 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
16974 def_builtin_const (MASK_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
16975
16976 def_builtin (MASK_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
16977
16978 /* Original 3DNow! */
16979 def_builtin (MASK_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
16980 def_builtin (MASK_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
16981 def_builtin (MASK_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
16982 def_builtin (MASK_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
16983 def_builtin (MASK_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
16984 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
16985 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
16986 def_builtin (MASK_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
16987 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
16988 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
16989 def_builtin (MASK_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
16990 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
16991 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
16992 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
16993 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
16994 def_builtin (MASK_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
16995 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
16996 def_builtin (MASK_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
16997 def_builtin (MASK_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
16998 def_builtin (MASK_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
16999
17000 /* 3DNow! extension as used in the Athlon CPU. */
17001 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
17002 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
17003 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
17004 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
17005 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
17006 def_builtin (MASK_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
17007
17008 /* SSE2 */
17009 def_builtin (MASK_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
17010
17011 def_builtin (MASK_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
17012 def_builtin (MASK_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
17013
17014 def_builtin (MASK_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
17015 def_builtin (MASK_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
17016
17017 def_builtin (MASK_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
17018 def_builtin (MASK_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
17019 def_builtin (MASK_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
17020 def_builtin (MASK_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
17021 def_builtin (MASK_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
17022
17023 def_builtin (MASK_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
17024 def_builtin (MASK_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
17025 def_builtin (MASK_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
17026 def_builtin (MASK_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
17027
17028 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
17029 def_builtin_const (MASK_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
17030
17031 def_builtin (MASK_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
17032
17033 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
17034 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
17035
17036 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
17037 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
17038 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
17039 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
17040 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
17041
17042 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
17043
17044 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
17045 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
17046 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
17047 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
17048
17049 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
17050 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
17051 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
17052
17053 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
17054 def_builtin_const (MASK_SSE2 | MASK_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
17055 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
17056 def_builtin_const (MASK_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
17057
17058 def_builtin (MASK_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
17059 def_builtin (MASK_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
17060 def_builtin (MASK_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
17061
17062 def_builtin (MASK_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
17063 def_builtin (MASK_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
17064
17065 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
17066 def_builtin (MASK_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
17067
17068 def_builtin (MASK_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
17069 def_builtin (MASK_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
17070 def_builtin (MASK_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
17071 def_builtin (MASK_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
17072 def_builtin (MASK_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
17073 def_builtin (MASK_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
17074 def_builtin (MASK_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
17075
17076 def_builtin (MASK_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
17077 def_builtin (MASK_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
17078 def_builtin (MASK_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
17079 def_builtin (MASK_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
17080 def_builtin (MASK_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
17081 def_builtin (MASK_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
17082 def_builtin (MASK_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
17083
17084 def_builtin (MASK_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
17085 def_builtin (MASK_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
17086 def_builtin (MASK_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
17087 def_builtin (MASK_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
17088
17089 def_builtin (MASK_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
17090
17091   /* Prescott New Instructions (SSE3).  */
17092 def_builtin (MASK_SSE3, "__builtin_ia32_monitor",
17093 void_ftype_pcvoid_unsigned_unsigned,
17094 IX86_BUILTIN_MONITOR);
17095 def_builtin (MASK_SSE3, "__builtin_ia32_mwait",
17096 void_ftype_unsigned_unsigned,
17097 IX86_BUILTIN_MWAIT);
17098 def_builtin (MASK_SSE3, "__builtin_ia32_lddqu",
17099 v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
17100
17101 /* SSSE3. */
17102 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr128",
17103 v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
17104 def_builtin (MASK_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int,
17105 IX86_BUILTIN_PALIGNR);
17106
17107   /* AMDFAM10 SSE4A new built-ins.  */
17108 def_builtin (MASK_SSE4A, "__builtin_ia32_movntsd",
17109 void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
17110 def_builtin (MASK_SSE4A, "__builtin_ia32_movntss",
17111 void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
17112 def_builtin (MASK_SSE4A, "__builtin_ia32_extrqi",
17113 v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
17114 def_builtin (MASK_SSE4A, "__builtin_ia32_extrq",
17115 v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
17116 def_builtin (MASK_SSE4A, "__builtin_ia32_insertqi",
17117 v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
17118 def_builtin (MASK_SSE4A, "__builtin_ia32_insertq",
17119 v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
17120
17121 /* Access to the vec_init patterns. */
17122 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
17123 integer_type_node, NULL_TREE);
17124 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v2si",
17125 ftype, IX86_BUILTIN_VEC_INIT_V2SI);
17126
17127 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
17128 short_integer_type_node,
17129 short_integer_type_node,
17130 short_integer_type_node, NULL_TREE);
17131 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v4hi",
17132 ftype, IX86_BUILTIN_VEC_INIT_V4HI);
17133
17134 ftype = build_function_type_list (V8QI_type_node, char_type_node,
17135 char_type_node, char_type_node,
17136 char_type_node, char_type_node,
17137 char_type_node, char_type_node,
17138 char_type_node, NULL_TREE);
17139 def_builtin (MASK_MMX, "__builtin_ia32_vec_init_v8qi",
17140 ftype, IX86_BUILTIN_VEC_INIT_V8QI);
17141
17142 /* Access to the vec_extract patterns. */
17143 ftype = build_function_type_list (double_type_node, V2DF_type_node,
17144 integer_type_node, NULL_TREE);
17145 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2df",
17146 ftype, IX86_BUILTIN_VEC_EXT_V2DF);
17147
17148 ftype = build_function_type_list (long_long_integer_type_node,
17149 V2DI_type_node, integer_type_node,
17150 NULL_TREE);
17151 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v2di",
17152 ftype, IX86_BUILTIN_VEC_EXT_V2DI);
17153
17154 ftype = build_function_type_list (float_type_node, V4SF_type_node,
17155 integer_type_node, NULL_TREE);
17156 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4sf",
17157 ftype, IX86_BUILTIN_VEC_EXT_V4SF);
17158
17159 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
17160 integer_type_node, NULL_TREE);
17161 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v4si",
17162 ftype, IX86_BUILTIN_VEC_EXT_V4SI);
17163
17164 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
17165 integer_type_node, NULL_TREE);
17166 def_builtin (MASK_SSE, "__builtin_ia32_vec_ext_v8hi",
17167 ftype, IX86_BUILTIN_VEC_EXT_V8HI);
17168
17169 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
17170 integer_type_node, NULL_TREE);
17171 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_ext_v4hi",
17172 ftype, IX86_BUILTIN_VEC_EXT_V4HI);
17173
17174 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
17175 integer_type_node, NULL_TREE);
17176 def_builtin (MASK_MMX, "__builtin_ia32_vec_ext_v2si",
17177 ftype, IX86_BUILTIN_VEC_EXT_V2SI);
17178
17179 /* Access to the vec_set patterns. */
17180 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
17181 intHI_type_node,
17182 integer_type_node, NULL_TREE);
17183 def_builtin (MASK_SSE, "__builtin_ia32_vec_set_v8hi",
17184 ftype, IX86_BUILTIN_VEC_SET_V8HI);
17185
17186 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
17187 intHI_type_node,
17188 integer_type_node, NULL_TREE);
17189 def_builtin (MASK_SSE | MASK_3DNOW_A, "__builtin_ia32_vec_set_v4hi",
17190 ftype, IX86_BUILTIN_VEC_SET_V4HI);
17191 }
17192
17193 static void
17194 ix86_init_builtins (void)
17195 {
17196 if (TARGET_MMX)
17197 ix86_init_mmx_sse_builtins ();
17198 }
17199
17200 /* Errors in the source file can cause expand_expr to return const0_rtx
17201 where we expect a vector. To avoid crashing, use one of the vector
17202 clear instructions. */
17203 static rtx
17204 safe_vector_operand (rtx x, enum machine_mode mode)
17205 {
17206 if (x == const0_rtx)
17207 x = CONST0_RTX (mode);
17208 return x;
17209 }
17210
17211 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
17212
17213 static rtx
17214 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
17215 {
17216 rtx pat, xops[3];
17217 tree arg0 = CALL_EXPR_ARG (exp, 0);
17218 tree arg1 = CALL_EXPR_ARG (exp, 1);
17219 rtx op0 = expand_normal (arg0);
17220 rtx op1 = expand_normal (arg1);
17221 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17222 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17223 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
17224
17225 if (VECTOR_MODE_P (mode0))
17226 op0 = safe_vector_operand (op0, mode0);
17227 if (VECTOR_MODE_P (mode1))
17228 op1 = safe_vector_operand (op1, mode1);
17229
17230 if (optimize || !target
17231 || GET_MODE (target) != tmode
17232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17233 target = gen_reg_rtx (tmode);
17234
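  /* If the insn wants this operand in TImode but the caller passed a plain
     int, load the int into the low element of an XMM register and view the
     whole register as TImode.  */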
17235 if (GET_MODE (op1) == SImode && mode1 == TImode)
17236 {
17237 rtx x = gen_reg_rtx (V4SImode);
17238 emit_insn (gen_sse2_loadd (x, op1));
17239 op1 = gen_lowpart (TImode, x);
17240 }
17241
17242 /* The insn must want input operands in the same modes as the
17243 result. */
17244 gcc_assert ((GET_MODE (op0) == mode0 || GET_MODE (op0) == VOIDmode)
17245 && (GET_MODE (op1) == mode1 || GET_MODE (op1) == VOIDmode));
17246
17247 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
17248 op0 = copy_to_mode_reg (mode0, op0);
17249 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
17250 op1 = copy_to_mode_reg (mode1, op1);
17251
17252 /* ??? Using ix86_fixup_binary_operands is problematic when
17253 we've got mismatched modes. Fake it. */
17254
17255 xops[0] = target;
17256 xops[1] = op0;
17257 xops[2] = op1;
17258
17259 if (tmode == mode0 && tmode == mode1)
17260 {
17261 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
17262 op0 = xops[1];
17263 op1 = xops[2];
17264 }
17265 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
17266 {
17267 op0 = force_reg (mode0, op0);
17268 op1 = force_reg (mode1, op1);
17269 target = gen_reg_rtx (tmode);
17270 }
17271
17272 pat = GEN_FCN (icode) (target, op0, op1);
17273 if (! pat)
17274 return 0;
17275 emit_insn (pat);
17276 return target;
17277 }
17278
17279 /* Subroutine of ix86_expand_builtin to take care of stores. */
17280
17281 static rtx
17282 ix86_expand_store_builtin (enum insn_code icode, tree exp)
17283 {
17284 rtx pat;
17285 tree arg0 = CALL_EXPR_ARG (exp, 0);
17286 tree arg1 = CALL_EXPR_ARG (exp, 1);
17287 rtx op0 = expand_normal (arg0);
17288 rtx op1 = expand_normal (arg1);
17289 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
17290 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
17291
17292 if (VECTOR_MODE_P (mode1))
17293 op1 = safe_vector_operand (op1, mode1);
17294
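  /* Operand 0 of a store pattern is the destination address; turn it into
     a MEM of the mode the insn expects.  */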
17295 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17296 op1 = copy_to_mode_reg (mode1, op1);
17297
17298 pat = GEN_FCN (icode) (op0, op1);
17299 if (pat)
17300 emit_insn (pat);
17301 return 0;
17302 }
17303
17304 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
17305
17306 static rtx
17307 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
17308 rtx target, int do_load)
17309 {
17310 rtx pat;
17311 tree arg0 = CALL_EXPR_ARG (exp, 0);
17312 rtx op0 = expand_normal (arg0);
17313 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17314 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17315
17316 if (optimize || !target
17317 || GET_MODE (target) != tmode
17318 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17319 target = gen_reg_rtx (tmode);
17320 if (do_load)
17321 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17322 else
17323 {
17324 if (VECTOR_MODE_P (mode0))
17325 op0 = safe_vector_operand (op0, mode0);
17326
17327 if ((optimize && !register_operand (op0, mode0))
17328 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17329 op0 = copy_to_mode_reg (mode0, op0);
17330 }
17331
17332 pat = GEN_FCN (icode) (target, op0);
17333 if (! pat)
17334 return 0;
17335 emit_insn (pat);
17336 return target;
17337 }
17338
17339 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
17340 sqrtss, rsqrtss, rcpss. */
17341
17342 static rtx
17343 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
17344 {
17345 rtx pat;
17346 tree arg0 = CALL_EXPR_ARG (exp, 0);
17347 rtx op1, op0 = expand_normal (arg0);
17348 enum machine_mode tmode = insn_data[icode].operand[0].mode;
17349 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
17350
17351 if (optimize || !target
17352 || GET_MODE (target) != tmode
17353 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17354 target = gen_reg_rtx (tmode);
17355
17356 if (VECTOR_MODE_P (mode0))
17357 op0 = safe_vector_operand (op0, mode0);
17358
17359 if ((optimize && !register_operand (op0, mode0))
17360 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17361 op0 = copy_to_mode_reg (mode0, op0);
17362
17363 op1 = op0;
17364 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
17365 op1 = copy_to_mode_reg (mode0, op1);
17366
17367 pat = GEN_FCN (icode) (target, op0, op1);
17368 if (! pat)
17369 return 0;
17370 emit_insn (pat);
17371 return target;
17372 }
17373
17374 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
17375
17376 static rtx
17377 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
17378 rtx target)
17379 {
17380 rtx pat;
17381 tree arg0 = CALL_EXPR_ARG (exp, 0);
17382 tree arg1 = CALL_EXPR_ARG (exp, 1);
17383 rtx op0 = expand_normal (arg0);
17384 rtx op1 = expand_normal (arg1);
17385 rtx op2;
17386 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
17387 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
17388 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
17389 enum rtx_code comparison = d->comparison;
17390
17391 if (VECTOR_MODE_P (mode0))
17392 op0 = safe_vector_operand (op0, mode0);
17393 if (VECTOR_MODE_P (mode1))
17394 op1 = safe_vector_operand (op1, mode1);
17395
17396 /* Swap operands if we have a comparison that isn't available in
17397 hardware. */
17398 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17399 {
17400 rtx tmp = gen_reg_rtx (mode1);
17401 emit_move_insn (tmp, op1);
17402 op1 = op0;
17403 op0 = tmp;
17404 }
17405
17406 if (optimize || !target
17407 || GET_MODE (target) != tmode
17408 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
17409 target = gen_reg_rtx (tmode);
17410
17411 if ((optimize && !register_operand (op0, mode0))
17412 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
17413 op0 = copy_to_mode_reg (mode0, op0);
17414 if ((optimize && !register_operand (op1, mode1))
17415 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
17416 op1 = copy_to_mode_reg (mode1, op1);
17417
17418 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17419 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
17420 if (! pat)
17421 return 0;
17422 emit_insn (pat);
17423 return target;
17424 }
17425
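/* Illustration: builtins such as __builtin_ia32_cmpgtps are described
   in bdesc_2arg with BUILTIN_DESC_SWAP_OPERANDS and an LT comparison,
   so the "greater" forms are emitted as the corresponding "less" insn
   with the operands exchanged, as done above.  */
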
17426 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
17427
17428 static rtx
17429 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
17430 rtx target)
17431 {
17432 rtx pat;
17433 tree arg0 = CALL_EXPR_ARG (exp, 0);
17434 tree arg1 = CALL_EXPR_ARG (exp, 1);
17435 rtx op0 = expand_normal (arg0);
17436 rtx op1 = expand_normal (arg1);
17437 rtx op2;
17438 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
17439 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
17440 enum rtx_code comparison = d->comparison;
17441
17442 if (VECTOR_MODE_P (mode0))
17443 op0 = safe_vector_operand (op0, mode0);
17444 if (VECTOR_MODE_P (mode1))
17445 op1 = safe_vector_operand (op1, mode1);
17446
17447 /* Swap operands if we have a comparison that isn't available in
17448 hardware. */
17449 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
17450 {
17451 rtx tmp = op1;
17452 op1 = op0;
17453 op0 = tmp;
17454 }
17455
17456 target = gen_reg_rtx (SImode);
17457 emit_move_insn (target, const0_rtx);
17458 target = gen_rtx_SUBREG (QImode, target, 0);
17459
17460 if ((optimize && !register_operand (op0, mode0))
17461 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
17462 op0 = copy_to_mode_reg (mode0, op0);
17463 if ((optimize && !register_operand (op1, mode1))
17464 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
17465 op1 = copy_to_mode_reg (mode1, op1);
17466
17467 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
17468 pat = GEN_FCN (d->icode) (op0, op1);
17469 if (! pat)
17470 return 0;
17471 emit_insn (pat);
17472 emit_insn (gen_rtx_SET (VOIDmode,
17473 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
17474 gen_rtx_fmt_ee (comparison, QImode,
17475 SET_DEST (pat),
17476 const0_rtx)));
17477
17478 return SUBREG_REG (target);
17479 }
17480
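/* Illustration: a comparison builtin such as __builtin_ia32_comieq
   expands to a comi/ucomi pattern, which only sets the flags; the
   STRICT_LOW_PART store above then materializes the 0/1 result in the
   low byte of a fresh SImode pseudo.  */
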
17481 /* Return the integer constant in ARG. Constrain it to be in the range
17482 of the subparts of VEC_TYPE; issue an error if not. */
17483
17484 static int
17485 get_element_number (tree vec_type, tree arg)
17486 {
17487 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
17488
17489 if (!host_integerp (arg, 1)
17490 || (elt = tree_low_cst (arg, 1), elt > max))
17491 {
17492 error ("selector must be an integer constant in the range 0..%wi", max);
17493 return 0;
17494 }
17495
17496 return elt;
17497 }
17498
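/* Illustration: for a V4SF vector type TYPE_VECTOR_SUBPARTS is 4, so a
   selector passed to __builtin_ia32_vec_ext_v4sf must be a constant in
   the range 0..3; anything else triggers the error above and a harmless
   element 0 is used instead.  */
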
17499 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17500 ix86_expand_vector_init. We DO have language-level syntax for this, in
17501 the form of (type){ init-list }. Except that since we can't place emms
17502 instructions from inside the compiler, we can't allow the use of MMX
17503 registers unless the user explicitly asks for it. So we do *not* define
17504 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
17505 we have builtins invoked by mmintrin.h that give us license to emit
17506 these sorts of instructions. */
17507
17508 static rtx
17509 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
17510 {
17511 enum machine_mode tmode = TYPE_MODE (type);
17512 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
17513 int i, n_elt = GET_MODE_NUNITS (tmode);
17514 rtvec v = rtvec_alloc (n_elt);
17515
17516 gcc_assert (VECTOR_MODE_P (tmode));
17517 gcc_assert (call_expr_nargs (exp) == n_elt);
17518
17519 for (i = 0; i < n_elt; ++i)
17520 {
17521 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
17522 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
17523 }
17524
17525 if (!target || !register_operand (target, tmode))
17526 target = gen_reg_rtx (tmode);
17527
17528 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
17529 return target;
17530 }
17531
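/* Illustration: the _mm_set_* style wrappers in mmintrin.h call builtins
   such as __builtin_ia32_vec_init_v4hi, so n_elt is 4 here and every
   argument is narrowed to HImode before ix86_expand_vector_init sees the
   PARALLEL.  */
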
17532 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17533 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
17534 had a language-level syntax for referencing vector elements. */
17535
17536 static rtx
17537 ix86_expand_vec_ext_builtin (tree exp, rtx target)
17538 {
17539 enum machine_mode tmode, mode0;
17540 tree arg0, arg1;
17541 int elt;
17542 rtx op0;
17543
17544 arg0 = CALL_EXPR_ARG (exp, 0);
17545 arg1 = CALL_EXPR_ARG (exp, 1);
17546
17547 op0 = expand_normal (arg0);
17548 elt = get_element_number (TREE_TYPE (arg0), arg1);
17549
17550 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17551 mode0 = TYPE_MODE (TREE_TYPE (arg0));
17552 gcc_assert (VECTOR_MODE_P (mode0));
17553
17554 op0 = force_reg (mode0, op0);
17555
17556 if (optimize || !target || !register_operand (target, tmode))
17557 target = gen_reg_rtx (tmode);
17558
17559 ix86_expand_vector_extract (true, target, op0, elt);
17560
17561 return target;
17562 }
17563
17564 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
17565 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
17566 a language-level syntax for referencing vector elements. */
17567
17568 static rtx
17569 ix86_expand_vec_set_builtin (tree exp)
17570 {
17571 enum machine_mode tmode, mode1;
17572 tree arg0, arg1, arg2;
17573 int elt;
17574 rtx op0, op1;
17575
17576 arg0 = CALL_EXPR_ARG (exp, 0);
17577 arg1 = CALL_EXPR_ARG (exp, 1);
17578 arg2 = CALL_EXPR_ARG (exp, 2);
17579
17580 tmode = TYPE_MODE (TREE_TYPE (arg0));
17581 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
17582 gcc_assert (VECTOR_MODE_P (tmode));
17583
17584 op0 = expand_expr (arg0, NULL_RTX, tmode, 0);
17585 op1 = expand_expr (arg1, NULL_RTX, mode1, 0);
17586 elt = get_element_number (TREE_TYPE (arg0), arg2);
17587
17588 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
17589 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
17590
17591 op0 = force_reg (tmode, op0);
17592 op1 = force_reg (mode1, op1);
17593
17594 ix86_expand_vector_set (true, op0, op1, elt);
17595
17596 return op0;
17597 }
17598
17599 /* Expand an expression EXP that calls a built-in function,
17600 with result going to TARGET if that's convenient
17601 (and in mode MODE if that's convenient).
17602 SUBTARGET may be used as the target for computing one of EXP's operands.
17603 IGNORE is nonzero if the value is to be ignored. */
17604
17605 static rtx
17606 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
17607 enum machine_mode mode ATTRIBUTE_UNUSED,
17608 int ignore ATTRIBUTE_UNUSED)
17609 {
17610 const struct builtin_description *d;
17611 size_t i;
17612 enum insn_code icode;
17613 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
17614 tree arg0, arg1, arg2, arg3;
17615 rtx op0, op1, op2, op3, pat;
17616 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
17617 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
17618
17619 switch (fcode)
17620 {
17621 case IX86_BUILTIN_EMMS:
17622 emit_insn (gen_mmx_emms ());
17623 return 0;
17624
17625 case IX86_BUILTIN_SFENCE:
17626 emit_insn (gen_sse_sfence ());
17627 return 0;
17628
17629 case IX86_BUILTIN_MASKMOVQ:
17630 case IX86_BUILTIN_MASKMOVDQU:
17631 icode = (fcode == IX86_BUILTIN_MASKMOVQ
17632 ? CODE_FOR_mmx_maskmovq
17633 : CODE_FOR_sse2_maskmovdqu);
17634 /* Note the arg order is different from the operand order. */
17635 arg1 = CALL_EXPR_ARG (exp, 0);
17636 arg2 = CALL_EXPR_ARG (exp, 1);
17637 arg0 = CALL_EXPR_ARG (exp, 2);
17638 op0 = expand_normal (arg0);
17639 op1 = expand_normal (arg1);
17640 op2 = expand_normal (arg2);
17641 mode0 = insn_data[icode].operand[0].mode;
17642 mode1 = insn_data[icode].operand[1].mode;
17643 mode2 = insn_data[icode].operand[2].mode;
17644
17645 op0 = force_reg (Pmode, op0);
17646 op0 = gen_rtx_MEM (mode1, op0);
17647
17648 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
17649 op0 = copy_to_mode_reg (mode0, op0);
17650 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
17651 op1 = copy_to_mode_reg (mode1, op1);
17652 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
17653 op2 = copy_to_mode_reg (mode2, op2);
17654 pat = GEN_FCN (icode) (op0, op1, op2);
17655 if (! pat)
17656 return 0;
17657 emit_insn (pat);
17658 return 0;
17659
17660 case IX86_BUILTIN_SQRTSS:
17661 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
17662 case IX86_BUILTIN_RSQRTSS:
17663 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
17664 case IX86_BUILTIN_RCPSS:
17665 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
17666
17667 case IX86_BUILTIN_LOADUPS:
17668 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
17669
17670 case IX86_BUILTIN_STOREUPS:
17671 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
17672
17673 case IX86_BUILTIN_LOADHPS:
17674 case IX86_BUILTIN_LOADLPS:
17675 case IX86_BUILTIN_LOADHPD:
17676 case IX86_BUILTIN_LOADLPD:
17677 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
17678 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
17679 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
17680 : CODE_FOR_sse2_loadlpd);
17681 arg0 = CALL_EXPR_ARG (exp, 0);
17682 arg1 = CALL_EXPR_ARG (exp, 1);
17683 op0 = expand_normal (arg0);
17684 op1 = expand_normal (arg1);
17685 tmode = insn_data[icode].operand[0].mode;
17686 mode0 = insn_data[icode].operand[1].mode;
17687 mode1 = insn_data[icode].operand[2].mode;
17688
17689 op0 = force_reg (mode0, op0);
17690 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
17691 if (optimize || target == 0
17692 || GET_MODE (target) != tmode
17693 || !register_operand (target, tmode))
17694 target = gen_reg_rtx (tmode);
17695 pat = GEN_FCN (icode) (target, op0, op1);
17696 if (! pat)
17697 return 0;
17698 emit_insn (pat);
17699 return target;
17700
17701 case IX86_BUILTIN_STOREHPS:
17702 case IX86_BUILTIN_STORELPS:
17703 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
17704 : CODE_FOR_sse_storelps);
17705 arg0 = CALL_EXPR_ARG (exp, 0);
17706 arg1 = CALL_EXPR_ARG (exp, 1);
17707 op0 = expand_normal (arg0);
17708 op1 = expand_normal (arg1);
17709 mode0 = insn_data[icode].operand[0].mode;
17710 mode1 = insn_data[icode].operand[1].mode;
17711
17712 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
17713 op1 = force_reg (mode1, op1);
17714
17715 pat = GEN_FCN (icode) (op0, op1);
17716 if (! pat)
17717 return 0;
17718 emit_insn (pat);
17719 return const0_rtx;
17720
17721 case IX86_BUILTIN_MOVNTPS:
17722 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
17723 case IX86_BUILTIN_MOVNTQ:
17724 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
17725
17726 case IX86_BUILTIN_LDMXCSR:
17727 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
17728 target = assign_386_stack_local (SImode, SLOT_TEMP);
17729 emit_move_insn (target, op0);
17730 emit_insn (gen_sse_ldmxcsr (target));
17731 return 0;
17732
17733 case IX86_BUILTIN_STMXCSR:
17734 target = assign_386_stack_local (SImode, SLOT_TEMP);
17735 emit_insn (gen_sse_stmxcsr (target));
17736 return copy_to_mode_reg (SImode, target);
17737
17738 case IX86_BUILTIN_SHUFPS:
17739 case IX86_BUILTIN_SHUFPD:
17740 icode = (fcode == IX86_BUILTIN_SHUFPS
17741 ? CODE_FOR_sse_shufps
17742 : CODE_FOR_sse2_shufpd);
17743 arg0 = CALL_EXPR_ARG (exp, 0);
17744 arg1 = CALL_EXPR_ARG (exp, 1);
17745 arg2 = CALL_EXPR_ARG (exp, 2);
17746 op0 = expand_normal (arg0);
17747 op1 = expand_normal (arg1);
17748 op2 = expand_normal (arg2);
17749 tmode = insn_data[icode].operand[0].mode;
17750 mode0 = insn_data[icode].operand[1].mode;
17751 mode1 = insn_data[icode].operand[2].mode;
17752 mode2 = insn_data[icode].operand[3].mode;
17753
17754 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
17755 op0 = copy_to_mode_reg (mode0, op0);
17756 if ((optimize && !register_operand (op1, mode1))
17757 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
17758 op1 = copy_to_mode_reg (mode1, op1);
17759 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
17760 {
17761 /* @@@ better error message */
17762 error ("mask must be an immediate");
17763 return gen_reg_rtx (tmode);
17764 }
17765 if (optimize || target == 0
17766 || GET_MODE (target) != tmode
17767 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17768 target = gen_reg_rtx (tmode);
17769 pat = GEN_FCN (icode) (target, op0, op1, op2);
17770 if (! pat)
17771 return 0;
17772 emit_insn (pat);
17773 return target;
17774
17775 case IX86_BUILTIN_PSHUFW:
17776 case IX86_BUILTIN_PSHUFD:
17777 case IX86_BUILTIN_PSHUFHW:
17778 case IX86_BUILTIN_PSHUFLW:
17779 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
17780 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
17781 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
17782 : CODE_FOR_mmx_pshufw);
17783 arg0 = CALL_EXPR_ARG (exp, 0);
17784 arg1 = CALL_EXPR_ARG (exp, 1);
17785 op0 = expand_normal (arg0);
17786 op1 = expand_normal (arg1);
17787 tmode = insn_data[icode].operand[0].mode;
17788 mode1 = insn_data[icode].operand[1].mode;
17789 mode2 = insn_data[icode].operand[2].mode;
17790
17791 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17792 op0 = copy_to_mode_reg (mode1, op0);
17793 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17794 {
17795 /* @@@ better error message */
17796 error ("mask must be an immediate");
17797 return const0_rtx;
17798 }
17799 if (target == 0
17800 || GET_MODE (target) != tmode
17801 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
17802 target = gen_reg_rtx (tmode);
17803 pat = GEN_FCN (icode) (target, op0, op1);
17804 if (! pat)
17805 return 0;
17806 emit_insn (pat);
17807 return target;
17808
17809 case IX86_BUILTIN_PSLLWI128:
17810 icode = CODE_FOR_ashlv8hi3;
17811 goto do_pshifti;
17812 case IX86_BUILTIN_PSLLDI128:
17813 icode = CODE_FOR_ashlv4si3;
17814 goto do_pshifti;
17815 case IX86_BUILTIN_PSLLQI128:
17816 icode = CODE_FOR_ashlv2di3;
17817 goto do_pshifti;
17818 case IX86_BUILTIN_PSRAWI128:
17819 icode = CODE_FOR_ashrv8hi3;
17820 goto do_pshifti;
17821 case IX86_BUILTIN_PSRADI128:
17822 icode = CODE_FOR_ashrv4si3;
17823 goto do_pshifti;
17824 case IX86_BUILTIN_PSRLWI128:
17825 icode = CODE_FOR_lshrv8hi3;
17826 goto do_pshifti;
17827 case IX86_BUILTIN_PSRLDI128:
17828 icode = CODE_FOR_lshrv4si3;
17829 goto do_pshifti;
17830 case IX86_BUILTIN_PSRLQI128:
17831 icode = CODE_FOR_lshrv2di3;
17832 goto do_pshifti;
17833 do_pshifti:
17834 arg0 = CALL_EXPR_ARG (exp, 0);
17835 arg1 = CALL_EXPR_ARG (exp, 1);
17836 op0 = expand_normal (arg0);
17837 op1 = expand_normal (arg1);
17838
17839 if (!CONST_INT_P (op1))
17840 {
17841 error ("shift must be an immediate");
17842 return const0_rtx;
17843 }
17844 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
17845 op1 = GEN_INT (255);
17846
17847 tmode = insn_data[icode].operand[0].mode;
17848 mode1 = insn_data[icode].operand[1].mode;
17849 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17850 op0 = copy_to_reg (op0);
17851
17852 target = gen_reg_rtx (tmode);
17853 pat = GEN_FCN (icode) (target, op0, op1);
17854 if (!pat)
17855 return 0;
17856 emit_insn (pat);
17857 return target;
17858
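    /* Illustration for the do_pshifti path above: _mm_slli_epi16 (v, 3)
       arrives as __builtin_ia32_psllwi128 with op1 == GEN_INT (3); counts
       outside 0..255 are clamped to 255, a deliberately out-of-range
       shift.  */
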
17859 case IX86_BUILTIN_PSLLW128:
17860 icode = CODE_FOR_ashlv8hi3;
17861 goto do_pshift;
17862 case IX86_BUILTIN_PSLLD128:
17863 icode = CODE_FOR_ashlv4si3;
17864 goto do_pshift;
17865 case IX86_BUILTIN_PSLLQ128:
17866 icode = CODE_FOR_ashlv2di3;
17867 goto do_pshift;
17868 case IX86_BUILTIN_PSRAW128:
17869 icode = CODE_FOR_ashrv8hi3;
17870 goto do_pshift;
17871 case IX86_BUILTIN_PSRAD128:
17872 icode = CODE_FOR_ashrv4si3;
17873 goto do_pshift;
17874 case IX86_BUILTIN_PSRLW128:
17875 icode = CODE_FOR_lshrv8hi3;
17876 goto do_pshift;
17877 case IX86_BUILTIN_PSRLD128:
17878 icode = CODE_FOR_lshrv4si3;
17879 goto do_pshift;
17880 case IX86_BUILTIN_PSRLQ128:
17881 icode = CODE_FOR_lshrv2di3;
17882 goto do_pshift;
17883 do_pshift:
17884 arg0 = CALL_EXPR_ARG (exp, 0);
17885 arg1 = CALL_EXPR_ARG (exp, 1);
17886 op0 = expand_normal (arg0);
17887 op1 = expand_normal (arg1);
17888
17889 tmode = insn_data[icode].operand[0].mode;
17890 mode1 = insn_data[icode].operand[1].mode;
17891
17892 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17893 op0 = copy_to_reg (op0);
17894
17895 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
17896 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
17897 op1 = copy_to_reg (op1);
17898
17899 target = gen_reg_rtx (tmode);
17900 pat = GEN_FCN (icode) (target, op0, op1);
17901 if (!pat)
17902 return 0;
17903 emit_insn (pat);
17904 return target;
17905
17906 case IX86_BUILTIN_PSLLDQI128:
17907 case IX86_BUILTIN_PSRLDQI128:
17908 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
17909 : CODE_FOR_sse2_lshrti3);
17910 arg0 = CALL_EXPR_ARG (exp, 0);
17911 arg1 = CALL_EXPR_ARG (exp, 1);
17912 op0 = expand_normal (arg0);
17913 op1 = expand_normal (arg1);
17914 tmode = insn_data[icode].operand[0].mode;
17915 mode1 = insn_data[icode].operand[1].mode;
17916 mode2 = insn_data[icode].operand[2].mode;
17917
17918 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
17919 {
17920 op0 = copy_to_reg (op0);
17921 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
17922 }
17923 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
17924 {
17925 error ("shift must be an immediate");
17926 return const0_rtx;
17927 }
17928 target = gen_reg_rtx (V2DImode);
17929 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
17930 op0, op1);
17931 if (! pat)
17932 return 0;
17933 emit_insn (pat);
17934 return target;
17935
17936 case IX86_BUILTIN_FEMMS:
17937 emit_insn (gen_mmx_femms ());
17938 return NULL_RTX;
17939
17940 case IX86_BUILTIN_PAVGUSB:
17941 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
17942
17943 case IX86_BUILTIN_PF2ID:
17944 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
17945
17946 case IX86_BUILTIN_PFACC:
17947 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
17948
17949 case IX86_BUILTIN_PFADD:
17950 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
17951
17952 case IX86_BUILTIN_PFCMPEQ:
17953 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
17954
17955 case IX86_BUILTIN_PFCMPGE:
17956 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
17957
17958 case IX86_BUILTIN_PFCMPGT:
17959 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
17960
17961 case IX86_BUILTIN_PFMAX:
17962 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
17963
17964 case IX86_BUILTIN_PFMIN:
17965 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
17966
17967 case IX86_BUILTIN_PFMUL:
17968 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
17969
17970 case IX86_BUILTIN_PFRCP:
17971 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
17972
17973 case IX86_BUILTIN_PFRCPIT1:
17974 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
17975
17976 case IX86_BUILTIN_PFRCPIT2:
17977 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
17978
17979 case IX86_BUILTIN_PFRSQIT1:
17980 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
17981
17982 case IX86_BUILTIN_PFRSQRT:
17983 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
17984
17985 case IX86_BUILTIN_PFSUB:
17986 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
17987
17988 case IX86_BUILTIN_PFSUBR:
17989 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
17990
17991 case IX86_BUILTIN_PI2FD:
17992 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
17993
17994 case IX86_BUILTIN_PMULHRW:
17995 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
17996
17997 case IX86_BUILTIN_PF2IW:
17998 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
17999
18000 case IX86_BUILTIN_PFNACC:
18001 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
18002
18003 case IX86_BUILTIN_PFPNACC:
18004 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
18005
18006 case IX86_BUILTIN_PI2FW:
18007 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
18008
18009 case IX86_BUILTIN_PSWAPDSI:
18010 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
18011
18012 case IX86_BUILTIN_PSWAPDSF:
18013 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
18014
18015 case IX86_BUILTIN_SQRTSD:
18016 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
18017 case IX86_BUILTIN_LOADUPD:
18018 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
18019 case IX86_BUILTIN_STOREUPD:
18020 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
18021
18022 case IX86_BUILTIN_MFENCE:
18023 emit_insn (gen_sse2_mfence ());
18024 return 0;
18025 case IX86_BUILTIN_LFENCE:
18026 emit_insn (gen_sse2_lfence ());
18027 return 0;
18028
18029 case IX86_BUILTIN_CLFLUSH:
18030 arg0 = CALL_EXPR_ARG (exp, 0);
18031 op0 = expand_normal (arg0);
18032 icode = CODE_FOR_sse2_clflush;
18033 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
18034 op0 = copy_to_mode_reg (Pmode, op0);
18035
18036 emit_insn (gen_sse2_clflush (op0));
18037 return 0;
18038
18039 case IX86_BUILTIN_MOVNTPD:
18040 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
18041 case IX86_BUILTIN_MOVNTDQ:
18042 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
18043 case IX86_BUILTIN_MOVNTI:
18044 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
18045
18046 case IX86_BUILTIN_LOADDQU:
18047 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
18048 case IX86_BUILTIN_STOREDQU:
18049 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
18050
18051 case IX86_BUILTIN_MONITOR:
18052 arg0 = CALL_EXPR_ARG (exp, 0);
18053 arg1 = CALL_EXPR_ARG (exp, 1);
18054 arg2 = CALL_EXPR_ARG (exp, 2);
18055 op0 = expand_normal (arg0);
18056 op1 = expand_normal (arg1);
18057 op2 = expand_normal (arg2);
18058 if (!REG_P (op0))
18059 op0 = copy_to_mode_reg (Pmode, op0);
18060 if (!REG_P (op1))
18061 op1 = copy_to_mode_reg (SImode, op1);
18062 if (!REG_P (op2))
18063 op2 = copy_to_mode_reg (SImode, op2);
18064 if (!TARGET_64BIT)
18065 emit_insn (gen_sse3_monitor (op0, op1, op2));
18066 else
18067 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
18068 return 0;
18069
18070 case IX86_BUILTIN_MWAIT:
18071 arg0 = CALL_EXPR_ARG (exp, 0);
18072 arg1 = CALL_EXPR_ARG (exp, 1);
18073 op0 = expand_normal (arg0);
18074 op1 = expand_normal (arg1);
18075 if (!REG_P (op0))
18076 op0 = copy_to_mode_reg (SImode, op0);
18077 if (!REG_P (op1))
18078 op1 = copy_to_mode_reg (SImode, op1);
18079 emit_insn (gen_sse3_mwait (op0, op1));
18080 return 0;
18081
18082 case IX86_BUILTIN_LDDQU:
18083 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
18084 target, 1);
18085
18086 case IX86_BUILTIN_PALIGNR:
18087 case IX86_BUILTIN_PALIGNR128:
18088 if (fcode == IX86_BUILTIN_PALIGNR)
18089 {
18090 icode = CODE_FOR_ssse3_palignrdi;
18091 mode = DImode;
18092 }
18093 else
18094 {
18095 icode = CODE_FOR_ssse3_palignrti;
18096 mode = V2DImode;
18097 }
18098 arg0 = CALL_EXPR_ARG (exp, 0);
18099 arg1 = CALL_EXPR_ARG (exp, 1);
18100 arg2 = CALL_EXPR_ARG (exp, 2);
18101 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0);
18102 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0);
18103 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0);
18104 tmode = insn_data[icode].operand[0].mode;
18105 mode1 = insn_data[icode].operand[1].mode;
18106 mode2 = insn_data[icode].operand[2].mode;
18107 mode3 = insn_data[icode].operand[3].mode;
18108
18109 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18110 {
18111 op0 = copy_to_reg (op0);
18112 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
18113 }
18114 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18115 {
18116 op1 = copy_to_reg (op1);
18117 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
18118 }
18119 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18120 {
18121 error ("shift must be an immediate");
18122 return const0_rtx;
18123 }
18124 target = gen_reg_rtx (mode);
18125 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
18126 op0, op1, op2);
18127 if (! pat)
18128 return 0;
18129 emit_insn (pat);
18130 return target;
18131
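    /* Illustration for the PALIGNR cases above: the RTL patterns take the
       shift as a bit count, which is why the intrinsic wrappers scale the
       byte count by 8 before calling __builtin_ia32_palignr128; a
       non-constant count ends up in the "shift must be an immediate"
       error.  */
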
18132 case IX86_BUILTIN_MOVNTSD:
18133 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
18134
18135 case IX86_BUILTIN_MOVNTSS:
18136 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
18137
18138 case IX86_BUILTIN_INSERTQ:
18139 case IX86_BUILTIN_EXTRQ:
18140 icode = (fcode == IX86_BUILTIN_EXTRQ
18141 ? CODE_FOR_sse4a_extrq
18142 : CODE_FOR_sse4a_insertq);
18143 arg0 = CALL_EXPR_ARG (exp, 0);
18144 arg1 = CALL_EXPR_ARG (exp, 1);
18145 op0 = expand_normal (arg0);
18146 op1 = expand_normal (arg1);
18147 tmode = insn_data[icode].operand[0].mode;
18148 mode1 = insn_data[icode].operand[1].mode;
18149 mode2 = insn_data[icode].operand[2].mode;
18150 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18151 op0 = copy_to_mode_reg (mode1, op0);
18152 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18153 op1 = copy_to_mode_reg (mode2, op1);
18154 if (optimize || target == 0
18155 || GET_MODE (target) != tmode
18156 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18157 target = gen_reg_rtx (tmode);
18158 pat = GEN_FCN (icode) (target, op0, op1);
18159 if (! pat)
18160 return NULL_RTX;
18161 emit_insn (pat);
18162 return target;
18163
18164 case IX86_BUILTIN_EXTRQI:
18165 icode = CODE_FOR_sse4a_extrqi;
18166 arg0 = CALL_EXPR_ARG (exp, 0);
18167 arg1 = CALL_EXPR_ARG (exp, 1);
18168 arg2 = CALL_EXPR_ARG (exp, 2);
18169 op0 = expand_normal (arg0);
18170 op1 = expand_normal (arg1);
18171 op2 = expand_normal (arg2);
18172 tmode = insn_data[icode].operand[0].mode;
18173 mode1 = insn_data[icode].operand[1].mode;
18174 mode2 = insn_data[icode].operand[2].mode;
18175 mode3 = insn_data[icode].operand[3].mode;
18176 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18177 op0 = copy_to_mode_reg (mode1, op0);
18178 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18179 {
18180 error ("index mask must be an immediate");
18181 return gen_reg_rtx (tmode);
18182 }
18183 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18184 {
18185 error ("length mask must be an immediate");
18186 return gen_reg_rtx (tmode);
18187 }
18188 if (optimize || target == 0
18189 || GET_MODE (target) != tmode
18190 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18191 target = gen_reg_rtx (tmode);
18192 pat = GEN_FCN (icode) (target, op0, op1, op2);
18193 if (! pat)
18194 return NULL_RTX;
18195 emit_insn (pat);
18196 return target;
18197
18198 case IX86_BUILTIN_INSERTQI:
18199 icode = CODE_FOR_sse4a_insertqi;
18200 arg0 = CALL_EXPR_ARG (exp, 0);
18201 arg1 = CALL_EXPR_ARG (exp, 1);
18202 arg2 = CALL_EXPR_ARG (exp, 2);
18203 arg3 = CALL_EXPR_ARG (exp, 3);
18204 op0 = expand_normal (arg0);
18205 op1 = expand_normal (arg1);
18206 op2 = expand_normal (arg2);
18207 op3 = expand_normal (arg3);
18208 tmode = insn_data[icode].operand[0].mode;
18209 mode1 = insn_data[icode].operand[1].mode;
18210 mode2 = insn_data[icode].operand[2].mode;
18211 mode3 = insn_data[icode].operand[3].mode;
18212 mode4 = insn_data[icode].operand[4].mode;
18213
18214 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18215 op0 = copy_to_mode_reg (mode1, op0);
18216
18217 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
18218 op1 = copy_to_mode_reg (mode2, op1);
18219
18220 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18221 {
18222 error ("index mask must be an immediate");
18223 return gen_reg_rtx (tmode);
18224 }
18225 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
18226 {
18227 error ("length mask must be an immediate");
18228 return gen_reg_rtx (tmode);
18229 }
18230 if (optimize || target == 0
18231 || GET_MODE (target) != tmode
18232 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18233 target = gen_reg_rtx (tmode);
18234 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
18235 if (! pat)
18236 return NULL_RTX;
18237 emit_insn (pat);
18238 return target;
18239
18240 case IX86_BUILTIN_VEC_INIT_V2SI:
18241 case IX86_BUILTIN_VEC_INIT_V4HI:
18242 case IX86_BUILTIN_VEC_INIT_V8QI:
18243 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
18244
18245 case IX86_BUILTIN_VEC_EXT_V2DF:
18246 case IX86_BUILTIN_VEC_EXT_V2DI:
18247 case IX86_BUILTIN_VEC_EXT_V4SF:
18248 case IX86_BUILTIN_VEC_EXT_V4SI:
18249 case IX86_BUILTIN_VEC_EXT_V8HI:
18250 case IX86_BUILTIN_VEC_EXT_V2SI:
18251 case IX86_BUILTIN_VEC_EXT_V4HI:
18252 return ix86_expand_vec_ext_builtin (exp, target);
18253
18254 case IX86_BUILTIN_VEC_SET_V8HI:
18255 case IX86_BUILTIN_VEC_SET_V4HI:
18256 return ix86_expand_vec_set_builtin (exp);
18257
18258 default:
18259 break;
18260 }
18261
18262 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
18263 if (d->code == fcode)
18264 {
18265 /* Compares are treated specially. */
18266 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
18267 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
18268 || d->icode == CODE_FOR_sse2_maskcmpv2df3
18269 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18270 return ix86_expand_sse_compare (d, exp, target);
18271
18272 return ix86_expand_binop_builtin (d->icode, exp, target);
18273 }
18274
18275 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18276 if (d->code == fcode)
18277 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
18278
18279 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18280 if (d->code == fcode)
18281 return ix86_expand_sse_comi (d, exp, target);
18282
18283 gcc_unreachable ();
18284 }
18285
18286 /* Returns a function decl for a vectorized version of the builtin function
18287 with builtin function code FN and the result vector type TYPE, or NULL_TREE
18288 if it is not available. */
18289
18290 static tree
18291 ix86_builtin_vectorized_function (enum built_in_function fn, tree type_out,
18292 tree type_in)
18293 {
18294 enum machine_mode in_mode, out_mode;
18295 int in_n, out_n;
18296
18297 if (TREE_CODE (type_out) != VECTOR_TYPE
18298 || TREE_CODE (type_in) != VECTOR_TYPE)
18299 return NULL_TREE;
18300
18301 out_mode = TYPE_MODE (TREE_TYPE (type_out));
18302 out_n = TYPE_VECTOR_SUBPARTS (type_out);
18303 in_mode = TYPE_MODE (TREE_TYPE (type_in));
18304 in_n = TYPE_VECTOR_SUBPARTS (type_in);
18305
18306 switch (fn)
18307 {
18308 case BUILT_IN_SQRT:
18309 if (out_mode == DFmode && out_n == 2
18310 && in_mode == DFmode && in_n == 2)
18311 return ix86_builtins[IX86_BUILTIN_SQRTPD];
18312 return NULL_TREE;
18313
18314 case BUILT_IN_SQRTF:
18315 if (out_mode == SFmode && out_n == 4
18316 && in_mode == SFmode && in_n == 4)
18317 return ix86_builtins[IX86_BUILTIN_SQRTPS];
18318 return NULL_TREE;
18319
18320 case BUILT_IN_LRINTF:
18321 if (out_mode == SImode && out_n == 4
18322 && in_mode == SFmode && in_n == 4)
18323 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
18324 return NULL_TREE;
18325
18326 default:
18327 ;
18328 }
18329
18330 return NULL_TREE;
18331 }
18332
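/* Illustration: when vectorizing a loop over floats, the vectorizer asks
   for BUILT_IN_SQRTF with V4SF in and out and gets the sqrtps builtin
   back; any other in/out combination returns NULL_TREE and the call
   stays scalar.  */
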
18333 /* Returns a decl of a function that implements conversion of the
18334 input vector of type TYPE, or NULL_TREE if it is not available. */
18335
18336 static tree
18337 ix86_builtin_conversion (enum tree_code code, tree type)
18338 {
18339 if (TREE_CODE (type) != VECTOR_TYPE)
18340 return NULL_TREE;
18341
18342 switch (code)
18343 {
18344 case FLOAT_EXPR:
18345 switch (TYPE_MODE (type))
18346 {
18347 case V4SImode:
18348 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
18349 default:
18350 return NULL_TREE;
18351 }
18352
18353 case FIX_TRUNC_EXPR:
18354 switch (TYPE_MODE (type))
18355 {
18356 case V4SFmode:
18357 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
18358 default:
18359 return NULL_TREE;
18360 }
18361 default:
18362 return NULL_TREE;
18363
18364 }
18365 }
18366
18367 /* Store OPERAND to memory after reload is completed. This means
18368 that we can't easily use assign_stack_local. */
18369 rtx
18370 ix86_force_to_memory (enum machine_mode mode, rtx operand)
18371 {
18372 rtx result;
18373
18374 gcc_assert (reload_completed);
18375 if (TARGET_RED_ZONE)
18376 {
18377 result = gen_rtx_MEM (mode,
18378 gen_rtx_PLUS (Pmode,
18379 stack_pointer_rtx,
18380 GEN_INT (-RED_ZONE_SIZE)));
18381 emit_move_insn (result, operand);
18382 }
18383 else if (!TARGET_RED_ZONE && TARGET_64BIT)
18384 {
18385 switch (mode)
18386 {
18387 case HImode:
18388 case SImode:
18389 operand = gen_lowpart (DImode, operand);
18390 /* FALLTHRU */
18391 case DImode:
18392 emit_insn (
18393 gen_rtx_SET (VOIDmode,
18394 gen_rtx_MEM (DImode,
18395 gen_rtx_PRE_DEC (DImode,
18396 stack_pointer_rtx)),
18397 operand));
18398 break;
18399 default:
18400 gcc_unreachable ();
18401 }
18402 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18403 }
18404 else
18405 {
18406 switch (mode)
18407 {
18408 case DImode:
18409 {
18410 rtx operands[2];
18411 split_di (&operand, 1, operands, operands + 1);
18412 emit_insn (
18413 gen_rtx_SET (VOIDmode,
18414 gen_rtx_MEM (SImode,
18415 gen_rtx_PRE_DEC (Pmode,
18416 stack_pointer_rtx)),
18417 operands[1]));
18418 emit_insn (
18419 gen_rtx_SET (VOIDmode,
18420 gen_rtx_MEM (SImode,
18421 gen_rtx_PRE_DEC (Pmode,
18422 stack_pointer_rtx)),
18423 operands[0]));
18424 }
18425 break;
18426 case HImode:
18427 /* Store HImodes as SImodes. */
18428 operand = gen_lowpart (SImode, operand);
18429 /* FALLTHRU */
18430 case SImode:
18431 emit_insn (
18432 gen_rtx_SET (VOIDmode,
18433 gen_rtx_MEM (GET_MODE (operand),
18434 gen_rtx_PRE_DEC (SImode,
18435 stack_pointer_rtx)),
18436 operand));
18437 break;
18438 default:
18439 gcc_unreachable ();
18440 }
18441 result = gen_rtx_MEM (mode, stack_pointer_rtx);
18442 }
18443 return result;
18444 }
18445
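/* Illustration: on 32-bit targets a DImode operand is pushed as two
   SImode words (high word first, so the low word ends up at the new top
   of stack), and the MEM returned addresses that top of stack;
   ix86_free_from_memory below undoes the stack adjustment.  */
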
18446 /* Free the operand from memory. */
18447 void
18448 ix86_free_from_memory (enum machine_mode mode)
18449 {
18450 if (!TARGET_RED_ZONE)
18451 {
18452 int size;
18453
18454 if (mode == DImode || TARGET_64BIT)
18455 size = 8;
18456 else
18457 size = 4;
18458 /* Use LEA to deallocate stack space. In peephole2 it will be converted
18459 to a pop or add instruction if registers are available. */
18460 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
18461 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
18462 GEN_INT (size))));
18463 }
18464 }
18465
18466 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
18467 QImode must go into class Q_REGS.
18468 Narrow ALL_REGS to GENERAL_REGS. This supports allowing movsf and
18469 movdf to do mem-to-mem moves through integer regs. */
18470 enum reg_class
18471 ix86_preferred_reload_class (rtx x, enum reg_class class)
18472 {
18473 enum machine_mode mode = GET_MODE (x);
18474
18475 /* We're only allowed to return a subclass of CLASS. Many of the
18476 following checks fail for NO_REGS, so eliminate that early. */
18477 if (class == NO_REGS)
18478 return NO_REGS;
18479
18480 /* All classes can load zeros. */
18481 if (x == CONST0_RTX (mode))
18482 return class;
18483
18484 /* Force constants into memory if we are loading a (nonzero) constant into
18485 an MMX or SSE register. This is because there are no MMX/SSE instructions
18486 to load from a constant. */
18487 if (CONSTANT_P (x)
18488 && (MAYBE_MMX_CLASS_P (class) || MAYBE_SSE_CLASS_P (class)))
18489 return NO_REGS;
18490
18491 /* Prefer SSE regs only, if we can use them for math. */
18492 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
18493 return SSE_CLASS_P (class) ? class : NO_REGS;
18494
18495 /* Floating-point constants need more complex checks. */
18496 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
18497 {
18498 /* General regs can load everything. */
18499 if (reg_class_subset_p (class, GENERAL_REGS))
18500 return class;
18501
18502 /* Floats can load 0 and 1 plus some others. Note that we eliminated
18503 zero above. We only want to wind up preferring 80387 registers if
18504 we plan on doing computation with them. */
18505 if (TARGET_80387
18506 && standard_80387_constant_p (x))
18507 {
18508 /* Limit class to non-sse. */
18509 if (class == FLOAT_SSE_REGS)
18510 return FLOAT_REGS;
18511 if (class == FP_TOP_SSE_REGS)
18512 return FP_TOP_REG;
18513 if (class == FP_SECOND_SSE_REGS)
18514 return FP_SECOND_REG;
18515 if (class == FLOAT_INT_REGS || class == FLOAT_REGS)
18516 return class;
18517 }
18518
18519 return NO_REGS;
18520 }
18521
18522 /* Generally when we see PLUS here, it's the function invariant
18523 (plus soft-fp const_int), which can only be computed into general
18524 regs. */
18525 if (GET_CODE (x) == PLUS)
18526 return reg_class_subset_p (class, GENERAL_REGS) ? class : NO_REGS;
18527
18528 /* QImode constants are easy to load, but non-constant QImode data
18529 must go into Q_REGS. */
18530 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
18531 {
18532 if (reg_class_subset_p (class, Q_REGS))
18533 return class;
18534 if (reg_class_subset_p (Q_REGS, class))
18535 return Q_REGS;
18536 return NO_REGS;
18537 }
18538
18539 return class;
18540 }
18541
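/* Illustration: reloading the constant 1.0 into a pseudo whose class is
   FLOAT_SSE_REGS prefers FLOAT_REGS, since fld1 can materialize it; the
   same constant headed for a pure SSE class yields NO_REGS above because
   SSE has no load-from-immediate, forcing the constant into memory.  */
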
18542 /* Discourage putting floating-point values in SSE registers unless
18543 SSE math is being used, and likewise for the 387 registers. */
18544 enum reg_class
18545 ix86_preferred_output_reload_class (rtx x, enum reg_class class)
18546 {
18547 enum machine_mode mode = GET_MODE (x);
18548
18549 /* Restrict the output reload class to the register bank that we are doing
18550 math on. If we would like not to return a subset of CLASS, reject this
18551 alternative: if reload cannot do this, it will still use its choice. */
18552 mode = GET_MODE (x);
18553 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
18554 return MAYBE_SSE_CLASS_P (class) ? SSE_REGS : NO_REGS;
18555
18556 if (TARGET_80387 && SCALAR_FLOAT_MODE_P (mode))
18557 {
18558 if (class == FP_TOP_SSE_REGS)
18559 return FP_TOP_REG;
18560 else if (class == FP_SECOND_SSE_REGS)
18561 return FP_SECOND_REG;
18562 else
18563 return FLOAT_CLASS_P (class) ? class : NO_REGS;
18564 }
18565
18566 return class;
18567 }
18568
18569 /* If we are copying between general and FP registers, we need a memory
18570 location. The same is true for SSE and MMX registers.
18571
18572 The macro can't work reliably when one of the CLASSES is a class containing
18573 registers from multiple units (SSE, MMX, integer). We avoid this by never
18574 combining those units in a single alternative in the machine description.
18575 Ensure that this constraint holds to avoid unexpected surprises.
18576
18577 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
18578 enforce these sanity checks. */
18579
18580 int
18581 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
18582 enum machine_mode mode, int strict)
18583 {
18584 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
18585 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
18586 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
18587 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
18588 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
18589 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
18590 {
18591 gcc_assert (!strict);
18592 return true;
18593 }
18594
18595 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
18596 return true;
18597
18598 /* ??? This is a lie. We do have moves between mmx/general, and between
18599 mmx/sse2. But by saying we need secondary memory we discourage the
18600 register allocator from using the mmx registers unless needed. */
18601 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
18602 return true;
18603
18604 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18605 {
18606 /* SSE1 doesn't have any direct moves from other classes. */
18607 if (!TARGET_SSE2)
18608 return true;
18609
18610 /* If the target says that inter-unit moves are more expensive
18611 than moving through memory, then don't generate them. */
18612 if (!TARGET_INTER_UNIT_MOVES)
18613 return true;
18614
18615 /* Between SSE and general, we have moves no larger than word size. */
18616 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
18617 return true;
18618 }
18619
18620 return false;
18621 }
18622
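/* Illustration: copying a DImode value between GENERAL_REGS and SSE_REGS
   on a 32-bit target reports that a stack slot is needed, both because
   inter-unit moves may be disabled and because the value is wider than a
   word.  */
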
18623 /* Return true if the registers in CLASS cannot represent the change from
18624 modes FROM to TO. */
18625
18626 bool
18627 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
18628 enum reg_class class)
18629 {
18630 if (from == to)
18631 return false;
18632
18633 /* x87 registers can't do subreg at all, as all values are reformatted
18634 to extended precision. */
18635 if (MAYBE_FLOAT_CLASS_P (class))
18636 return true;
18637
18638 if (MAYBE_SSE_CLASS_P (class) || MAYBE_MMX_CLASS_P (class))
18639 {
18640 /* Vector registers do not support QI or HImode loads. If we don't
18641 disallow a change to these modes, reload will assume it's ok to
18642 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
18643 the vec_dupv4hi pattern. */
18644 if (GET_MODE_SIZE (from) < 4)
18645 return true;
18646
18647 /* Vector registers do not support subreg with nonzero offsets, which
18648 are otherwise valid for integer registers. Since we can't see
18649 whether we have a nonzero offset from here, prohibit all
18650 nonparadoxical subregs changing size. */
18651 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
18652 return true;
18653 }
18654
18655 return false;
18656 }
18657
18658 /* Return the cost of moving data from a register in class CLASS1 to
18659 one in class CLASS2.
18660
18661 It is not required that the cost always equal 2 when FROM is the same as TO;
18662 on some machines it is expensive to move between registers if they are not
18663 general registers. */
18664
18665 int
18666 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
18667 enum reg_class class2)
18668 {
18669 /* In case we require secondary memory, compute the cost of the store
18670 followed by the load. In order to avoid bad register allocation choices,
18671 we need this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
18672
18673 if (ix86_secondary_memory_needed (class1, class2, mode, 0))
18674 {
18675 int cost = 1;
18676
18677 cost += MAX (MEMORY_MOVE_COST (mode, class1, 0),
18678 MEMORY_MOVE_COST (mode, class1, 1));
18679 cost += MAX (MEMORY_MOVE_COST (mode, class2, 0),
18680 MEMORY_MOVE_COST (mode, class2, 1));
18681
18682 /* When copying from a general purpose register we may emit multiple
18683 stores followed by a single load, causing a memory size mismatch stall.
18684 Count this as an arbitrarily high cost of 20. */
18685 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
18686 cost += 20;
18687
18688 /* In the case of FP/MMX moves, the registers actually overlap, and we
18689 have to switch modes in order to treat them differently. */
18690 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
18691 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
18692 cost += 20;
18693
18694 return cost;
18695 }
18696
18697 /* Moves between SSE/MMX and integer unit are expensive. */
18698 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
18699 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
18700 return ix86_cost->mmxsse_to_integer;
18701 if (MAYBE_FLOAT_CLASS_P (class1))
18702 return ix86_cost->fp_move;
18703 if (MAYBE_SSE_CLASS_P (class1))
18704 return ix86_cost->sse_move;
18705 if (MAYBE_MMX_CLASS_P (class1))
18706 return ix86_cost->mmx_move;
18707 return 2;
18708 }
18709
18710 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
18711
18712 bool
18713 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
18714 {
18715 /* Only the flags register can hold CCmode values, and it can hold nothing else. */
18716 if (CC_REGNO_P (regno))
18717 return GET_MODE_CLASS (mode) == MODE_CC;
18718 if (GET_MODE_CLASS (mode) == MODE_CC
18719 || GET_MODE_CLASS (mode) == MODE_RANDOM
18720 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
18721 return 0;
18722 if (FP_REGNO_P (regno))
18723 return VALID_FP_MODE_P (mode);
18724 if (SSE_REGNO_P (regno))
18725 {
18726 /* We implement the move patterns for all vector modes into and
18727 out of SSE registers, even when no operation instructions
18728 are available. */
18729 return (VALID_SSE_REG_MODE (mode)
18730 || VALID_SSE2_REG_MODE (mode)
18731 || VALID_MMX_REG_MODE (mode)
18732 || VALID_MMX_REG_MODE_3DNOW (mode));
18733 }
18734 if (MMX_REGNO_P (regno))
18735 {
18736 /* We implement the move patterns for 3DNOW modes even in MMX mode,
18737 so if the register is available at all, then we can move data of
18738 the given mode into or out of it. */
18739 return (VALID_MMX_REG_MODE (mode)
18740 || VALID_MMX_REG_MODE_3DNOW (mode));
18741 }
18742
18743 if (mode == QImode)
18744 {
18745 /* Take care with QImode values - they can live in non-QI regs,
18746 but then they do cause partial register stalls. */
18747 if (regno < 4 || TARGET_64BIT)
18748 return 1;
18749 if (!TARGET_PARTIAL_REG_STALL)
18750 return 1;
18751 return reload_in_progress || reload_completed;
18752 }
18753 /* We handle both integer and floats in the general purpose registers. */
18754 else if (VALID_INT_MODE_P (mode))
18755 return 1;
18756 else if (VALID_FP_MODE_P (mode))
18757 return 1;
18758 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
18759 on to use that value in smaller contexts, this can easily force a
18760 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
18761 supporting DImode, allow it. */
18762 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
18763 return 1;
18764
18765 return 0;
18766 }
18767
18768 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
18769 tieable integer mode. */
18770
18771 static bool
18772 ix86_tieable_integer_mode_p (enum machine_mode mode)
18773 {
18774 switch (mode)
18775 {
18776 case HImode:
18777 case SImode:
18778 return true;
18779
18780 case QImode:
18781 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
18782
18783 case DImode:
18784 return TARGET_64BIT;
18785
18786 default:
18787 return false;
18788 }
18789 }
18790
18791 /* Return true if MODE1 is accessible in a register that can hold MODE2
18792 without copying. That is, all register classes that can hold MODE2
18793 can also hold MODE1. */
18794
18795 bool
18796 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
18797 {
18798 if (mode1 == mode2)
18799 return true;
18800
18801 if (ix86_tieable_integer_mode_p (mode1)
18802 && ix86_tieable_integer_mode_p (mode2))
18803 return true;
18804
18805 /* MODE2 being XFmode implies fp stack or general regs, which means we
18806 can tie any smaller floating point modes to it. Note that we do not
18807 tie this with TFmode. */
18808 if (mode2 == XFmode)
18809 return mode1 == SFmode || mode1 == DFmode;
18810
18811 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
18812 that we can tie it with SFmode. */
18813 if (mode2 == DFmode)
18814 return mode1 == SFmode;
18815
18816 /* If MODE2 is only appropriate for an SSE register, then tie with
18817 any other mode acceptable to SSE registers. */
18818 if (GET_MODE_SIZE (mode2) == 16
18819 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
18820 return (GET_MODE_SIZE (mode1) == 16
18821 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
18822
18823 /* If MODE2 is appropriate for an MMX register, then tie
18824 with any other mode acceptable to MMX registers. */
18825 if (GET_MODE_SIZE (mode2) == 8
18826 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
18827 return (GET_MODE_SIZE (mode1) == 8
18828 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
18829
18830 return false;
18831 }
18832
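/* Illustration: SImode and HImode tie (any integer register can hold
   both), DFmode ties only with SFmode, and two 16-byte vector modes such
   as V4SFmode and V2DFmode tie through the SSE registers.  */
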
18833 /* Return the cost of moving data of mode M between a
18834 register and memory. A value of 2 is the default; this cost is
18835 relative to those in `REGISTER_MOVE_COST'.
18836
18837 If moving between registers and memory is more expensive than
18838 between two registers, you should define this macro to express the
18839 relative cost.
18840
18841 Also model the increased cost of moving QImode registers in non-Q_REGS
18842 classes.
18843 */
18844 int
18845 ix86_memory_move_cost (enum machine_mode mode, enum reg_class class, int in)
18846 {
18847 if (FLOAT_CLASS_P (class))
18848 {
18849 int index;
18850 switch (mode)
18851 {
18852 case SFmode:
18853 index = 0;
18854 break;
18855 case DFmode:
18856 index = 1;
18857 break;
18858 case XFmode:
18859 index = 2;
18860 break;
18861 default:
18862 return 100;
18863 }
18864 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
18865 }
18866 if (SSE_CLASS_P (class))
18867 {
18868 int index;
18869 switch (GET_MODE_SIZE (mode))
18870 {
18871 case 4:
18872 index = 0;
18873 break;
18874 case 8:
18875 index = 1;
18876 break;
18877 case 16:
18878 index = 2;
18879 break;
18880 default:
18881 return 100;
18882 }
18883 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
18884 }
18885 if (MMX_CLASS_P (class))
18886 {
18887 int index;
18888 switch (GET_MODE_SIZE (mode))
18889 {
18890 case 4:
18891 index = 0;
18892 break;
18893 case 8:
18894 index = 1;
18895 break;
18896 default:
18897 return 100;
18898 }
18899 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
18900 }
18901 switch (GET_MODE_SIZE (mode))
18902 {
18903 case 1:
18904 if (in)
18905 return (Q_CLASS_P (class) ? ix86_cost->int_load[0]
18906 : ix86_cost->movzbl_load);
18907 else
18908 return (Q_CLASS_P (class) ? ix86_cost->int_store[0]
18909 : ix86_cost->int_store[0] + 4);
18910 break;
18911 case 2:
18912 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
18913 default:
18914 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
18915 if (mode == TFmode)
18916 mode = XFmode;
18917 return ((in ? ix86_cost->int_load[2] : ix86_cost->int_store[2])
18918 * (((int) GET_MODE_SIZE (mode)
18919 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
18920 }
18921 }
18922
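/* Illustration: a DFmode value in FLOAT_REGS uses index 1 of the active
   cost table's fp_load/fp_store arrays, while an 8-byte integer move on a
   32-bit target falls into the default case and is charged as two
   word-sized int_load[2]/int_store[2] moves.  */
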
18923 /* Compute a (partial) cost for rtx X. Return true if the complete
18924 cost has been computed, and false if subexpressions should be
18925 scanned. In either case, *TOTAL contains the cost result. */
18926
18927 static bool
18928 ix86_rtx_costs (rtx x, int code, int outer_code, int *total)
18929 {
18930 enum machine_mode mode = GET_MODE (x);
18931
18932 switch (code)
18933 {
18934 case CONST_INT:
18935 case CONST:
18936 case LABEL_REF:
18937 case SYMBOL_REF:
18938 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
18939 *total = 3;
18940 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
18941 *total = 2;
18942 else if (flag_pic && SYMBOLIC_CONST (x)
18943 && (!TARGET_64BIT
17944 || (GET_CODE (x) != LABEL_REF
18945 && (GET_CODE (x) != SYMBOL_REF
18946 || !SYMBOL_REF_LOCAL_P (x)))))
18947 *total = 1;
18948 else
18949 *total = 0;
18950 return true;
18951
18952 case CONST_DOUBLE:
18953 if (mode == VOIDmode)
18954 *total = 0;
18955 else
18956 switch (standard_80387_constant_p (x))
18957 {
18958 case 1: /* 0.0 */
18959 *total = 1;
18960 break;
18961 default: /* Other constants */
18962 *total = 2;
18963 break;
18964 case 0:
18965 case -1:
18966 /* Start with (MEM (SYMBOL_REF)), since that's where
18967 it'll probably end up. Add a penalty for size. */
18968 *total = (COSTS_N_INSNS (1)
18969 + (flag_pic != 0 && !TARGET_64BIT)
18970 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
18971 break;
18972 }
18973 return true;
18974
18975 case ZERO_EXTEND:
18976 /* The zero extension is often completely free on x86_64, so make
18977 it as cheap as possible. */
18978 if (TARGET_64BIT && mode == DImode
18979 && GET_MODE (XEXP (x, 0)) == SImode)
18980 *total = 1;
18981 else if (TARGET_ZERO_EXTEND_WITH_AND)
18982 *total = ix86_cost->add;
18983 else
18984 *total = ix86_cost->movzx;
18985 return false;
18986
18987 case SIGN_EXTEND:
18988 *total = ix86_cost->movsx;
18989 return false;
18990
18991 case ASHIFT:
18992 if (CONST_INT_P (XEXP (x, 1))
18993 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
18994 {
18995 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
18996 if (value == 1)
18997 {
18998 *total = ix86_cost->add;
18999 return false;
19000 }
19001 if ((value == 2 || value == 3)
19002 && ix86_cost->lea <= ix86_cost->shift_const)
19003 {
19004 *total = ix86_cost->lea;
19005 return false;
19006 }
19007 }
19008 /* FALLTHRU */
19009
19010 case ROTATE:
19011 case ASHIFTRT:
19012 case LSHIFTRT:
19013 case ROTATERT:
19014 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
19015 {
19016 if (CONST_INT_P (XEXP (x, 1)))
19017 {
19018 if (INTVAL (XEXP (x, 1)) > 32)
19019 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
19020 else
19021 *total = ix86_cost->shift_const * 2;
19022 }
19023 else
19024 {
19025 if (GET_CODE (XEXP (x, 1)) == AND)
19026 *total = ix86_cost->shift_var * 2;
19027 else
19028 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
19029 }
19030 }
19031 else
19032 {
19033 if (CONST_INT_P (XEXP (x, 1)))
19034 *total = ix86_cost->shift_const;
19035 else
19036 *total = ix86_cost->shift_var;
19037 }
19038 return false;
19039
19040 case MULT:
19041 if (FLOAT_MODE_P (mode))
19042 {
19043 *total = ix86_cost->fmul;
19044 return false;
19045 }
19046 else
19047 {
19048 rtx op0 = XEXP (x, 0);
19049 rtx op1 = XEXP (x, 1);
19050 int nbits;
19051 if (CONST_INT_P (XEXP (x, 1)))
19052 {
19053 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
19054 for (nbits = 0; value != 0; value &= value - 1)
19055 nbits++;
19056 }
19057 else
19058 /* This is arbitrary. */
19059 nbits = 7;
19060
19061 /* Compute costs correctly for widening multiplication. */
19062 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
19063 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
19064 == GET_MODE_SIZE (mode))
19065 {
19066 int is_mulwiden = 0;
19067 enum machine_mode inner_mode = GET_MODE (op0);
19068
19069 if (GET_CODE (op0) == GET_CODE (op1))
19070 is_mulwiden = 1, op1 = XEXP (op1, 0);
19071 else if (CONST_INT_P (op1))
19072 {
19073 if (GET_CODE (op0) == SIGN_EXTEND)
19074 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
19075 == INTVAL (op1);
19076 else
19077 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
19078 }
19079
19080 if (is_mulwiden)
19081 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
19082 }
19083
19084 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
19085 + nbits * ix86_cost->mult_bit
19086 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
19087
19088 return true;
19089 }
19090
19091 case DIV:
19092 case UDIV:
19093 case MOD:
19094 case UMOD:
19095 if (FLOAT_MODE_P (mode))
19096 *total = ix86_cost->fdiv;
19097 else
19098 *total = ix86_cost->divide[MODE_INDEX (mode)];
19099 return false;
19100
19101 case PLUS:
19102 if (FLOAT_MODE_P (mode))
19103 *total = ix86_cost->fadd;
19104 else if (GET_MODE_CLASS (mode) == MODE_INT
19105 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
19106 {
19107 if (GET_CODE (XEXP (x, 0)) == PLUS
19108 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
19109 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
19110 && CONSTANT_P (XEXP (x, 1)))
19111 {
19112 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
19113 if (val == 2 || val == 4 || val == 8)
19114 {
19115 *total = ix86_cost->lea;
19116 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19117 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
19118 outer_code);
19119 *total += rtx_cost (XEXP (x, 1), outer_code);
19120 return true;
19121 }
19122 }
19123 else if (GET_CODE (XEXP (x, 0)) == MULT
19124 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
19125 {
19126 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
19127 if (val == 2 || val == 4 || val == 8)
19128 {
19129 *total = ix86_cost->lea;
19130 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19131 *total += rtx_cost (XEXP (x, 1), outer_code);
19132 return true;
19133 }
19134 }
19135 else if (GET_CODE (XEXP (x, 0)) == PLUS)
19136 {
19137 *total = ix86_cost->lea;
19138 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
19139 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
19140 *total += rtx_cost (XEXP (x, 1), outer_code);
19141 return true;
19142 }
19143 }
19144 /* FALLTHRU */
19145
19146 case MINUS:
19147 if (FLOAT_MODE_P (mode))
19148 {
19149 *total = ix86_cost->fadd;
19150 return false;
19151 }
19152 /* FALLTHRU */
19153
19154 case AND:
19155 case IOR:
19156 case XOR:
19157 if (!TARGET_64BIT && mode == DImode)
19158 {
19159 *total = (ix86_cost->add * 2
19160 + (rtx_cost (XEXP (x, 0), outer_code)
19161 << (GET_MODE (XEXP (x, 0)) != DImode))
19162 + (rtx_cost (XEXP (x, 1), outer_code)
19163 << (GET_MODE (XEXP (x, 1)) != DImode)));
19164 return true;
19165 }
19166 /* FALLTHRU */
19167
19168 case NEG:
19169 if (FLOAT_MODE_P (mode))
19170 {
19171 *total = ix86_cost->fchs;
19172 return false;
19173 }
19174 /* FALLTHRU */
19175
19176 case NOT:
19177 if (!TARGET_64BIT && mode == DImode)
19178 *total = ix86_cost->add * 2;
19179 else
19180 *total = ix86_cost->add;
19181 return false;
19182
19183 case COMPARE:
19184 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
19185 && XEXP (XEXP (x, 0), 1) == const1_rtx
19186 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
19187 && XEXP (x, 1) == const0_rtx)
19188 {
19189 /* This kind of construct is implemented using test[bwl].
19190 Treat it as if we had an AND. */
19191 *total = (ix86_cost->add
19192 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
19193 + rtx_cost (const1_rtx, outer_code));
19194 return true;
19195 }
19196 return false;
19197
19198 case FLOAT_EXTEND:
19199 if (!TARGET_SSE_MATH
19200 || mode == XFmode
19201 || (mode == DFmode && !TARGET_SSE2))
19202 *total = 0;
19203 return false;
19204
19205 case ABS:
19206 if (FLOAT_MODE_P (mode))
19207 *total = ix86_cost->fabs;
19208 return false;
19209
19210 case SQRT:
19211 if (FLOAT_MODE_P (mode))
19212 *total = ix86_cost->fsqrt;
19213 return false;
19214
19215 case UNSPEC:
19216 if (XINT (x, 1) == UNSPEC_TP)
19217 *total = 0;
19218 return false;
19219
19220 default:
19221 return false;
19222 }
19223 }
19224
19225 #if TARGET_MACHO
19226
19227 static int current_machopic_label_num;
19228
19229 /* Given a symbol name and its associated stub, write out the
19230 definition of the stub. */
19231
19232 void
19233 machopic_output_stub (FILE *file, const char *symb, const char *stub)
19234 {
19235 unsigned int length;
19236 char *binder_name, *symbol_name, lazy_ptr_name[32];
19237 int label = ++current_machopic_label_num;
19238
19239 /* For 64-bit we shouldn't get here. */
19240 gcc_assert (!TARGET_64BIT);
19241
19242 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
19243 symb = (*targetm.strip_name_encoding) (symb);
19244
19245 length = strlen (stub);
19246 binder_name = alloca (length + 32);
19247 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
19248
19249 length = strlen (symb);
19250 symbol_name = alloca (length + 32);
19251 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
19252
19253 sprintf (lazy_ptr_name, "L%d$lz", label);
19254
19255 if (MACHOPIC_PURE)
19256 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
19257 else
19258 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
19259
19260 fprintf (file, "%s:\n", stub);
19261 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19262
19263 if (MACHOPIC_PURE)
19264 {
19265 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
19266 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
19267 fprintf (file, "\tjmp\t*%%edx\n");
19268 }
19269 else
19270 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
19271
19272 fprintf (file, "%s:\n", binder_name);
19273
19274 if (MACHOPIC_PURE)
19275 {
19276 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
19277 fprintf (file, "\tpushl\t%%eax\n");
19278 }
19279 else
19280 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
19281
19282 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
19283
19284 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
19285 fprintf (file, "%s:\n", lazy_ptr_name);
19286 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
19287 fprintf (file, "\t.long %s\n", binder_name);
19288 }
19289
19290 void
19291 darwin_x86_file_end (void)
19292 {
19293 darwin_file_end ();
19294 ix86_file_end ();
19295 }
19296 #endif /* TARGET_MACHO */
19297
19298 /* Order the registers for register allocator. */
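/* The resulting order is, in outline (illustrative summary): call-used
   general registers, then call-saved general registers, then the x87
   and SSE blocks (whichever is preferred for FP math comes first),
   then the MMX registers, with the remaining slots zero-filled.  */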
19299
19300 void
19301 x86_order_regs_for_local_alloc (void)
19302 {
19303 int pos = 0;
19304 int i;
19305
19306 /* First allocate the local general purpose registers. */
19307 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19308 if (GENERAL_REGNO_P (i) && call_used_regs[i])
19309 reg_alloc_order [pos++] = i;
19310
19311 /* Global general purpose registers. */
19312 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
19313 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
19314 reg_alloc_order [pos++] = i;
19315
19316 /* x87 registers come first in case we are doing FP math
19317 using them. */
19318 if (!TARGET_SSE_MATH)
19319 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19320 reg_alloc_order [pos++] = i;
19321
19322 /* SSE registers. */
19323 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
19324 reg_alloc_order [pos++] = i;
19325 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
19326 reg_alloc_order [pos++] = i;
19327
19328 /* x87 registers. */
19329 if (TARGET_SSE_MATH)
19330 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
19331 reg_alloc_order [pos++] = i;
19332
19333 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
19334 reg_alloc_order [pos++] = i;
19335
19336 /* Initialize the rest of array as we do not allocate some registers
19337 at all. */
19338 while (pos < FIRST_PSEUDO_REGISTER)
19339 reg_alloc_order [pos++] = 0;
19340 }
19341
19342 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
19343 struct attribute_spec.handler. */
19344 static tree
19345 ix86_handle_struct_attribute (tree *node, tree name,
19346 tree args ATTRIBUTE_UNUSED,
19347 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
19348 {
19349 tree *type = NULL;
19350 if (DECL_P (*node))
19351 {
19352 if (TREE_CODE (*node) == TYPE_DECL)
19353 type = &TREE_TYPE (*node);
19354 }
19355 else
19356 type = node;
19357
19358 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
19359 || TREE_CODE (*type) == UNION_TYPE)))
19360 {
19361 warning (OPT_Wattributes, "%qs attribute ignored",
19362 IDENTIFIER_POINTER (name));
19363 *no_add_attrs = true;
19364 }
19365
19366 else if ((is_attribute_p ("ms_struct", name)
19367 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
19368 || ((is_attribute_p ("gcc_struct", name)
19369 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
19370 {
19371 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
19372 IDENTIFIER_POINTER (name));
19373 *no_add_attrs = true;
19374 }
19375
19376 return NULL_TREE;
19377 }
19378
19379 static bool
19380 ix86_ms_bitfield_layout_p (tree record_type)
19381 {
19382 return (TARGET_MS_BITFIELD_LAYOUT &&
19383 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
19384 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
19385 }
19386
19387 /* Returns an expression indicating where the this parameter is
19388 located on entry to the FUNCTION. */
19389
19390 static rtx
19391 x86_this_parameter (tree function)
19392 {
19393 tree type = TREE_TYPE (function);
19394
19395 if (TARGET_64BIT)
19396 {
19397 int n = aggregate_value_p (TREE_TYPE (type), type) != 0;
19398 return gen_rtx_REG (DImode, x86_64_int_parameter_registers[n]);
19399 }
19400
19401 if (ix86_function_regparm (type, function) > 0)
19402 {
19403 tree parm;
19404
19405 parm = TYPE_ARG_TYPES (type);
19406 /* Figure out whether or not the function has a variable number of
19407 arguments. */
19408 for (; parm; parm = TREE_CHAIN (parm))
19409 if (TREE_VALUE (parm) == void_type_node)
19410 break;
19411 /* If not, the this parameter is in the first argument. */
19412 if (parm)
19413 {
19414 int regno = 0;
19415 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
19416 regno = 2;
19417 return gen_rtx_REG (SImode, regno);
19418 }
19419 }
19420
19421 if (aggregate_value_p (TREE_TYPE (type), type))
19422 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 8));
19423 else
19424 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, 4));
19425 }
19426
19427 /* Determine whether x86_output_mi_thunk can succeed. */
19428
19429 static bool
19430 x86_can_output_mi_thunk (tree thunk ATTRIBUTE_UNUSED,
19431 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
19432 HOST_WIDE_INT vcall_offset, tree function)
19433 {
19434 /* 64-bit can handle anything. */
19435 if (TARGET_64BIT)
19436 return true;
19437
19438 /* For 32-bit, everything's fine if we have one free register. */
19439 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
19440 return true;
19441
19442 /* Need a free register for vcall_offset. */
19443 if (vcall_offset)
19444 return false;
19445
19446 /* Need a free register for GOT references. */
19447 if (flag_pic && !(*targetm.binds_local_p) (function))
19448 return false;
19449
19450 /* Otherwise ok. */
19451 return true;
19452 }
19453
19454 /* Output the assembler code for a thunk function. THUNK_DECL is the
19455 declaration for the thunk function itself, FUNCTION is the decl for
19456 the target function. DELTA is an immediate constant offset to be
19457 added to THIS. If VCALL_OFFSET is nonzero, the word at
19458 *(*this + vcall_offset) should be added to THIS. */
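/* Sketch of typical output, assuming 32-bit code, a stack-passed THIS,
   a locally bound non-PIC FUNCTION and no VCALL_OFFSET (illustrative
   only; register and offset choices follow the code below):
   addl $DELTA, 4(%esp)
   jmp function
   With a nonzero VCALL_OFFSET, THIS is first pulled into a scratch
   register and adjusted through the vtable before the tail jump.  */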
19459
19460 static void
19461 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
19462 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
19463 HOST_WIDE_INT vcall_offset, tree function)
19464 {
19465 rtx xops[3];
19466 rtx this = x86_this_parameter (function);
19467 rtx this_reg, tmp;
19468
19469 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
19470 pull it in now and let DELTA benefit. */
19471 if (REG_P (this))
19472 this_reg = this;
19473 else if (vcall_offset)
19474 {
19475 /* Put the this parameter into %eax. */
19476 xops[0] = this;
19477 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
19478 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19479 }
19480 else
19481 this_reg = NULL_RTX;
19482
19483 /* Adjust the this parameter by a fixed constant. */
19484 if (delta)
19485 {
19486 xops[0] = GEN_INT (delta);
19487 xops[1] = this_reg ? this_reg : this;
19488 if (TARGET_64BIT)
19489 {
19490 if (!x86_64_general_operand (xops[0], DImode))
19491 {
19492 tmp = gen_rtx_REG (DImode, R10_REG);
19493 xops[1] = tmp;
19494 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
19495 xops[0] = tmp;
19496 xops[1] = this;
19497 }
19498 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19499 }
19500 else
19501 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19502 }
19503
19504 /* Adjust the this parameter by a value stored in the vtable. */
19505 if (vcall_offset)
19506 {
19507 if (TARGET_64BIT)
19508 tmp = gen_rtx_REG (DImode, R10_REG);
19509 else
19510 {
19511 int tmp_regno = 2 /* ECX */;
19512 if (lookup_attribute ("fastcall",
19513 TYPE_ATTRIBUTES (TREE_TYPE (function))))
19514 tmp_regno = 0 /* EAX */;
19515 tmp = gen_rtx_REG (SImode, tmp_regno);
19516 }
19517
19518 xops[0] = gen_rtx_MEM (Pmode, this_reg);
19519 xops[1] = tmp;
19520 if (TARGET_64BIT)
19521 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19522 else
19523 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19524
19525 /* Adjust the this parameter. */
19526 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
19527 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
19528 {
19529 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
19530 xops[0] = GEN_INT (vcall_offset);
19531 xops[1] = tmp2;
19532 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
19533 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
19534 }
19535 xops[1] = this_reg;
19536 if (TARGET_64BIT)
19537 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
19538 else
19539 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
19540 }
19541
19542 /* If necessary, drop THIS back to its stack slot. */
19543 if (this_reg && this_reg != this)
19544 {
19545 xops[0] = this_reg;
19546 xops[1] = this;
19547 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
19548 }
19549
19550 xops[0] = XEXP (DECL_RTL (function), 0);
19551 if (TARGET_64BIT)
19552 {
19553 if (!flag_pic || (*targetm.binds_local_p) (function))
19554 output_asm_insn ("jmp\t%P0", xops);
19555 else
19556 {
19557 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
19558 tmp = gen_rtx_CONST (Pmode, tmp);
19559 tmp = gen_rtx_MEM (QImode, tmp);
19560 xops[0] = tmp;
19561 output_asm_insn ("jmp\t%A0", xops);
19562 }
19563 }
19564 else
19565 {
19566 if (!flag_pic || (*targetm.binds_local_p) (function))
19567 output_asm_insn ("jmp\t%P0", xops);
19568 else
19569 #if TARGET_MACHO
19570 if (TARGET_MACHO)
19571 {
19572 rtx sym_ref = XEXP (DECL_RTL (function), 0);
19573 tmp = (gen_rtx_SYMBOL_REF
19574 (Pmode,
19575 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
19576 tmp = gen_rtx_MEM (QImode, tmp);
19577 xops[0] = tmp;
19578 output_asm_insn ("jmp\t%0", xops);
19579 }
19580 else
19581 #endif /* TARGET_MACHO */
19582 {
19583 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
19584 output_set_got (tmp, NULL_RTX);
19585
19586 xops[1] = tmp;
19587 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
19588 output_asm_insn ("jmp\t{*}%1", xops);
19589 }
19590 }
19591 }
19592
19593 static void
19594 x86_file_start (void)
19595 {
19596 default_file_start ();
19597 #if TARGET_MACHO
19598 darwin_file_start ();
19599 #endif
19600 if (X86_FILE_START_VERSION_DIRECTIVE)
19601 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
19602 if (X86_FILE_START_FLTUSED)
19603 fputs ("\t.global\t__fltused\n", asm_out_file);
19604 if (ix86_asm_dialect == ASM_INTEL)
19605 fputs ("\t.intel_syntax\n", asm_out_file);
19606 }
19607
19608 int
19609 x86_field_alignment (tree field, int computed)
19610 {
19611 enum machine_mode mode;
19612 tree type = TREE_TYPE (field);
19613
19614 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
19615 return computed;
19616 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
19617 ? get_inner_array_type (type) : type);
19618 if (mode == DFmode || mode == DCmode
19619 || GET_MODE_CLASS (mode) == MODE_INT
19620 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
19621 return MIN (32, computed);
19622 return computed;
19623 }
19624
19625 /* Output assembler code to FILE to increment profiler label # LABELNO
19626 for profiling a function entry. */
19627 void
19628 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
19629 {
19630 if (TARGET_64BIT)
19631 if (flag_pic)
19632 {
19633 #ifndef NO_PROFILE_COUNTERS
19634 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
19635 #endif
19636 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
19637 }
19638 else
19639 {
19640 #ifndef NO_PROFILE_COUNTERS
19641 fprintf (file, "\tmovq\t$%sP%d,%%r11\n", LPREFIX, labelno);
19642 #endif
19643 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19644 }
19645 else if (flag_pic)
19646 {
19647 #ifndef NO_PROFILE_COUNTERS
19648 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
19649 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
19650 #endif
19651 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
19652 }
19653 else
19654 {
19655 #ifndef NO_PROFILE_COUNTERS
19656 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
19657 PROFILE_COUNT_REGISTER);
19658 #endif
19659 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
19660 }
19661 }
19662
19663 /* We don't have exact information about the insn sizes, but we may assume
19664 quite safely that we are informed about all 1 byte insns and memory
19665 address sizes. This is enough to eliminate unnecessary padding in
19666 99% of cases. */
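/* Rough examples of the estimates computed below (illustrative only):
   a direct call to a symbol counts as 5 bytes, a non-jump insn that
   mentions a symbolic reference counts as at least 1 + 4 bytes, and
   everything else is lower-bounded at 1 or 2 bytes.  */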
19667
19668 static int
19669 min_insn_size (rtx insn)
19670 {
19671 int l = 0;
19672
19673 if (!INSN_P (insn) || !active_insn_p (insn))
19674 return 0;
19675
19676 /* Discard alignments we've emitted and jump instructions. */
19677 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
19678 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
19679 return 0;
19680 if (JUMP_P (insn)
19681 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
19682 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
19683 return 0;
19684
19685 /* Important case - calls are always 5 bytes.
19686 It is common to have many calls in a row. */
19687 if (CALL_P (insn)
19688 && symbolic_reference_mentioned_p (PATTERN (insn))
19689 && !SIBLING_CALL_P (insn))
19690 return 5;
19691 if (get_attr_length (insn) <= 1)
19692 return 1;
19693
19694 /* For normal instructions we may rely on the sizes of addresses
19695 and the presence of a symbol to require 4 bytes of encoding.
19696 This is not the case for jumps, where references are PC relative. */
19697 if (!JUMP_P (insn))
19698 {
19699 l = get_attr_length_address (insn);
19700 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
19701 l = 4;
19702 }
19703 if (l)
19704 return 1+l;
19705 else
19706 return 2;
19707 }
19708
19709 /* AMD K8 core mispredicts jumps when there are more than 3 jumps in 16 byte
19710 window. */
19711
19712 static void
19713 ix86_avoid_jump_misspredicts (void)
19714 {
19715 rtx insn, start = get_insns ();
19716 int nbytes = 0, njumps = 0;
19717 int isjump = 0;
19718
19719 /* Look for all minimal intervals of instructions containing 4 jumps.
19720 The intervals are bounded by START and INSN. NBYTES is the total
19721 size of instructions in the interval including INSN and not including
19722 START. When NBYTES is smaller than 16 bytes, it is possible
19723 that the ends of START and INSN land in the same 16 byte page.
19724 
19725 The smallest offset in the page at which INSN can start is the case
19726 where START ends at offset 0. The offset of INSN is then NBYTES - sizeof (INSN).
19727 We add p2align to 16byte window with maxskip 17 - NBYTES + sizeof (INSN).
19728 */
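  /* Worked example (illustrative, not part of the original comment):
     with NBYTES == 13 and a 2 byte jump as INSN, the code below emits
     an align of 15 - 13 + 2 == 4 bytes before INSN, pushing it past the
     16 byte boundary it would otherwise share with the previous jumps.  */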
19729 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
19730 {
19731
19732 nbytes += min_insn_size (insn);
19733 if (dump_file)
19734 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
19735 INSN_UID (insn), min_insn_size (insn));
19736 if ((JUMP_P (insn)
19737 && GET_CODE (PATTERN (insn)) != ADDR_VEC
19738 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
19739 || CALL_P (insn))
19740 njumps++;
19741 else
19742 continue;
19743
19744 while (njumps > 3)
19745 {
19746 start = NEXT_INSN (start);
19747 if ((JUMP_P (start)
19748 && GET_CODE (PATTERN (start)) != ADDR_VEC
19749 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
19750 || CALL_P (start))
19751 njumps--, isjump = 1;
19752 else
19753 isjump = 0;
19754 nbytes -= min_insn_size (start);
19755 }
19756 gcc_assert (njumps >= 0);
19757 if (dump_file)
19758 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
19759 INSN_UID (start), INSN_UID (insn), nbytes);
19760
19761 if (njumps == 3 && isjump && nbytes < 16)
19762 {
19763 int padsize = 15 - nbytes + min_insn_size (insn);
19764
19765 if (dump_file)
19766 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
19767 INSN_UID (insn), padsize);
19768 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
19769 }
19770 }
19771 }
19772
19773 /* AMD Athlon works faster
19774 when RET is not the destination of a conditional jump or directly preceded
19775 by another jump instruction. We avoid the penalty by inserting a NOP just
19776 before the RET instruction in such cases. */
19777 static void
19778 ix86_pad_returns (void)
19779 {
19780 edge e;
19781 edge_iterator ei;
19782
19783 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
19784 {
19785 basic_block bb = e->src;
19786 rtx ret = BB_END (bb);
19787 rtx prev;
19788 bool replace = false;
19789
19790 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
19791 || !maybe_hot_bb_p (bb))
19792 continue;
19793 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
19794 if (active_insn_p (prev) || LABEL_P (prev))
19795 break;
19796 if (prev && LABEL_P (prev))
19797 {
19798 edge e;
19799 edge_iterator ei;
19800
19801 FOR_EACH_EDGE (e, ei, bb->preds)
19802 if (EDGE_FREQUENCY (e) && e->src->index >= 0
19803 && !(e->flags & EDGE_FALLTHRU))
19804 replace = true;
19805 }
19806 if (!replace)
19807 {
19808 prev = prev_active_insn (ret);
19809 if (prev
19810 && ((JUMP_P (prev) && any_condjump_p (prev))
19811 || CALL_P (prev)))
19812 replace = true;
19813 /* Empty functions get a branch mispredict even when the jump destination
19814 is not visible to us. */
19815 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
19816 replace = true;
19817 }
19818 if (replace)
19819 {
19820 emit_insn_before (gen_return_internal_long (), ret);
19821 delete_insn (ret);
19822 }
19823 }
19824 }
19825
19826 /* Implement machine specific optimizations. We implement padding of returns
19827 for K8 CPUs and pass to avoid 4 jumps in the single 16 byte window. */
19828 static void
19829 ix86_reorg (void)
19830 {
19831 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
19832 ix86_pad_returns ();
19833 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
19834 ix86_avoid_jump_misspredicts ();
19835 }
19836
19837 /* Return nonzero when a QImode register that must be represented via a REX
19838 prefix is used. */
19839 bool
19840 x86_extended_QIreg_mentioned_p (rtx insn)
19841 {
19842 int i;
19843 extract_insn_cached (insn);
19844 for (i = 0; i < recog_data.n_operands; i++)
19845 if (REG_P (recog_data.operand[i])
19846 && REGNO (recog_data.operand[i]) >= 4)
19847 return true;
19848 return false;
19849 }
19850
19851 /* Return nonzero when P points to a register encoded via a REX prefix.
19852 Called via for_each_rtx. */
19853 static int
19854 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
19855 {
19856 unsigned int regno;
19857 if (!REG_P (*p))
19858 return 0;
19859 regno = REGNO (*p);
19860 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
19861 }
19862
19863 /* Return true when INSN mentions a register that must be encoded using a REX
19864 prefix. */
19865 bool
19866 x86_extended_reg_mentioned_p (rtx insn)
19867 {
19868 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
19869 }
19870
19871 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
19872 optabs would emit if we didn't have TFmode patterns. */
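/* Illustrative shape of the expansion, assuming a DImode input IN and
   a scalar FP output OUT (names refer to the locals below):
   if (IN >= 0)
     OUT = (FP) IN;                      signed conversion suffices
   else
     {
       i0  = (IN >> 1) | (IN & 1);       halve, keeping a sticky bit
       f0  = (FP) i0;
       OUT = f0 + f0;                    undo the halving
     }
 */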
19873
19874 void
19875 x86_emit_floatuns (rtx operands[2])
19876 {
19877 rtx neglab, donelab, i0, i1, f0, in, out;
19878 enum machine_mode mode, inmode;
19879
19880 inmode = GET_MODE (operands[1]);
19881 gcc_assert (inmode == SImode || inmode == DImode);
19882
19883 out = operands[0];
19884 in = force_reg (inmode, operands[1]);
19885 mode = GET_MODE (out);
19886 neglab = gen_label_rtx ();
19887 donelab = gen_label_rtx ();
19888 f0 = gen_reg_rtx (mode);
19889
19890 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
19891
19892 expand_float (out, in, 0);
19893
19894 emit_jump_insn (gen_jump (donelab));
19895 emit_barrier ();
19896
19897 emit_label (neglab);
19898
19899 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
19900 1, OPTAB_DIRECT);
19901 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
19902 1, OPTAB_DIRECT);
19903 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
19904
19905 expand_float (f0, i0, 0);
19906
19907 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
19908
19909 emit_label (donelab);
19910 }
19911 \f
19912 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
19913 with all elements equal to VAR. Return true if successful. */
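/* For the narrow modes the broadcast is done by repeated widening; for
   example (illustrative only), a V8QImode splat of byte X first forms
   the HImode value (X << 8) | X and then recurses to splat that value
   as a V4HImode vector.  */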
19914
19915 static bool
19916 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
19917 rtx target, rtx val)
19918 {
19919 enum machine_mode smode, wsmode, wvmode;
19920 rtx x;
19921
19922 switch (mode)
19923 {
19924 case V2SImode:
19925 case V2SFmode:
19926 if (!mmx_ok)
19927 return false;
19928 /* FALLTHRU */
19929
19930 case V2DFmode:
19931 case V2DImode:
19932 case V4SFmode:
19933 case V4SImode:
19934 val = force_reg (GET_MODE_INNER (mode), val);
19935 x = gen_rtx_VEC_DUPLICATE (mode, val);
19936 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19937 return true;
19938
19939 case V4HImode:
19940 if (!mmx_ok)
19941 return false;
19942 if (TARGET_SSE || TARGET_3DNOW_A)
19943 {
19944 val = gen_lowpart (SImode, val);
19945 x = gen_rtx_TRUNCATE (HImode, val);
19946 x = gen_rtx_VEC_DUPLICATE (mode, x);
19947 emit_insn (gen_rtx_SET (VOIDmode, target, x));
19948 return true;
19949 }
19950 else
19951 {
19952 smode = HImode;
19953 wsmode = SImode;
19954 wvmode = V2SImode;
19955 goto widen;
19956 }
19957
19958 case V8QImode:
19959 if (!mmx_ok)
19960 return false;
19961 smode = QImode;
19962 wsmode = HImode;
19963 wvmode = V4HImode;
19964 goto widen;
19965 case V8HImode:
19966 if (TARGET_SSE2)
19967 {
19968 rtx tmp1, tmp2;
19969 /* Extend HImode to SImode using a paradoxical SUBREG. */
19970 tmp1 = gen_reg_rtx (SImode);
19971 emit_move_insn (tmp1, gen_lowpart (SImode, val));
19972 /* Insert the SImode value as low element of V4SImode vector. */
19973 tmp2 = gen_reg_rtx (V4SImode);
19974 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
19975 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
19976 CONST0_RTX (V4SImode),
19977 const1_rtx);
19978 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
19979 /* Cast the V4SImode vector back to a V8HImode vector. */
19980 tmp1 = gen_reg_rtx (V8HImode);
19981 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
19982 /* Duplicate the low short through the whole low SImode word. */
19983 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
19984 /* Cast the V8HImode vector back to a V4SImode vector. */
19985 tmp2 = gen_reg_rtx (V4SImode);
19986 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
19987 /* Replicate the low element of the V4SImode vector. */
19988 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
19989 /* Cast the V4SImode vector back to V8HImode, and store in target. */
19990 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
19991 return true;
19992 }
19993 smode = HImode;
19994 wsmode = SImode;
19995 wvmode = V4SImode;
19996 goto widen;
19997 case V16QImode:
19998 if (TARGET_SSE2)
19999 {
20000 rtx tmp1, tmp2;
20001 /* Extend QImode to SImode using a paradoxical SUBREG. */
20002 tmp1 = gen_reg_rtx (SImode);
20003 emit_move_insn (tmp1, gen_lowpart (SImode, val));
20004 /* Insert the SImode value as low element of V4SImode vector. */
20005 tmp2 = gen_reg_rtx (V4SImode);
20006 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
20007 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
20008 CONST0_RTX (V4SImode),
20009 const1_rtx);
20010 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
20011 /* Cast the V4SImode vector back to a V16QImode vector. */
20012 tmp1 = gen_reg_rtx (V16QImode);
20013 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
20014 /* Duplicate the low byte through the whole low SImode word. */
20015 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20016 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
20017 /* Cast the V16QImode vector back to a V4SImode vector. */
20018 tmp2 = gen_reg_rtx (V4SImode);
20019 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
20020 /* Replicate the low element of the V4SImode vector. */
20021 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
20022 /* Cast the V4SImode vector back to V16QImode, and store in target. */
20023 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
20024 return true;
20025 }
20026 smode = QImode;
20027 wsmode = HImode;
20028 wvmode = V8HImode;
20029 goto widen;
20030 widen:
20031 /* Replicate the value once into the next wider mode and recurse. */
20032 val = convert_modes (wsmode, smode, val, true);
20033 x = expand_simple_binop (wsmode, ASHIFT, val,
20034 GEN_INT (GET_MODE_BITSIZE (smode)),
20035 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20036 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
20037
20038 x = gen_reg_rtx (wvmode);
20039 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
20040 gcc_unreachable ();
20041 emit_move_insn (target, gen_lowpart (mode, x));
20042 return true;
20043
20044 default:
20045 return false;
20046 }
20047 }
20048
20049 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20050 whose ONE_VAR element is VAR, and other elements are zero. Return true
20051 if successful. */
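/* For example (illustrative): a V4SFmode vector { x, 0, 0, 0 } is
   built with a single vec_merge of a broadcast of x against the zero
   vector; when the nonzero element is not element 0 it is then moved
   into place with a pshufd (SSE2) or shufps (SSE1) shuffle.  */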
20052
20053 static bool
20054 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
20055 rtx target, rtx var, int one_var)
20056 {
20057 enum machine_mode vsimode;
20058 rtx new_target;
20059 rtx x, tmp;
20060
20061 switch (mode)
20062 {
20063 case V2SFmode:
20064 case V2SImode:
20065 if (!mmx_ok)
20066 return false;
20067 /* FALLTHRU */
20068
20069 case V2DFmode:
20070 case V2DImode:
20071 if (one_var != 0)
20072 return false;
20073 var = force_reg (GET_MODE_INNER (mode), var);
20074 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
20075 emit_insn (gen_rtx_SET (VOIDmode, target, x));
20076 return true;
20077
20078 case V4SFmode:
20079 case V4SImode:
20080 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
20081 new_target = gen_reg_rtx (mode);
20082 else
20083 new_target = target;
20084 var = force_reg (GET_MODE_INNER (mode), var);
20085 x = gen_rtx_VEC_DUPLICATE (mode, var);
20086 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
20087 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
20088 if (one_var != 0)
20089 {
20090 /* We need to shuffle the value to the correct position, so
20091 create a new pseudo to store the intermediate result. */
20092
20093 /* With SSE2, we can use the integer shuffle insns. */
20094 if (mode != V4SFmode && TARGET_SSE2)
20095 {
20096 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
20097 GEN_INT (1),
20098 GEN_INT (one_var == 1 ? 0 : 1),
20099 GEN_INT (one_var == 2 ? 0 : 1),
20100 GEN_INT (one_var == 3 ? 0 : 1)));
20101 if (target != new_target)
20102 emit_move_insn (target, new_target);
20103 return true;
20104 }
20105
20106 /* Otherwise convert the intermediate result to V4SFmode and
20107 use the SSE1 shuffle instructions. */
20108 if (mode != V4SFmode)
20109 {
20110 tmp = gen_reg_rtx (V4SFmode);
20111 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
20112 }
20113 else
20114 tmp = new_target;
20115
20116 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
20117 GEN_INT (1),
20118 GEN_INT (one_var == 1 ? 0 : 1),
20119 GEN_INT (one_var == 2 ? 0+4 : 1+4),
20120 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
20121
20122 if (mode != V4SFmode)
20123 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
20124 else if (tmp != target)
20125 emit_move_insn (target, tmp);
20126 }
20127 else if (target != new_target)
20128 emit_move_insn (target, new_target);
20129 return true;
20130
20131 case V8HImode:
20132 case V16QImode:
20133 vsimode = V4SImode;
20134 goto widen;
20135 case V4HImode:
20136 case V8QImode:
20137 if (!mmx_ok)
20138 return false;
20139 vsimode = V2SImode;
20140 goto widen;
20141 widen:
20142 if (one_var != 0)
20143 return false;
20144
20145 /* Zero extend the variable element to SImode and recurse. */
20146 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
20147
20148 x = gen_reg_rtx (vsimode);
20149 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
20150 var, one_var))
20151 gcc_unreachable ();
20152
20153 emit_move_insn (target, gen_lowpart (mode, x));
20154 return true;
20155
20156 default:
20157 return false;
20158 }
20159 }
20160
20161 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
20162 consisting of the values in VALS. It is known that all elements
20163 except ONE_VAR are constants. Return true if successful. */
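/* For example (illustrative): { 1, x, 3, 4 } in V4SImode is emitted as
   a load of the constant vector { 1, 0, 3, 4 } from the pool followed
   by an ix86_expand_vector_set of x into element 1.  QImode vectors
   first merge the variable byte with its constant neighbour into an
   HImode element, since no byte-sized insert is available here.  */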
20164
20165 static bool
20166 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
20167 rtx target, rtx vals, int one_var)
20168 {
20169 rtx var = XVECEXP (vals, 0, one_var);
20170 enum machine_mode wmode;
20171 rtx const_vec, x;
20172
20173 const_vec = copy_rtx (vals);
20174 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
20175 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
20176
20177 switch (mode)
20178 {
20179 case V2DFmode:
20180 case V2DImode:
20181 case V2SFmode:
20182 case V2SImode:
20183 /* For the two element vectors, it's just as easy to use
20184 the general case. */
20185 return false;
20186
20187 case V4SFmode:
20188 case V4SImode:
20189 case V8HImode:
20190 case V4HImode:
20191 break;
20192
20193 case V16QImode:
20194 wmode = V8HImode;
20195 goto widen;
20196 case V8QImode:
20197 wmode = V4HImode;
20198 goto widen;
20199 widen:
20200 /* There's no way to set one QImode entry easily. Combine
20201 the variable value with its adjacent constant value, and
20202 promote to an HImode set. */
20203 x = XVECEXP (vals, 0, one_var ^ 1);
20204 if (one_var & 1)
20205 {
20206 var = convert_modes (HImode, QImode, var, true);
20207 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
20208 NULL_RTX, 1, OPTAB_LIB_WIDEN);
20209 x = GEN_INT (INTVAL (x) & 0xff);
20210 }
20211 else
20212 {
20213 var = convert_modes (HImode, QImode, var, true);
20214 x = gen_int_mode (INTVAL (x) << 8, HImode);
20215 }
20216 if (x != const0_rtx)
20217 var = expand_simple_binop (HImode, IOR, var, x, var,
20218 1, OPTAB_LIB_WIDEN);
20219
20220 x = gen_reg_rtx (wmode);
20221 emit_move_insn (x, gen_lowpart (wmode, const_vec));
20222 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
20223
20224 emit_move_insn (target, gen_lowpart (mode, x));
20225 return true;
20226
20227 default:
20228 return false;
20229 }
20230
20231 emit_move_insn (target, const_vec);
20232 ix86_expand_vector_set (mmx_ok, target, var, one_var);
20233 return true;
20234 }
20235
20236 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
20237 all values variable, and none identical. */
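/* For example (illustrative): a V8HImode vector of eight variable
   elements is handled below by packing the elements into word_mode
   integers with shift-and-IOR, then assembling those words into the
   vector, either through low/high word moves or by recursing to build
   a V4SImode vector.  Two-element vectors and V4SF/V4SI instead use a
   VEC_CONCAT of scalars or of two half-width vectors.  */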
20238
20239 static void
20240 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
20241 rtx target, rtx vals)
20242 {
20243 enum machine_mode half_mode = GET_MODE_INNER (mode);
20244 rtx op0 = NULL, op1 = NULL;
20245 bool use_vec_concat = false;
20246
20247 switch (mode)
20248 {
20249 case V2SFmode:
20250 case V2SImode:
20251 if (!mmx_ok && !TARGET_SSE)
20252 break;
20253 /* FALLTHRU */
20254
20255 case V2DFmode:
20256 case V2DImode:
20257 /* For the two element vectors, we always implement VEC_CONCAT. */
20258 op0 = XVECEXP (vals, 0, 0);
20259 op1 = XVECEXP (vals, 0, 1);
20260 use_vec_concat = true;
20261 break;
20262
20263 case V4SFmode:
20264 half_mode = V2SFmode;
20265 goto half;
20266 case V4SImode:
20267 half_mode = V2SImode;
20268 goto half;
20269 half:
20270 {
20271 rtvec v;
20272
20273 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
20274 Recurse to load the two halves. */
20275
20276 op0 = gen_reg_rtx (half_mode);
20277 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
20278 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
20279
20280 op1 = gen_reg_rtx (half_mode);
20281 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
20282 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
20283
20284 use_vec_concat = true;
20285 }
20286 break;
20287
20288 case V8HImode:
20289 case V16QImode:
20290 case V4HImode:
20291 case V8QImode:
20292 break;
20293
20294 default:
20295 gcc_unreachable ();
20296 }
20297
20298 if (use_vec_concat)
20299 {
20300 if (!register_operand (op0, half_mode))
20301 op0 = force_reg (half_mode, op0);
20302 if (!register_operand (op1, half_mode))
20303 op1 = force_reg (half_mode, op1);
20304
20305 emit_insn (gen_rtx_SET (VOIDmode, target,
20306 gen_rtx_VEC_CONCAT (mode, op0, op1)));
20307 }
20308 else
20309 {
20310 int i, j, n_elts, n_words, n_elt_per_word;
20311 enum machine_mode inner_mode;
20312 rtx words[4], shift;
20313
20314 inner_mode = GET_MODE_INNER (mode);
20315 n_elts = GET_MODE_NUNITS (mode);
20316 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
20317 n_elt_per_word = n_elts / n_words;
20318 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
20319
20320 for (i = 0; i < n_words; ++i)
20321 {
20322 rtx word = NULL_RTX;
20323
20324 for (j = 0; j < n_elt_per_word; ++j)
20325 {
20326 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
20327 elt = convert_modes (word_mode, inner_mode, elt, true);
20328
20329 if (j == 0)
20330 word = elt;
20331 else
20332 {
20333 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
20334 word, 1, OPTAB_LIB_WIDEN);
20335 word = expand_simple_binop (word_mode, IOR, word, elt,
20336 word, 1, OPTAB_LIB_WIDEN);
20337 }
20338 }
20339
20340 words[i] = word;
20341 }
20342
20343 if (n_words == 1)
20344 emit_move_insn (target, gen_lowpart (mode, words[0]));
20345 else if (n_words == 2)
20346 {
20347 rtx tmp = gen_reg_rtx (mode);
20348 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
20349 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
20350 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
20351 emit_move_insn (target, tmp);
20352 }
20353 else if (n_words == 4)
20354 {
20355 rtx tmp = gen_reg_rtx (V4SImode);
20356 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
20357 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
20358 emit_move_insn (target, gen_lowpart (mode, tmp));
20359 }
20360 else
20361 gcc_unreachable ();
20362 }
20363 }
20364
20365 /* Initialize vector TARGET via VALS. Suppress the use of MMX
20366 instructions unless MMX_OK is true. */
20367
20368 void
20369 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
20370 {
20371 enum machine_mode mode = GET_MODE (target);
20372 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20373 int n_elts = GET_MODE_NUNITS (mode);
20374 int n_var = 0, one_var = -1;
20375 bool all_same = true, all_const_zero = true;
20376 int i;
20377 rtx x;
20378
20379 for (i = 0; i < n_elts; ++i)
20380 {
20381 x = XVECEXP (vals, 0, i);
20382 if (!CONSTANT_P (x))
20383 n_var++, one_var = i;
20384 else if (x != CONST0_RTX (inner_mode))
20385 all_const_zero = false;
20386 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
20387 all_same = false;
20388 }
20389
20390 /* Constants are best loaded from the constant pool. */
20391 if (n_var == 0)
20392 {
20393 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
20394 return;
20395 }
20396
20397 /* If all values are identical, broadcast the value. */
20398 if (all_same
20399 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
20400 XVECEXP (vals, 0, 0)))
20401 return;
20402
20403 /* Values where only one field is non-constant are best loaded from
20404 the pool and overwritten via move later. */
20405 if (n_var == 1)
20406 {
20407 if (all_const_zero
20408 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
20409 XVECEXP (vals, 0, one_var),
20410 one_var))
20411 return;
20412
20413 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
20414 return;
20415 }
20416
20417 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
20418 }
20419
20420 void
20421 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
20422 {
20423 enum machine_mode mode = GET_MODE (target);
20424 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20425 bool use_vec_merge = false;
20426 rtx tmp;
20427
20428 switch (mode)
20429 {
20430 case V2SFmode:
20431 case V2SImode:
20432 if (mmx_ok)
20433 {
20434 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
20435 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
20436 if (elt == 0)
20437 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
20438 else
20439 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
20440 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20441 return;
20442 }
20443 break;
20444
20445 case V2DFmode:
20446 case V2DImode:
20447 {
20448 rtx op0, op1;
20449
20450 /* For the two element vectors, we implement a VEC_CONCAT with
20451 the extraction of the other element. */
20452
20453 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
20454 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
20455
20456 if (elt == 0)
20457 op0 = val, op1 = tmp;
20458 else
20459 op0 = tmp, op1 = val;
20460
20461 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
20462 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20463 }
20464 return;
20465
20466 case V4SFmode:
20467 switch (elt)
20468 {
20469 case 0:
20470 use_vec_merge = true;
20471 break;
20472
20473 case 1:
20474 /* tmp = target = A B C D */
20475 tmp = copy_to_reg (target);
20476 /* target = A A B B */
20477 emit_insn (gen_sse_unpcklps (target, target, target));
20478 /* target = X A B B */
20479 ix86_expand_vector_set (false, target, val, 0);
20480 /* target = A X C D */
20481 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20482 GEN_INT (1), GEN_INT (0),
20483 GEN_INT (2+4), GEN_INT (3+4)));
20484 return;
20485
20486 case 2:
20487 /* tmp = target = A B C D */
20488 tmp = copy_to_reg (target);
20489 /* tmp = X B C D */
20490 ix86_expand_vector_set (false, tmp, val, 0);
20491 /* target = A B X D */
20492 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20493 GEN_INT (0), GEN_INT (1),
20494 GEN_INT (0+4), GEN_INT (3+4)));
20495 return;
20496
20497 case 3:
20498 /* tmp = target = A B C D */
20499 tmp = copy_to_reg (target);
20500 /* tmp = X B C D */
20501 ix86_expand_vector_set (false, tmp, val, 0);
20502 /* target = A B C X */
20503 emit_insn (gen_sse_shufps_1 (target, target, tmp,
20504 GEN_INT (0), GEN_INT (1),
20505 GEN_INT (2+4), GEN_INT (0+4)));
20506 return;
20507
20508 default:
20509 gcc_unreachable ();
20510 }
20511 break;
20512
20513 case V4SImode:
20514 /* Element 0 handled by vec_merge below. */
20515 if (elt == 0)
20516 {
20517 use_vec_merge = true;
20518 break;
20519 }
20520
20521 if (TARGET_SSE2)
20522 {
20523 /* With SSE2, use integer shuffles to swap element 0 and ELT,
20524 store into element 0, then shuffle them back. */
20525
20526 rtx order[4];
20527
20528 order[0] = GEN_INT (elt);
20529 order[1] = const1_rtx;
20530 order[2] = const2_rtx;
20531 order[3] = GEN_INT (3);
20532 order[elt] = const0_rtx;
20533
20534 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20535 order[1], order[2], order[3]));
20536
20537 ix86_expand_vector_set (false, target, val, 0);
20538
20539 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
20540 order[1], order[2], order[3]));
20541 }
20542 else
20543 {
20544 /* For SSE1, we have to reuse the V4SF code. */
20545 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
20546 gen_lowpart (SFmode, val), elt);
20547 }
20548 return;
20549
20550 case V8HImode:
20551 use_vec_merge = TARGET_SSE2;
20552 break;
20553 case V4HImode:
20554 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20555 break;
20556
20557 case V16QImode:
20558 case V8QImode:
20559 default:
20560 break;
20561 }
20562
20563 if (use_vec_merge)
20564 {
20565 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
20566 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
20567 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20568 }
20569 else
20570 {
20571 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20572
20573 emit_move_insn (mem, target);
20574
20575 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20576 emit_move_insn (tmp, val);
20577
20578 emit_move_insn (target, mem);
20579 }
20580 }
20581
20582 void
20583 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
20584 {
20585 enum machine_mode mode = GET_MODE (vec);
20586 enum machine_mode inner_mode = GET_MODE_INNER (mode);
20587 bool use_vec_extr = false;
20588 rtx tmp;
20589
20590 switch (mode)
20591 {
20592 case V2SImode:
20593 case V2SFmode:
20594 if (!mmx_ok)
20595 break;
20596 /* FALLTHRU */
20597
20598 case V2DFmode:
20599 case V2DImode:
20600 use_vec_extr = true;
20601 break;
20602
20603 case V4SFmode:
20604 switch (elt)
20605 {
20606 case 0:
20607 tmp = vec;
20608 break;
20609
20610 case 1:
20611 case 3:
20612 tmp = gen_reg_rtx (mode);
20613 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
20614 GEN_INT (elt), GEN_INT (elt),
20615 GEN_INT (elt+4), GEN_INT (elt+4)));
20616 break;
20617
20618 case 2:
20619 tmp = gen_reg_rtx (mode);
20620 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
20621 break;
20622
20623 default:
20624 gcc_unreachable ();
20625 }
20626 vec = tmp;
20627 use_vec_extr = true;
20628 elt = 0;
20629 break;
20630
20631 case V4SImode:
20632 if (TARGET_SSE2)
20633 {
20634 switch (elt)
20635 {
20636 case 0:
20637 tmp = vec;
20638 break;
20639
20640 case 1:
20641 case 3:
20642 tmp = gen_reg_rtx (mode);
20643 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
20644 GEN_INT (elt), GEN_INT (elt),
20645 GEN_INT (elt), GEN_INT (elt)));
20646 break;
20647
20648 case 2:
20649 tmp = gen_reg_rtx (mode);
20650 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
20651 break;
20652
20653 default:
20654 gcc_unreachable ();
20655 }
20656 vec = tmp;
20657 use_vec_extr = true;
20658 elt = 0;
20659 }
20660 else
20661 {
20662 /* For SSE1, we have to reuse the V4SF code. */
20663 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
20664 gen_lowpart (V4SFmode, vec), elt);
20665 return;
20666 }
20667 break;
20668
20669 case V8HImode:
20670 use_vec_extr = TARGET_SSE2;
20671 break;
20672 case V4HImode:
20673 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
20674 break;
20675
20676 case V16QImode:
20677 case V8QImode:
20678 /* ??? Could extract the appropriate HImode element and shift. */
20679 default:
20680 break;
20681 }
20682
20683 if (use_vec_extr)
20684 {
20685 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
20686 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
20687
20688 /* Let the rtl optimizers know about the zero extension performed. */
20689 if (inner_mode == HImode)
20690 {
20691 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
20692 target = gen_lowpart (SImode, target);
20693 }
20694
20695 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
20696 }
20697 else
20698 {
20699 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
20700
20701 emit_move_insn (mem, vec);
20702
20703 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
20704 emit_move_insn (target, tmp);
20705 }
20706 }
20707
20708 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
20709 pattern to reduce; DEST is the destination; IN is the input vector. */
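/* Illustrative data flow for IN = { a, b, c, d } and a commutative FN
   (element 0 listed first): movhlps gives { c, d, c, d }; applying FN
   element-wise yields partial results containing fn(a,c) and fn(b,d);
   a shufps then lines those two up so the final FN leaves
   fn(fn(a,c), fn(b,d)) in the low element of DEST.  */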
20710
20711 void
20712 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
20713 {
20714 rtx tmp1, tmp2, tmp3;
20715
20716 tmp1 = gen_reg_rtx (V4SFmode);
20717 tmp2 = gen_reg_rtx (V4SFmode);
20718 tmp3 = gen_reg_rtx (V4SFmode);
20719
20720 emit_insn (gen_sse_movhlps (tmp1, in, in));
20721 emit_insn (fn (tmp2, tmp1, in));
20722
20723 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
20724 GEN_INT (1), GEN_INT (1),
20725 GEN_INT (1+4), GEN_INT (1+4)));
20726 emit_insn (fn (dest, tmp2, tmp3));
20727 }
20728 \f
20729 /* Target hook for scalar_mode_supported_p. */
20730 static bool
20731 ix86_scalar_mode_supported_p (enum machine_mode mode)
20732 {
20733 if (DECIMAL_FLOAT_MODE_P (mode))
20734 return true;
20735 else
20736 return default_scalar_mode_supported_p (mode);
20737 }
20738
20739 /* Implements target hook vector_mode_supported_p. */
20740 static bool
20741 ix86_vector_mode_supported_p (enum machine_mode mode)
20742 {
20743 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
20744 return true;
20745 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
20746 return true;
20747 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
20748 return true;
20749 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
20750 return true;
20751 return false;
20752 }
20753
20754 /* Worker function for TARGET_MD_ASM_CLOBBERS.
20755
20756 We do this in the new i386 backend to maintain source compatibility
20757 with the old cc0-based compiler. */
20758
20759 static tree
20760 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
20761 tree inputs ATTRIBUTE_UNUSED,
20762 tree clobbers)
20763 {
20764 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
20765 clobbers);
20766 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
20767 clobbers);
20768 return clobbers;
20769 }
20770
20771 /* Implements the target vector hook targetm.asm.encode_section_info. This
20772 is not used by NetWare. */
20773
20774 static void ATTRIBUTE_UNUSED
20775 ix86_encode_section_info (tree decl, rtx rtl, int first)
20776 {
20777 default_encode_section_info (decl, rtl, first);
20778
20779 if (TREE_CODE (decl) == VAR_DECL
20780 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
20781 && ix86_in_large_data_p (decl))
20782 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
20783 }
20784
20785 /* Worker function for REVERSE_CONDITION. */
20786
20787 enum rtx_code
20788 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
20789 {
20790 return (mode != CCFPmode && mode != CCFPUmode
20791 ? reverse_condition (code)
20792 : reverse_condition_maybe_unordered (code));
20793 }
20794
20795 /* Output code to perform an x87 FP register move, from OPERANDS[1]
20796 to OPERANDS[0]. */
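/* Roughly (illustrative): when the source register dies here the move
   becomes "fstp %y0" (or an ffreep sequence if the destination is the
   top of stack); otherwise it is "fld%z1 %y1" when the destination is
   the top of stack and "fst %y0" when it is not.  */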
20797
20798 const char *
20799 output_387_reg_move (rtx insn, rtx *operands)
20800 {
20801 if (REG_P (operands[1])
20802 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
20803 {
20804 if (REGNO (operands[0]) == FIRST_STACK_REG)
20805 return output_387_ffreep (operands, 0);
20806 return "fstp\t%y0";
20807 }
20808 if (STACK_TOP_P (operands[0]))
20809 return "fld%z1\t%y1";
20810 return "fst\t%y0";
20811 }
20812
20813 /* Output code to perform a conditional jump to LABEL, if C2 flag in
20814 FP status register is set. */
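/* Roughly (illustrative), this expands to either
   fnstsw %ax ; sahf ; jp LABEL              (when SAHF is usable)
   or
   fnstsw %ax ; testb $4, %ah ; jne LABEL
   depending on the target, with the branch predicted unlikely.  */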
20815
20816 void
20817 ix86_emit_fp_unordered_jump (rtx label)
20818 {
20819 rtx reg = gen_reg_rtx (HImode);
20820 rtx temp;
20821
20822 emit_insn (gen_x86_fnstsw_1 (reg));
20823
20824 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
20825 {
20826 emit_insn (gen_x86_sahf_1 (reg));
20827
20828 temp = gen_rtx_REG (CCmode, FLAGS_REG);
20829 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
20830 }
20831 else
20832 {
20833 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
20834
20835 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
20836 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
20837 }
20838
20839 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
20840 gen_rtx_LABEL_REF (VOIDmode, label),
20841 pc_rtx);
20842 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
20843
20844 emit_jump_insn (temp);
20845 predict_jump (REG_BR_PROB_BASE * 10 / 100);
20846 }
20847
20848 /* Output code to perform a log1p XFmode calculation. */
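/* In outline (illustrative): when |OP1| < 1 - sqrt(2)/2 the result is
   computed directly as fldln2; fyl2xp1, otherwise as fldln2; fld1;
   fadd; fyl2x, i.e. log1p(x) = ln(2) * log2(1 + x), choosing the form
   that stays accurate for the given magnitude of x.  */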
20849
20850 void ix86_emit_i387_log1p (rtx op0, rtx op1)
20851 {
20852 rtx label1 = gen_label_rtx ();
20853 rtx label2 = gen_label_rtx ();
20854
20855 rtx tmp = gen_reg_rtx (XFmode);
20856 rtx tmp2 = gen_reg_rtx (XFmode);
20857
20858 emit_insn (gen_absxf2 (tmp, op1));
20859 emit_insn (gen_cmpxf (tmp,
20860 CONST_DOUBLE_FROM_REAL_VALUE (
20861 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
20862 XFmode)));
20863 emit_jump_insn (gen_bge (label1));
20864
20865 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20866 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
20867 emit_jump (label2);
20868
20869 emit_label (label1);
20870 emit_move_insn (tmp, CONST1_RTX (XFmode));
20871 emit_insn (gen_addxf3 (tmp, op1, tmp));
20872 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
20873 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
20874
20875 emit_label (label2);
20876 }
20877
20878 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
20879
20880 static void ATTRIBUTE_UNUSED
20881 i386_solaris_elf_named_section (const char *name, unsigned int flags,
20882 tree decl)
20883 {
20884 /* With Binutils 2.15, the "@unwind" marker must be specified on
20885 every occurrence of the ".eh_frame" section, not just the first
20886 one. */
20887 if (TARGET_64BIT
20888 && strcmp (name, ".eh_frame") == 0)
20889 {
20890 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
20891 flags & SECTION_WRITE ? "aw" : "a");
20892 return;
20893 }
20894 default_elf_asm_named_section (name, flags, decl);
20895 }
20896
20897 /* Return the mangling of TYPE if it is an extended fundamental type. */
20898
20899 static const char *
20900 ix86_mangle_fundamental_type (tree type)
20901 {
20902 switch (TYPE_MODE (type))
20903 {
20904 case TFmode:
20905 /* __float128 is "g". */
20906 return "g";
20907 case XFmode:
20908 /* "long double" or __float80 is "e". */
20909 return "e";
20910 default:
20911 return NULL;
20912 }
20913 }
20914
20915 /* For 32-bit code we can save PIC register setup by using
20916 __stack_chk_fail_local hidden function instead of calling
20917 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
20918 register, so it is better to call __stack_chk_fail directly. */
20919
20920 static tree
20921 ix86_stack_protect_fail (void)
20922 {
20923 return TARGET_64BIT
20924 ? default_external_stack_protect_fail ()
20925 : default_hidden_stack_protect_fail ();
20926 }
20927
20928 /* Select a format to encode pointers in exception handling data. CODE
20929 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
20930 true if the symbol may be affected by dynamic relocations.
20931
20932 ??? All x86 object file formats are capable of representing this.
20933 After all, the relocation needed is the same as for the call insn.
20934 Whether or not a particular assembler allows us to enter such, I
20935 guess we'll have to see. */
20936 int
20937 asm_preferred_eh_data_format (int code, int global)
20938 {
20939 if (flag_pic)
20940 {
20941 int type = DW_EH_PE_sdata8;
20942 if (!TARGET_64BIT
20943 || ix86_cmodel == CM_SMALL_PIC
20944 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
20945 type = DW_EH_PE_sdata4;
20946 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
20947 }
20948 if (ix86_cmodel == CM_SMALL
20949 || (ix86_cmodel == CM_MEDIUM && code))
20950 return DW_EH_PE_udata4;
20951 return DW_EH_PE_absptr;
20952 }
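 /* For example, 32-bit PIC code ends up with DW_EH_PE_pcrel | DW_EH_PE_sdata4
    (wrapped in DW_EH_PE_indirect for symbols subject to dynamic relocation),
    small-model non-PIC code gets DW_EH_PE_udata4, and everything else falls
    back to DW_EH_PE_absptr.  */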
20953 \f
20954 /* Expand copysign from SIGN to the positive value ABS_VALUE
20955 storing in RESULT. If MASK is non-null, it shall be a mask to mask out
20956 the sign-bit. */
20957 static void
20958 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
20959 {
20960 enum machine_mode mode = GET_MODE (sign);
20961 rtx sgn = gen_reg_rtx (mode);
20962 if (mask == NULL_RTX)
20963 {
20964 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
20965 if (!VECTOR_MODE_P (mode))
20966 {
20967 /* We need to generate a scalar mode mask in this case. */
20968 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20969 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20970 mask = gen_reg_rtx (mode);
20971 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
20972 }
20973 }
20974 else
20975 mask = gen_rtx_NOT (mode, mask);
20976 emit_insn (gen_rtx_SET (VOIDmode, sgn,
20977 gen_rtx_AND (mode, mask, sign)));
20978 emit_insn (gen_rtx_SET (VOIDmode, result,
20979 gen_rtx_IOR (mode, abs_value, sgn)));
20980 }
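 /* In scalar terms the sequence emitted above computes
      result = abs_value | (sign & SIGNBIT)
    where SIGNBIT has only the sign bit set; when the caller passes the
    inverted (~signbit) mask produced by ix86_expand_sse_fabs, it is NOT'ed
    first to recover the sign-bit-only form.  */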
20981
20982 /* Expand fabs (OP0) and return a new rtx that holds the result. The
20983 mask for masking out the sign-bit is stored in *SMASK, if that is
20984 non-null. */
20985 static rtx
20986 ix86_expand_sse_fabs (rtx op0, rtx *smask)
20987 {
20988 enum machine_mode mode = GET_MODE (op0);
20989 rtx xa, mask;
20990
20991 xa = gen_reg_rtx (mode);
20992 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
20993 if (!VECTOR_MODE_P (mode))
20994 {
20995 /* We need to generate a scalar mode mask in this case. */
20996 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
20997 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
20998 mask = gen_reg_rtx (mode);
20999 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
21000 }
21001 emit_insn (gen_rtx_SET (VOIDmode, xa,
21002 gen_rtx_AND (mode, op0, mask)));
21003
21004 if (smask)
21005 *smask = mask;
21006
21007 return xa;
21008 }
21009
21010 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
21011 swapping the operands if SWAP_OPERANDS is true. The expanded
21012 code is a forward jump to a newly created label in case the
21013 comparison is true. The generated label rtx is returned. */
21014 static rtx
21015 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
21016 bool swap_operands)
21017 {
21018 rtx label, tmp;
21019
21020 if (swap_operands)
21021 {
21022 tmp = op0;
21023 op0 = op1;
21024 op1 = tmp;
21025 }
21026
21027 label = gen_label_rtx ();
21028 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
21029 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21030 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
21031 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
21032 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
21033 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
21034 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
21035 JUMP_LABEL (tmp) = label;
21036
21037 return label;
21038 }
21039
21040 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
21041 using comparison code CODE. Operands are swapped for the comparison if
21042 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
21043 static rtx
21044 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
21045 bool swap_operands)
21046 {
21047 enum machine_mode mode = GET_MODE (op0);
21048 rtx mask = gen_reg_rtx (mode);
21049
21050 if (swap_operands)
21051 {
21052 rtx tmp = op0;
21053 op0 = op1;
21054 op1 = tmp;
21055 }
21056
21057 if (mode == DFmode)
21058 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
21059 gen_rtx_fmt_ee (code, mode, op0, op1)));
21060 else
21061 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
21062 gen_rtx_fmt_ee (code, mode, op0, op1)));
21063
21064 return mask;
21065 }
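 /* The cmpsd/cmpss patterns used above leave an all-ones bit pattern in MASK
    when the comparison holds and all zeros otherwise, so callers can AND the
    mask with a constant such as 1.0 to obtain a compensation term.  */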
21066
21067 /* Generate and return a rtx of mode MODE for 2**n where n is the number
21068 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
21069 static rtx
21070 ix86_gen_TWO52 (enum machine_mode mode)
21071 {
21072 REAL_VALUE_TYPE TWO52r;
21073 rtx TWO52;
21074
21075 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
21076 TWO52 = const_double_from_real_value (TWO52r, mode);
21077 TWO52 = force_reg (mode, TWO52);
21078
21079 return TWO52;
21080 }
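 /* The constant is 2**52 for DFmode and 2**23 for SFmode, the smallest power
    of two whose ulp is 1.  For any |x| below it, x + TWO52 - TWO52 rounds x
    to an integer in the current rounding mode, since the addition pushes all
    fraction bits out of the significand; the expanders below rely on this.  */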
21081
21082 /* Expand SSE sequence for computing lround from OP1 storing
21083 into OP0. */
21084 void
21085 ix86_expand_lround (rtx op0, rtx op1)
21086 {
21087 /* C code for the stuff we're doing below:
21088 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
21089 return (long)tmp;
21090 */
21091 enum machine_mode mode = GET_MODE (op1);
21092 const struct real_format *fmt;
21093 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21094 rtx adj;
21095
21096 /* load nextafter (0.5, 0.0) */
21097 fmt = REAL_MODE_FORMAT (mode);
21098 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21099 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
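   /* pred_half is the largest value in MODE strictly below 0.5.  Under the
      default round-to-nearest mode this keeps both boundary cases right:
      pred_half + pred_half still truncates to 0, while 0.5 + pred_half rounds
      up to 1.0 so lround (0.5) is 1; adding an exact 0.5 instead would round
      the largest value below one half up to 1.0 and give the wrong result.  */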
21100
21101 /* adj = copysign (0.5, op1) */
21102 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
21103 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
21104
21105 /* adj = op1 + adj */
21106 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
21107
21108 /* op0 = (imode)adj */
21109 expand_fix (op0, adj, 0);
21110 }
21111
 21112 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
 21113 into OP0. */
21114 void
21115 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
21116 {
21117 /* C code for the stuff we're doing below (for do_floor):
21118 xi = (long)op1;
21119 xi -= (double)xi > op1 ? 1 : 0;
21120 return xi;
21121 */
21122 enum machine_mode fmode = GET_MODE (op1);
21123 enum machine_mode imode = GET_MODE (op0);
21124 rtx ireg, freg, label, tmp;
21125
21126 /* reg = (long)op1 */
21127 ireg = gen_reg_rtx (imode);
21128 expand_fix (ireg, op1, 0);
21129
21130 /* freg = (double)reg */
21131 freg = gen_reg_rtx (fmode);
21132 expand_float (freg, ireg, 0);
21133
21134 /* ireg = (freg > op1) ? ireg - 1 : ireg */
21135 label = ix86_expand_sse_compare_and_jump (UNLE,
21136 freg, op1, !do_floor);
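   /* The branch is taken, skipping the adjustment below, when no compensation
      is needed: for floor when !(freg > op1), for ceil (operands swapped) when
      !(freg < op1).  Unordered comparisons also take the branch, leaving NaN
      inputs unadjusted.  */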
21137 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
21138 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
21139 emit_move_insn (ireg, tmp);
21140
21141 emit_label (label);
21142 LABEL_NUSES (label) = 1;
21143
21144 emit_move_insn (op0, ireg);
21145 }
21146
21147 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
21148 result in OPERAND0. */
21149 void
21150 ix86_expand_rint (rtx operand0, rtx operand1)
21151 {
21152 /* C code for the stuff we're doing below:
21153 xa = fabs (operand1);
21154 if (!isless (xa, 2**52))
21155 return operand1;
21156 xa = xa + 2**52 - 2**52;
21157 return copysign (xa, operand1);
21158 */
21159 enum machine_mode mode = GET_MODE (operand0);
21160 rtx res, xa, label, TWO52, mask;
21161
21162 res = gen_reg_rtx (mode);
21163 emit_move_insn (res, operand1);
21164
21165 /* xa = abs (operand1) */
21166 xa = ix86_expand_sse_fabs (res, &mask);
21167
21168 /* if (!isless (xa, TWO52)) goto label; */
21169 TWO52 = ix86_gen_TWO52 (mode);
21170 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21171
21172 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21173 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21174
21175 ix86_sse_copysign_to_positive (res, xa, res, mask);
21176
21177 emit_label (label);
21178 LABEL_NUSES (label) = 1;
21179
21180 emit_move_insn (operand0, res);
21181 }
21182
 21183 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
 21184 into OPERAND0 without relying on DImode truncation via cvttsd2siq (64-bit only). */
21185 void
21186 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
21187 {
21188 /* C code for the stuff we expand below.
21189 double xa = fabs (x), x2;
21190 if (!isless (xa, TWO52))
21191 return x;
21192 xa = xa + TWO52 - TWO52;
21193 x2 = copysign (xa, x);
21194 Compensate. Floor:
21195 if (x2 > x)
21196 x2 -= 1;
21197 Compensate. Ceil:
21198 if (x2 < x)
21199 x2 -= -1;
21200 return x2;
21201 */
21202 enum machine_mode mode = GET_MODE (operand0);
21203 rtx xa, TWO52, tmp, label, one, res, mask;
21204
21205 TWO52 = ix86_gen_TWO52 (mode);
21206
21207 /* Temporary for holding the result, initialized to the input
21208 operand to ease control flow. */
21209 res = gen_reg_rtx (mode);
21210 emit_move_insn (res, operand1);
21211
21212 /* xa = abs (operand1) */
21213 xa = ix86_expand_sse_fabs (res, &mask);
21214
21215 /* if (!isless (xa, TWO52)) goto label; */
21216 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21217
21218 /* xa = xa + TWO52 - TWO52; */
21219 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21220 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
21221
21222 /* xa = copysign (xa, operand1) */
21223 ix86_sse_copysign_to_positive (xa, xa, res, mask);
21224
21225 /* generate 1.0 or -1.0 */
21226 one = force_reg (mode,
21227 const_double_from_real_value (do_floor
21228 ? dconst1 : dconstm1, mode));
21229
 21230 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor, xa -= (xa < operand1 ? -1 : 0) for ceil. */
21231 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21232 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21233 gen_rtx_AND (mode, one, tmp)));
21234 /* We always need to subtract here to preserve signed zero. */
21235 tmp = expand_simple_binop (mode, MINUS,
21236 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21237 emit_move_insn (res, tmp);
21238
21239 emit_label (label);
21240 LABEL_NUSES (label) = 1;
21241
21242 emit_move_insn (operand0, res);
21243 }
21244
21245 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
21246 into OPERAND0. */
21247 void
21248 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
21249 {
21250 /* C code for the stuff we expand below.
21251 double xa = fabs (x), x2;
21252 if (!isless (xa, TWO52))
21253 return x;
21254 x2 = (double)(long)x;
21255 Compensate. Floor:
21256 if (x2 > x)
21257 x2 -= 1;
21258 Compensate. Ceil:
21259 if (x2 < x)
21260 x2 += 1;
21261 if (HONOR_SIGNED_ZEROS (mode))
21262 return copysign (x2, x);
21263 return x2;
21264 */
21265 enum machine_mode mode = GET_MODE (operand0);
21266 rtx xa, xi, TWO52, tmp, label, one, res, mask;
21267
21268 TWO52 = ix86_gen_TWO52 (mode);
21269
21270 /* Temporary for holding the result, initialized to the input
21271 operand to ease control flow. */
21272 res = gen_reg_rtx (mode);
21273 emit_move_insn (res, operand1);
21274
21275 /* xa = abs (operand1) */
21276 xa = ix86_expand_sse_fabs (res, &mask);
21277
21278 /* if (!isless (xa, TWO52)) goto label; */
21279 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21280
21281 /* xa = (double)(long)x */
21282 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21283 expand_fix (xi, res, 0);
21284 expand_float (xa, xi, 0);
21285
21286 /* generate 1.0 */
21287 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21288
 21289 /* Compensate: xa -= (xa > operand1 ? 1 : 0) for floor, xa += (xa < operand1 ? 1 : 0) for ceil. */
21290 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
21291 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21292 gen_rtx_AND (mode, one, tmp)));
21293 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
21294 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21295 emit_move_insn (res, tmp);
21296
21297 if (HONOR_SIGNED_ZEROS (mode))
21298 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21299
21300 emit_label (label);
21301 LABEL_NUSES (label) = 1;
21302
21303 emit_move_insn (operand0, res);
21304 }
21305
21306 /* Expand SSE sequence for computing round from OPERAND1 storing
21307 into OPERAND0. Sequence that works without relying on DImode truncation
 21308 via cvttsd2siq, which is only available on 64-bit targets. */
21309 void
21310 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
21311 {
21312 /* C code for the stuff we expand below.
21313 double xa = fabs (x), xa2, x2;
21314 if (!isless (xa, TWO52))
21315 return x;
21316 Using the absolute value and copying back sign makes
21317 -0.0 -> -0.0 correct.
21318 xa2 = xa + TWO52 - TWO52;
21319 Compensate.
21320 dxa = xa2 - xa;
21321 if (dxa <= -0.5)
21322 xa2 += 1;
21323 else if (dxa > 0.5)
21324 xa2 -= 1;
21325 x2 = copysign (xa2, x);
21326 return x2;
21327 */
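 /* A worked example, assuming the default round-to-nearest-even mode:
    for x = 2.5, xa + TWO52 rounds to the even integer, so xa2 = 2.0 and
    dxa = -0.5; the dxa <= -0.5 test then bumps xa2 to 3.0, the
    round-half-away-from-zero result expected of round ().  For x = 2.4 or
    x = 2.6 neither compensation triggers and xa2 is already 2.0 or 3.0.  */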
21328 enum machine_mode mode = GET_MODE (operand0);
21329 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
21330
21331 TWO52 = ix86_gen_TWO52 (mode);
21332
21333 /* Temporary for holding the result, initialized to the input
21334 operand to ease control flow. */
21335 res = gen_reg_rtx (mode);
21336 emit_move_insn (res, operand1);
21337
21338 /* xa = abs (operand1) */
21339 xa = ix86_expand_sse_fabs (res, &mask);
21340
21341 /* if (!isless (xa, TWO52)) goto label; */
21342 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21343
21344 /* xa2 = xa + TWO52 - TWO52; */
21345 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21346 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
21347
21348 /* dxa = xa2 - xa; */
21349 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
21350
21351 /* generate 0.5, 1.0 and -0.5 */
21352 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
21353 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
21354 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
21355 0, OPTAB_DIRECT);
21356
21357 /* Compensate. */
21358 tmp = gen_reg_rtx (mode);
21359 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
21360 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
21361 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21362 gen_rtx_AND (mode, one, tmp)));
21363 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21364 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
21365 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
21366 emit_insn (gen_rtx_SET (VOIDmode, tmp,
21367 gen_rtx_AND (mode, one, tmp)));
21368 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
21369
21370 /* res = copysign (xa2, operand1) */
21371 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
21372
21373 emit_label (label);
21374 LABEL_NUSES (label) = 1;
21375
21376 emit_move_insn (operand0, res);
21377 }
21378
21379 /* Expand SSE sequence for computing trunc from OPERAND1 storing
21380 into OPERAND0. */
21381 void
21382 ix86_expand_trunc (rtx operand0, rtx operand1)
21383 {
21384 /* C code for SSE variant we expand below.
21385 double xa = fabs (x), x2;
21386 if (!isless (xa, TWO52))
21387 return x;
21388 x2 = (double)(long)x;
21389 if (HONOR_SIGNED_ZEROS (mode))
21390 return copysign (x2, x);
21391 return x2;
21392 */
21393 enum machine_mode mode = GET_MODE (operand0);
21394 rtx xa, xi, TWO52, label, res, mask;
21395
21396 TWO52 = ix86_gen_TWO52 (mode);
21397
21398 /* Temporary for holding the result, initialized to the input
21399 operand to ease control flow. */
21400 res = gen_reg_rtx (mode);
21401 emit_move_insn (res, operand1);
21402
21403 /* xa = abs (operand1) */
21404 xa = ix86_expand_sse_fabs (res, &mask);
21405
21406 /* if (!isless (xa, TWO52)) goto label; */
21407 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21408
21409 /* x = (double)(long)x */
21410 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21411 expand_fix (xi, res, 0);
21412 expand_float (res, xi, 0);
21413
21414 if (HONOR_SIGNED_ZEROS (mode))
21415 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
21416
21417 emit_label (label);
21418 LABEL_NUSES (label) = 1;
21419
21420 emit_move_insn (operand0, res);
21421 }
21422
 21423 /* Expand SSE sequence for computing trunc from OPERAND1 storing
 21424 into OPERAND0 without relying on DImode truncation via cvttsd2siq (64-bit only). */
21425 void
21426 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
21427 {
21428 enum machine_mode mode = GET_MODE (operand0);
21429 rtx xa, mask, TWO52, label, one, res, smask, tmp;
21430
21431 /* C code for SSE variant we expand below.
 21432 double xa = fabs (x), xa2, x2;
21433 if (!isless (xa, TWO52))
21434 return x;
21435 xa2 = xa + TWO52 - TWO52;
21436 Compensate:
21437 if (xa2 > xa)
21438 xa2 -= 1.0;
21439 x2 = copysign (xa2, x);
21440 return x2;
21441 */
21442
21443 TWO52 = ix86_gen_TWO52 (mode);
21444
21445 /* Temporary for holding the result, initialized to the input
21446 operand to ease control flow. */
21447 res = gen_reg_rtx (mode);
21448 emit_move_insn (res, operand1);
21449
21450 /* xa = abs (operand1) */
21451 xa = ix86_expand_sse_fabs (res, &smask);
21452
21453 /* if (!isless (xa, TWO52)) goto label; */
21454 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21455
21456 /* res = xa + TWO52 - TWO52; */
21457 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
21458 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
21459 emit_move_insn (res, tmp);
21460
21461 /* generate 1.0 */
21462 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
21463
21464 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
21465 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
21466 emit_insn (gen_rtx_SET (VOIDmode, mask,
21467 gen_rtx_AND (mode, mask, one)));
21468 tmp = expand_simple_binop (mode, MINUS,
21469 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
21470 emit_move_insn (res, tmp);
21471
21472 /* res = copysign (res, operand1) */
21473 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
21474
21475 emit_label (label);
21476 LABEL_NUSES (label) = 1;
21477
21478 emit_move_insn (operand0, res);
21479 }
21480
21481 /* Expand SSE sequence for computing round from OPERAND1 storing
21482 into OPERAND0. */
21483 void
21484 ix86_expand_round (rtx operand0, rtx operand1)
21485 {
21486 /* C code for the stuff we're doing below:
21487 double xa = fabs (x);
21488 if (!isless (xa, TWO52))
21489 return x;
21490 xa = (double)(long)(xa + nextafter (0.5, 0.0));
21491 return copysign (xa, x);
21492 */
21493 enum machine_mode mode = GET_MODE (operand0);
21494 rtx res, TWO52, xa, label, xi, half, mask;
21495 const struct real_format *fmt;
21496 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
21497
21498 /* Temporary for holding the result, initialized to the input
21499 operand to ease control flow. */
21500 res = gen_reg_rtx (mode);
21501 emit_move_insn (res, operand1);
21502
21503 TWO52 = ix86_gen_TWO52 (mode);
21504 xa = ix86_expand_sse_fabs (res, &mask);
21505 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
21506
21507 /* load nextafter (0.5, 0.0) */
21508 fmt = REAL_MODE_FORMAT (mode);
21509 real_2expN (&half_minus_pred_half, -(fmt->p) - 1);
21510 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
21511
21512 /* xa = xa + 0.5 */
21513 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
21514 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
21515
21516 /* xa = (double)(int64_t)xa */
21517 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
21518 expand_fix (xi, xa, 0);
21519 expand_float (xa, xi, 0);
21520
21521 /* res = copysign (xa, operand1) */
21522 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
21523
21524 emit_label (label);
21525 LABEL_NUSES (label) = 1;
21526
21527 emit_move_insn (operand0, res);
21528 }
21529
21530 \f
21531 /* Table of valid machine attributes. */
21532 static const struct attribute_spec ix86_attribute_table[] =
21533 {
21534 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
21535 /* Stdcall attribute says callee is responsible for popping arguments
21536 if they are not variable. */
21537 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21538 /* Fastcall attribute says callee is responsible for popping arguments
21539 if they are not variable. */
21540 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21541 /* Cdecl attribute says the callee is a normal C declaration */
21542 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21543 /* Regparm attribute specifies how many integer arguments are to be
21544 passed in registers. */
21545 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
21546 /* Sseregparm attribute says we are using x86_64 calling conventions
21547 for FP arguments. */
21548 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
21549 /* force_align_arg_pointer says this function realigns the stack at entry. */
21550 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
21551 false, true, true, ix86_handle_cconv_attribute },
21552 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21553 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
21554 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
21555 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
21556 #endif
21557 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
21558 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
21559 #ifdef SUBTARGET_ATTRIBUTE_TABLE
21560 SUBTARGET_ATTRIBUTE_TABLE,
21561 #endif
21562 { NULL, 0, 0, false, false, false, NULL }
21563 };
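 /* As an illustration (not code from this file), a declaration such as
      extern int callee (int, int) __attribute__ ((regparm (2), stdcall));
    is matched against this table and routed to ix86_handle_cconv_attribute,
    which checks the combination for consistency before the attribute is
    attached to the function type.  */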
21564
21565 /* Initialize the GCC target structure. */
21566 #undef TARGET_ATTRIBUTE_TABLE
21567 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
21568 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
21569 # undef TARGET_MERGE_DECL_ATTRIBUTES
21570 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
21571 #endif
21572
21573 #undef TARGET_COMP_TYPE_ATTRIBUTES
21574 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
21575
21576 #undef TARGET_INIT_BUILTINS
21577 #define TARGET_INIT_BUILTINS ix86_init_builtins
21578 #undef TARGET_EXPAND_BUILTIN
21579 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
21580
21581 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
21582 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION ix86_builtin_vectorized_function
21583 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
21584 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_builtin_conversion
21585
21586 #undef TARGET_ASM_FUNCTION_EPILOGUE
21587 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
21588
21589 #undef TARGET_ENCODE_SECTION_INFO
21590 #ifndef SUBTARGET_ENCODE_SECTION_INFO
21591 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
21592 #else
21593 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
21594 #endif
21595
21596 #undef TARGET_ASM_OPEN_PAREN
21597 #define TARGET_ASM_OPEN_PAREN ""
21598 #undef TARGET_ASM_CLOSE_PAREN
21599 #define TARGET_ASM_CLOSE_PAREN ""
21600
21601 #undef TARGET_ASM_ALIGNED_HI_OP
21602 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
21603 #undef TARGET_ASM_ALIGNED_SI_OP
21604 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
21605 #ifdef ASM_QUAD
21606 #undef TARGET_ASM_ALIGNED_DI_OP
21607 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
21608 #endif
21609
21610 #undef TARGET_ASM_UNALIGNED_HI_OP
21611 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
21612 #undef TARGET_ASM_UNALIGNED_SI_OP
21613 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
21614 #undef TARGET_ASM_UNALIGNED_DI_OP
21615 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
21616
21617 #undef TARGET_SCHED_ADJUST_COST
21618 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
21619 #undef TARGET_SCHED_ISSUE_RATE
21620 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
21621 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
21622 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
21623 ia32_multipass_dfa_lookahead
21624
21625 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
21626 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
21627
21628 #ifdef HAVE_AS_TLS
21629 #undef TARGET_HAVE_TLS
21630 #define TARGET_HAVE_TLS true
21631 #endif
21632 #undef TARGET_CANNOT_FORCE_CONST_MEM
21633 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
21634 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
21635 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_rtx_true
21636
21637 #undef TARGET_DELEGITIMIZE_ADDRESS
21638 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
21639
21640 #undef TARGET_MS_BITFIELD_LAYOUT_P
21641 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
21642
21643 #if TARGET_MACHO
21644 #undef TARGET_BINDS_LOCAL_P
21645 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
21646 #endif
21647
21648 #undef TARGET_ASM_OUTPUT_MI_THUNK
21649 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
21650 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
21651 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
21652
21653 #undef TARGET_ASM_FILE_START
21654 #define TARGET_ASM_FILE_START x86_file_start
21655
21656 #undef TARGET_DEFAULT_TARGET_FLAGS
21657 #define TARGET_DEFAULT_TARGET_FLAGS \
21658 (TARGET_DEFAULT \
21659 | TARGET_64BIT_DEFAULT \
21660 | TARGET_SUBTARGET_DEFAULT \
21661 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
21662
21663 #undef TARGET_HANDLE_OPTION
21664 #define TARGET_HANDLE_OPTION ix86_handle_option
21665
21666 #undef TARGET_RTX_COSTS
21667 #define TARGET_RTX_COSTS ix86_rtx_costs
21668 #undef TARGET_ADDRESS_COST
21669 #define TARGET_ADDRESS_COST ix86_address_cost
21670
21671 #undef TARGET_FIXED_CONDITION_CODE_REGS
21672 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
21673 #undef TARGET_CC_MODES_COMPATIBLE
21674 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
21675
21676 #undef TARGET_MACHINE_DEPENDENT_REORG
21677 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
21678
21679 #undef TARGET_BUILD_BUILTIN_VA_LIST
21680 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
21681
21682 #undef TARGET_MD_ASM_CLOBBERS
21683 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
21684
21685 #undef TARGET_PROMOTE_PROTOTYPES
21686 #define TARGET_PROMOTE_PROTOTYPES hook_bool_tree_true
21687 #undef TARGET_STRUCT_VALUE_RTX
21688 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
21689 #undef TARGET_SETUP_INCOMING_VARARGS
21690 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
21691 #undef TARGET_MUST_PASS_IN_STACK
21692 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
21693 #undef TARGET_PASS_BY_REFERENCE
21694 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
21695 #undef TARGET_INTERNAL_ARG_POINTER
21696 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
21697 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
21698 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
21699
21700 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
21701 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
21702
21703 #undef TARGET_SCALAR_MODE_SUPPORTED_P
21704 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
21705
21706 #undef TARGET_VECTOR_MODE_SUPPORTED_P
21707 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
21708
21709 #ifdef HAVE_AS_TLS
21710 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
21711 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
21712 #endif
21713
21714 #ifdef SUBTARGET_INSERT_ATTRIBUTES
21715 #undef TARGET_INSERT_ATTRIBUTES
21716 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
21717 #endif
21718
21719 #undef TARGET_MANGLE_FUNDAMENTAL_TYPE
21720 #define TARGET_MANGLE_FUNDAMENTAL_TYPE ix86_mangle_fundamental_type
21721
21722 #undef TARGET_STACK_PROTECT_FAIL
21723 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
21724
21725 #undef TARGET_FUNCTION_VALUE
21726 #define TARGET_FUNCTION_VALUE ix86_function_value
21727
21728 struct gcc_target targetm = TARGET_INITIALIZER;
21729 \f
21730 #include "gt-i386.h"