gcc/config/i386/i386.c
1 /* Subroutines used for code generation on IA-32.
2 Copyright (C) 1988, 1992, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001,
3 2002, 2003, 2004, 2005, 2006, 2007 Free Software Foundation, Inc.
4
5 This file is part of GCC.
6
7 GCC is free software; you can redistribute it and/or modify
8 it under the terms of the GNU General Public License as published by
9 the Free Software Foundation; either version 3, or (at your option)
10 any later version.
11
12 GCC is distributed in the hope that it will be useful,
13 but WITHOUT ANY WARRANTY; without even the implied warranty of
14 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 GNU General Public License for more details.
16
17 You should have received a copy of the GNU General Public License
18 along with GCC; see the file COPYING3. If not see
19 <http://www.gnu.org/licenses/>. */
20
21 #include "config.h"
22 #include "system.h"
23 #include "coretypes.h"
24 #include "tm.h"
25 #include "rtl.h"
26 #include "tree.h"
27 #include "tm_p.h"
28 #include "regs.h"
29 #include "hard-reg-set.h"
30 #include "real.h"
31 #include "insn-config.h"
32 #include "conditions.h"
33 #include "output.h"
34 #include "insn-codes.h"
35 #include "insn-attr.h"
36 #include "flags.h"
37 #include "except.h"
38 #include "function.h"
39 #include "recog.h"
40 #include "expr.h"
41 #include "optabs.h"
42 #include "toplev.h"
43 #include "basic-block.h"
44 #include "ggc.h"
45 #include "target.h"
46 #include "target-def.h"
47 #include "langhooks.h"
48 #include "cgraph.h"
49 #include "tree-gimple.h"
50 #include "dwarf2.h"
51 #include "df.h"
52 #include "tm-constrs.h"
53 #include "params.h"
54
55 #ifndef CHECK_STACK_LIMIT
56 #define CHECK_STACK_LIMIT (-1)
57 #endif
58
59 /* Return index of given mode in mult and division cost tables. */
60 #define MODE_INDEX(mode) \
61 ((mode) == QImode ? 0 \
62 : (mode) == HImode ? 1 \
63 : (mode) == SImode ? 2 \
64 : (mode) == DImode ? 3 \
65 : 4)
66
67 /* Processor costs (relative to an add) */
68 /* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
69 #define COSTS_N_BYTES(N) ((N) * 2)
70
71 #define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall}}}
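/* A rough guide to reading the stringop tables in the cost structures below
   (the exact layout is given by struct stringop_algs in i386.h): each memcpy
   or memset descriptor pairs an algorithm for blocks of unknown size with a
   list of {max_size, algorithm} entries, where a max_size of -1 covers all
   remaining sizes.  Every cost table carries one such descriptor for 32-bit
   code and one for 64-bit code; CPUs that are never tuned for in one of the
   two modes fill the unused slot with DUMMY_STRINGOP_ALGS.  For instance, an
   entry such as

     {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
      DUMMY_STRINGOP_ALGS}

   reads roughly: for 32-bit code use rep movsl/stosl for blocks up to 256
   bytes and a library call for larger or unknown sizes.  */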
72
73 static const
74 struct processor_costs size_cost = { /* costs for tuning for size */
75 COSTS_N_BYTES (2), /* cost of an add instruction */
76 COSTS_N_BYTES (3), /* cost of a lea instruction */
77 COSTS_N_BYTES (2), /* variable shift costs */
78 COSTS_N_BYTES (3), /* constant shift costs */
79 {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
80 COSTS_N_BYTES (3), /* HI */
81 COSTS_N_BYTES (3), /* SI */
82 COSTS_N_BYTES (3), /* DI */
83 COSTS_N_BYTES (5)}, /* other */
84 0, /* cost of multiply per each bit set */
85 {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
86 COSTS_N_BYTES (3), /* HI */
87 COSTS_N_BYTES (3), /* SI */
88 COSTS_N_BYTES (3), /* DI */
89 COSTS_N_BYTES (5)}, /* other */
90 COSTS_N_BYTES (3), /* cost of movsx */
91 COSTS_N_BYTES (3), /* cost of movzx */
92 0, /* "large" insn */
93 2, /* MOVE_RATIO */
94 2, /* cost for loading QImode using movzbl */
95 {2, 2, 2}, /* cost of loading integer registers
96 in QImode, HImode and SImode.
97 Relative to reg-reg move (2). */
98 {2, 2, 2}, /* cost of storing integer registers */
99 2, /* cost of reg,reg fld/fst */
100 {2, 2, 2}, /* cost of loading fp registers
101 in SFmode, DFmode and XFmode */
102 {2, 2, 2}, /* cost of storing fp registers
103 in SFmode, DFmode and XFmode */
104 3, /* cost of moving MMX register */
105 {3, 3}, /* cost of loading MMX registers
106 in SImode and DImode */
107 {3, 3}, /* cost of storing MMX registers
108 in SImode and DImode */
109 3, /* cost of moving SSE register */
110 {3, 3, 3}, /* cost of loading SSE registers
111 in SImode, DImode and TImode */
112 {3, 3, 3}, /* cost of storing SSE registers
113 in SImode, DImode and TImode */
114 3, /* MMX or SSE register to integer */
115 0, /* size of l1 cache */
116 0, /* size of l2 cache */
117 0, /* size of prefetch block */
118 0, /* number of parallel prefetches */
119 2, /* Branch cost */
120 COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
121 COSTS_N_BYTES (2), /* cost of FMUL instruction. */
122 COSTS_N_BYTES (2), /* cost of FDIV instruction. */
123 COSTS_N_BYTES (2), /* cost of FABS instruction. */
124 COSTS_N_BYTES (2), /* cost of FCHS instruction. */
125 COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
126 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
127 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}},
128 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
129 {rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}}}
130 };
131
132 /* Processor costs (relative to an add) */
133 static const
134 struct processor_costs i386_cost = { /* 386 specific costs */
135 COSTS_N_INSNS (1), /* cost of an add instruction */
136 COSTS_N_INSNS (1), /* cost of a lea instruction */
137 COSTS_N_INSNS (3), /* variable shift costs */
138 COSTS_N_INSNS (2), /* constant shift costs */
139 {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
140 COSTS_N_INSNS (6), /* HI */
141 COSTS_N_INSNS (6), /* SI */
142 COSTS_N_INSNS (6), /* DI */
143 COSTS_N_INSNS (6)}, /* other */
144 COSTS_N_INSNS (1), /* cost of multiply per each bit set */
145 {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
146 COSTS_N_INSNS (23), /* HI */
147 COSTS_N_INSNS (23), /* SI */
148 COSTS_N_INSNS (23), /* DI */
149 COSTS_N_INSNS (23)}, /* other */
150 COSTS_N_INSNS (3), /* cost of movsx */
151 COSTS_N_INSNS (2), /* cost of movzx */
152 15, /* "large" insn */
153 3, /* MOVE_RATIO */
154 4, /* cost for loading QImode using movzbl */
155 {2, 4, 2}, /* cost of loading integer registers
156 in QImode, HImode and SImode.
157 Relative to reg-reg move (2). */
158 {2, 4, 2}, /* cost of storing integer registers */
159 2, /* cost of reg,reg fld/fst */
160 {8, 8, 8}, /* cost of loading fp registers
161 in SFmode, DFmode and XFmode */
162 {8, 8, 8}, /* cost of storing fp registers
163 in SFmode, DFmode and XFmode */
164 2, /* cost of moving MMX register */
165 {4, 8}, /* cost of loading MMX registers
166 in SImode and DImode */
167 {4, 8}, /* cost of storing MMX registers
168 in SImode and DImode */
169 2, /* cost of moving SSE register */
170 {4, 8, 16}, /* cost of loading SSE registers
171 in SImode, DImode and TImode */
172 {4, 8, 16}, /* cost of storing SSE registers
173 in SImode, DImode and TImode */
174 3, /* MMX or SSE register to integer */
175 0, /* size of l1 cache */
176 0, /* size of l2 cache */
177 0, /* size of prefetch block */
178 0, /* number of parallel prefetches */
179 1, /* Branch cost */
180 COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
181 COSTS_N_INSNS (27), /* cost of FMUL instruction. */
182 COSTS_N_INSNS (88), /* cost of FDIV instruction. */
183 COSTS_N_INSNS (22), /* cost of FABS instruction. */
184 COSTS_N_INSNS (24), /* cost of FCHS instruction. */
185 COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
186 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
187 DUMMY_STRINGOP_ALGS},
188 {{rep_prefix_1_byte, {{-1, rep_prefix_1_byte}}},
189 DUMMY_STRINGOP_ALGS},
190 };
191
192 static const
193 struct processor_costs i486_cost = { /* 486 specific costs */
194 COSTS_N_INSNS (1), /* cost of an add instruction */
195 COSTS_N_INSNS (1), /* cost of a lea instruction */
196 COSTS_N_INSNS (3), /* variable shift costs */
197 COSTS_N_INSNS (2), /* constant shift costs */
198 {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
199 COSTS_N_INSNS (12), /* HI */
200 COSTS_N_INSNS (12), /* SI */
201 COSTS_N_INSNS (12), /* DI */
202 COSTS_N_INSNS (12)}, /* other */
203 1, /* cost of multiply per each bit set */
204 {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
205 COSTS_N_INSNS (40), /* HI */
206 COSTS_N_INSNS (40), /* SI */
207 COSTS_N_INSNS (40), /* DI */
208 COSTS_N_INSNS (40)}, /* other */
209 COSTS_N_INSNS (3), /* cost of movsx */
210 COSTS_N_INSNS (2), /* cost of movzx */
211 15, /* "large" insn */
212 3, /* MOVE_RATIO */
213 4, /* cost for loading QImode using movzbl */
214 {2, 4, 2}, /* cost of loading integer registers
215 in QImode, HImode and SImode.
216 Relative to reg-reg move (2). */
217 {2, 4, 2}, /* cost of storing integer registers */
218 2, /* cost of reg,reg fld/fst */
219 {8, 8, 8}, /* cost of loading fp registers
220 in SFmode, DFmode and XFmode */
221 {8, 8, 8}, /* cost of storing fp registers
222 in SFmode, DFmode and XFmode */
223 2, /* cost of moving MMX register */
224 {4, 8}, /* cost of loading MMX registers
225 in SImode and DImode */
226 {4, 8}, /* cost of storing MMX registers
227 in SImode and DImode */
228 2, /* cost of moving SSE register */
229 {4, 8, 16}, /* cost of loading SSE registers
230 in SImode, DImode and TImode */
231 {4, 8, 16}, /* cost of storing SSE registers
232 in SImode, DImode and TImode */
233 3, /* MMX or SSE register to integer */
234 4, /* size of l1 cache. 486 has 8kB cache
235 shared for code and data, so 4kB is
236 not really precise. */
237 4, /* size of l2 cache */
238 0, /* size of prefetch block */
239 0, /* number of parallel prefetches */
240 1, /* Branch cost */
241 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
242 COSTS_N_INSNS (16), /* cost of FMUL instruction. */
243 COSTS_N_INSNS (73), /* cost of FDIV instruction. */
244 COSTS_N_INSNS (3), /* cost of FABS instruction. */
245 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
246 COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
247 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
248 DUMMY_STRINGOP_ALGS},
249 {{rep_prefix_4_byte, {{-1, rep_prefix_4_byte}}},
250 DUMMY_STRINGOP_ALGS}
251 };
252
253 static const
254 struct processor_costs pentium_cost = {
255 COSTS_N_INSNS (1), /* cost of an add instruction */
256 COSTS_N_INSNS (1), /* cost of a lea instruction */
257 COSTS_N_INSNS (4), /* variable shift costs */
258 COSTS_N_INSNS (1), /* constant shift costs */
259 {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
260 COSTS_N_INSNS (11), /* HI */
261 COSTS_N_INSNS (11), /* SI */
262 COSTS_N_INSNS (11), /* DI */
263 COSTS_N_INSNS (11)}, /* other */
264 0, /* cost of multiply per each bit set */
265 {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
266 COSTS_N_INSNS (25), /* HI */
267 COSTS_N_INSNS (25), /* SI */
268 COSTS_N_INSNS (25), /* DI */
269 COSTS_N_INSNS (25)}, /* other */
270 COSTS_N_INSNS (3), /* cost of movsx */
271 COSTS_N_INSNS (2), /* cost of movzx */
272 8, /* "large" insn */
273 6, /* MOVE_RATIO */
274 6, /* cost for loading QImode using movzbl */
275 {2, 4, 2}, /* cost of loading integer registers
276 in QImode, HImode and SImode.
277 Relative to reg-reg move (2). */
278 {2, 4, 2}, /* cost of storing integer registers */
279 2, /* cost of reg,reg fld/fst */
280 {2, 2, 6}, /* cost of loading fp registers
281 in SFmode, DFmode and XFmode */
282 {4, 4, 6}, /* cost of storing fp registers
283 in SFmode, DFmode and XFmode */
284 8, /* cost of moving MMX register */
285 {8, 8}, /* cost of loading MMX registers
286 in SImode and DImode */
287 {8, 8}, /* cost of storing MMX registers
288 in SImode and DImode */
289 2, /* cost of moving SSE register */
290 {4, 8, 16}, /* cost of loading SSE registers
291 in SImode, DImode and TImode */
292 {4, 8, 16}, /* cost of storing SSE registers
293 in SImode, DImode and TImode */
294 3, /* MMX or SSE register to integer */
295 8, /* size of l1 cache. */
296 8, /* size of l2 cache */
297 0, /* size of prefetch block */
298 0, /* number of parallel prefetches */
299 2, /* Branch cost */
300 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
301 COSTS_N_INSNS (3), /* cost of FMUL instruction. */
302 COSTS_N_INSNS (39), /* cost of FDIV instruction. */
303 COSTS_N_INSNS (1), /* cost of FABS instruction. */
304 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
305 COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
306 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
307 DUMMY_STRINGOP_ALGS},
308 {{libcall, {{-1, rep_prefix_4_byte}}},
309 DUMMY_STRINGOP_ALGS}
310 };
311
312 static const
313 struct processor_costs pentiumpro_cost = {
314 COSTS_N_INSNS (1), /* cost of an add instruction */
315 COSTS_N_INSNS (1), /* cost of a lea instruction */
316 COSTS_N_INSNS (1), /* variable shift costs */
317 COSTS_N_INSNS (1), /* constant shift costs */
318 {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
319 COSTS_N_INSNS (4), /* HI */
320 COSTS_N_INSNS (4), /* SI */
321 COSTS_N_INSNS (4), /* DI */
322 COSTS_N_INSNS (4)}, /* other */
323 0, /* cost of multiply per each bit set */
324 {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
325 COSTS_N_INSNS (17), /* HI */
326 COSTS_N_INSNS (17), /* SI */
327 COSTS_N_INSNS (17), /* DI */
328 COSTS_N_INSNS (17)}, /* other */
329 COSTS_N_INSNS (1), /* cost of movsx */
330 COSTS_N_INSNS (1), /* cost of movzx */
331 8, /* "large" insn */
332 6, /* MOVE_RATIO */
333 2, /* cost for loading QImode using movzbl */
334 {4, 4, 4}, /* cost of loading integer registers
335 in QImode, HImode and SImode.
336 Relative to reg-reg move (2). */
337 {2, 2, 2}, /* cost of storing integer registers */
338 2, /* cost of reg,reg fld/fst */
339 {2, 2, 6}, /* cost of loading fp registers
340 in SFmode, DFmode and XFmode */
341 {4, 4, 6}, /* cost of storing fp registers
342 in SFmode, DFmode and XFmode */
343 2, /* cost of moving MMX register */
344 {2, 2}, /* cost of loading MMX registers
345 in SImode and DImode */
346 {2, 2}, /* cost of storing MMX registers
347 in SImode and DImode */
348 2, /* cost of moving SSE register */
349 {2, 2, 8}, /* cost of loading SSE registers
350 in SImode, DImode and TImode */
351 {2, 2, 8}, /* cost of storing SSE registers
352 in SImode, DImode and TImode */
353 3, /* MMX or SSE register to integer */
354 8, /* size of l1 cache. */
355 256, /* size of l2 cache */
356 32, /* size of prefetch block */
357 6, /* number of parallel prefetches */
358 2, /* Branch cost */
359 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
360 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
361 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
362 COSTS_N_INSNS (2), /* cost of FABS instruction. */
363 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
364 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
365 /* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes (we ensure
366 the alignment). For small blocks an inline loop is still a noticeable win; for bigger
367 blocks either rep movsl or rep movsb is the way to go. Rep movsb apparently has a
368 more expensive startup time in the CPU, but after 4K the difference is down in the noise.
369 */
370 {{rep_prefix_4_byte, {{128, loop}, {1024, unrolled_loop},
371 {8192, rep_prefix_4_byte}, {-1, rep_prefix_1_byte}}},
372 DUMMY_STRINGOP_ALGS},
373 {{rep_prefix_4_byte, {{1024, unrolled_loop},
374 {8192, rep_prefix_4_byte}, {-1, libcall}}},
375 DUMMY_STRINGOP_ALGS}
376 };
377
378 static const
379 struct processor_costs geode_cost = {
380 COSTS_N_INSNS (1), /* cost of an add instruction */
381 COSTS_N_INSNS (1), /* cost of a lea instruction */
382 COSTS_N_INSNS (2), /* variable shift costs */
383 COSTS_N_INSNS (1), /* constant shift costs */
384 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
385 COSTS_N_INSNS (4), /* HI */
386 COSTS_N_INSNS (7), /* SI */
387 COSTS_N_INSNS (7), /* DI */
388 COSTS_N_INSNS (7)}, /* other */
389 0, /* cost of multiply per each bit set */
390 {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
391 COSTS_N_INSNS (23), /* HI */
392 COSTS_N_INSNS (39), /* SI */
393 COSTS_N_INSNS (39), /* DI */
394 COSTS_N_INSNS (39)}, /* other */
395 COSTS_N_INSNS (1), /* cost of movsx */
396 COSTS_N_INSNS (1), /* cost of movzx */
397 8, /* "large" insn */
398 4, /* MOVE_RATIO */
399 1, /* cost for loading QImode using movzbl */
400 {1, 1, 1}, /* cost of loading integer registers
401 in QImode, HImode and SImode.
402 Relative to reg-reg move (2). */
403 {1, 1, 1}, /* cost of storing integer registers */
404 1, /* cost of reg,reg fld/fst */
405 {1, 1, 1}, /* cost of loading fp registers
406 in SFmode, DFmode and XFmode */
407 {4, 6, 6}, /* cost of storing fp registers
408 in SFmode, DFmode and XFmode */
409
410 1, /* cost of moving MMX register */
411 {1, 1}, /* cost of loading MMX registers
412 in SImode and DImode */
413 {1, 1}, /* cost of storing MMX registers
414 in SImode and DImode */
415 1, /* cost of moving SSE register */
416 {1, 1, 1}, /* cost of loading SSE registers
417 in SImode, DImode and TImode */
418 {1, 1, 1}, /* cost of storing SSE registers
419 in SImode, DImode and TImode */
420 1, /* MMX or SSE register to integer */
421 64, /* size of l1 cache. */
422 128, /* size of l2 cache. */
423 32, /* size of prefetch block */
424 1, /* number of parallel prefetches */
425 1, /* Branch cost */
426 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
427 COSTS_N_INSNS (11), /* cost of FMUL instruction. */
428 COSTS_N_INSNS (47), /* cost of FDIV instruction. */
429 COSTS_N_INSNS (1), /* cost of FABS instruction. */
430 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
431 COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
432 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
433 DUMMY_STRINGOP_ALGS},
434 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
435 DUMMY_STRINGOP_ALGS}
436 };
437
438 static const
439 struct processor_costs k6_cost = {
440 COSTS_N_INSNS (1), /* cost of an add instruction */
441 COSTS_N_INSNS (2), /* cost of a lea instruction */
442 COSTS_N_INSNS (1), /* variable shift costs */
443 COSTS_N_INSNS (1), /* constant shift costs */
444 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
445 COSTS_N_INSNS (3), /* HI */
446 COSTS_N_INSNS (3), /* SI */
447 COSTS_N_INSNS (3), /* DI */
448 COSTS_N_INSNS (3)}, /* other */
449 0, /* cost of multiply per each bit set */
450 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
451 COSTS_N_INSNS (18), /* HI */
452 COSTS_N_INSNS (18), /* SI */
453 COSTS_N_INSNS (18), /* DI */
454 COSTS_N_INSNS (18)}, /* other */
455 COSTS_N_INSNS (2), /* cost of movsx */
456 COSTS_N_INSNS (2), /* cost of movzx */
457 8, /* "large" insn */
458 4, /* MOVE_RATIO */
459 3, /* cost for loading QImode using movzbl */
460 {4, 5, 4}, /* cost of loading integer registers
461 in QImode, HImode and SImode.
462 Relative to reg-reg move (2). */
463 {2, 3, 2}, /* cost of storing integer registers */
464 4, /* cost of reg,reg fld/fst */
465 {6, 6, 6}, /* cost of loading fp registers
466 in SFmode, DFmode and XFmode */
467 {4, 4, 4}, /* cost of storing fp registers
468 in SFmode, DFmode and XFmode */
469 2, /* cost of moving MMX register */
470 {2, 2}, /* cost of loading MMX registers
471 in SImode and DImode */
472 {2, 2}, /* cost of storing MMX registers
473 in SImode and DImode */
474 2, /* cost of moving SSE register */
475 {2, 2, 8}, /* cost of loading SSE registers
476 in SImode, DImode and TImode */
477 {2, 2, 8}, /* cost of storing SSE registers
478 in SImode, DImode and TImode */
479 6, /* MMX or SSE register to integer */
480 32, /* size of l1 cache. */
481 32, /* size of l2 cache. Some models
482 have integrated l2 cache, but
483 optimizing for k6 is not important
484 enough to worry about that. */
485 32, /* size of prefetch block */
486 1, /* number of parallel prefetches */
487 1, /* Branch cost */
488 COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
489 COSTS_N_INSNS (2), /* cost of FMUL instruction. */
490 COSTS_N_INSNS (56), /* cost of FDIV instruction. */
491 COSTS_N_INSNS (2), /* cost of FABS instruction. */
492 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
493 COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
494 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
495 DUMMY_STRINGOP_ALGS},
496 {{libcall, {{256, rep_prefix_4_byte}, {-1, libcall}}},
497 DUMMY_STRINGOP_ALGS}
498 };
499
500 static const
501 struct processor_costs athlon_cost = {
502 COSTS_N_INSNS (1), /* cost of an add instruction */
503 COSTS_N_INSNS (2), /* cost of a lea instruction */
504 COSTS_N_INSNS (1), /* variable shift costs */
505 COSTS_N_INSNS (1), /* constant shift costs */
506 {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
507 COSTS_N_INSNS (5), /* HI */
508 COSTS_N_INSNS (5), /* SI */
509 COSTS_N_INSNS (5), /* DI */
510 COSTS_N_INSNS (5)}, /* other */
511 0, /* cost of multiply per each bit set */
512 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
513 COSTS_N_INSNS (26), /* HI */
514 COSTS_N_INSNS (42), /* SI */
515 COSTS_N_INSNS (74), /* DI */
516 COSTS_N_INSNS (74)}, /* other */
517 COSTS_N_INSNS (1), /* cost of movsx */
518 COSTS_N_INSNS (1), /* cost of movzx */
519 8, /* "large" insn */
520 9, /* MOVE_RATIO */
521 4, /* cost for loading QImode using movzbl */
522 {3, 4, 3}, /* cost of loading integer registers
523 in QImode, HImode and SImode.
524 Relative to reg-reg move (2). */
525 {3, 4, 3}, /* cost of storing integer registers */
526 4, /* cost of reg,reg fld/fst */
527 {4, 4, 12}, /* cost of loading fp registers
528 in SFmode, DFmode and XFmode */
529 {6, 6, 8}, /* cost of storing fp registers
530 in SFmode, DFmode and XFmode */
531 2, /* cost of moving MMX register */
532 {4, 4}, /* cost of loading MMX registers
533 in SImode and DImode */
534 {4, 4}, /* cost of storing MMX registers
535 in SImode and DImode */
536 2, /* cost of moving SSE register */
537 {4, 4, 6}, /* cost of loading SSE registers
538 in SImode, DImode and TImode */
539 {4, 4, 5}, /* cost of storing SSE registers
540 in SImode, DImode and TImode */
541 5, /* MMX or SSE register to integer */
542 64, /* size of l1 cache. */
543 256, /* size of l2 cache. */
544 64, /* size of prefetch block */
545 6, /* number of parallel prefetches */
546 5, /* Branch cost */
547 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
548 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
549 COSTS_N_INSNS (24), /* cost of FDIV instruction. */
550 COSTS_N_INSNS (2), /* cost of FABS instruction. */
551 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
552 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
553 /* For some reason, Athlon deals better with the REP prefix (relative to loops)
554 than K8 does. Alignment becomes important after 8 bytes for memcpy and
555 128 bytes for memset. */
556 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
557 DUMMY_STRINGOP_ALGS},
558 {{libcall, {{2048, rep_prefix_4_byte}, {-1, libcall}}},
559 DUMMY_STRINGOP_ALGS}
560 };
561
562 static const
563 struct processor_costs k8_cost = {
564 COSTS_N_INSNS (1), /* cost of an add instruction */
565 COSTS_N_INSNS (2), /* cost of a lea instruction */
566 COSTS_N_INSNS (1), /* variable shift costs */
567 COSTS_N_INSNS (1), /* constant shift costs */
568 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
569 COSTS_N_INSNS (4), /* HI */
570 COSTS_N_INSNS (3), /* SI */
571 COSTS_N_INSNS (4), /* DI */
572 COSTS_N_INSNS (5)}, /* other */
573 0, /* cost of multiply per each bit set */
574 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
575 COSTS_N_INSNS (26), /* HI */
576 COSTS_N_INSNS (42), /* SI */
577 COSTS_N_INSNS (74), /* DI */
578 COSTS_N_INSNS (74)}, /* other */
579 COSTS_N_INSNS (1), /* cost of movsx */
580 COSTS_N_INSNS (1), /* cost of movzx */
581 8, /* "large" insn */
582 9, /* MOVE_RATIO */
583 4, /* cost for loading QImode using movzbl */
584 {3, 4, 3}, /* cost of loading integer registers
585 in QImode, HImode and SImode.
586 Relative to reg-reg move (2). */
587 {3, 4, 3}, /* cost of storing integer registers */
588 4, /* cost of reg,reg fld/fst */
589 {4, 4, 12}, /* cost of loading fp registers
590 in SFmode, DFmode and XFmode */
591 {6, 6, 8}, /* cost of storing fp registers
592 in SFmode, DFmode and XFmode */
593 2, /* cost of moving MMX register */
594 {3, 3}, /* cost of loading MMX registers
595 in SImode and DImode */
596 {4, 4}, /* cost of storing MMX registers
597 in SImode and DImode */
598 2, /* cost of moving SSE register */
599 {4, 3, 6}, /* cost of loading SSE registers
600 in SImode, DImode and TImode */
601 {4, 4, 5}, /* cost of storing SSE registers
602 in SImode, DImode and TImode */
603 5, /* MMX or SSE register to integer */
604 64, /* size of l1 cache. */
605 512, /* size of l2 cache. */
606 64, /* size of prefetch block */
607 /* New AMD processors never drop prefetches; if they cannot be performed
608 immediately, they are queued. We set the number of simultaneous prefetches
609 to a large constant to reflect this (it probably is not a good idea not
610 to limit the number of prefetches at all, as their execution also takes some
611 time). */
612 100, /* number of parallel prefetches */
613 5, /* Branch cost */
614 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
615 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
616 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
617 COSTS_N_INSNS (2), /* cost of FABS instruction. */
618 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
619 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
620 /* K8 has an optimized REP instruction for medium sized blocks, but for very small
621 blocks it is better to use a loop. For large blocks, a libcall can do
622 nontemporal accesses and beat inline code considerably. */
623 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
624 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
625 {{libcall, {{8, loop}, {24, unrolled_loop},
626 {2048, rep_prefix_4_byte}, {-1, libcall}}},
627 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
628 };
629
630 struct processor_costs amdfam10_cost = {
631 COSTS_N_INSNS (1), /* cost of an add instruction */
632 COSTS_N_INSNS (2), /* cost of a lea instruction */
633 COSTS_N_INSNS (1), /* variable shift costs */
634 COSTS_N_INSNS (1), /* constant shift costs */
635 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
636 COSTS_N_INSNS (4), /* HI */
637 COSTS_N_INSNS (3), /* SI */
638 COSTS_N_INSNS (4), /* DI */
639 COSTS_N_INSNS (5)}, /* other */
640 0, /* cost of multiply per each bit set */
641 {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
642 COSTS_N_INSNS (35), /* HI */
643 COSTS_N_INSNS (51), /* SI */
644 COSTS_N_INSNS (83), /* DI */
645 COSTS_N_INSNS (83)}, /* other */
646 COSTS_N_INSNS (1), /* cost of movsx */
647 COSTS_N_INSNS (1), /* cost of movzx */
648 8, /* "large" insn */
649 9, /* MOVE_RATIO */
650 4, /* cost for loading QImode using movzbl */
651 {3, 4, 3}, /* cost of loading integer registers
652 in QImode, HImode and SImode.
653 Relative to reg-reg move (2). */
654 {3, 4, 3}, /* cost of storing integer registers */
655 4, /* cost of reg,reg fld/fst */
656 {4, 4, 12}, /* cost of loading fp registers
657 in SFmode, DFmode and XFmode */
658 {6, 6, 8}, /* cost of storing fp registers
659 in SFmode, DFmode and XFmode */
660 2, /* cost of moving MMX register */
661 {3, 3}, /* cost of loading MMX registers
662 in SImode and DImode */
663 {4, 4}, /* cost of storing MMX registers
664 in SImode and DImode */
665 2, /* cost of moving SSE register */
666 {4, 4, 3}, /* cost of loading SSE registers
667 in SImode, DImode and TImode */
668 {4, 4, 5}, /* cost of storing SSE registers
669 in SImode, DImode and TImode */
670 3, /* MMX or SSE register to integer */
671 /* On K8
672 MOVD reg64, xmmreg Double FSTORE 4
673 MOVD reg32, xmmreg Double FSTORE 4
674 On AMDFAM10
675 MOVD reg64, xmmreg Double FADD 3
676 1/1 1/1
677 MOVD reg32, xmmreg Double FADD 3
678 1/1 1/1 */
679 64, /* size of l1 cache. */
680 512, /* size of l2 cache. */
681 64, /* size of prefetch block */
682 /* New AMD processors never drop prefetches; if they cannot be performed
683 immediately, they are queued. We set the number of simultaneous prefetches
684 to a large constant to reflect this (it probably is not a good idea not
685 to limit the number of prefetches at all, as their execution also takes some
686 time). */
687 100, /* number of parallel prefetches */
688 5, /* Branch cost */
689 COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
690 COSTS_N_INSNS (4), /* cost of FMUL instruction. */
691 COSTS_N_INSNS (19), /* cost of FDIV instruction. */
692 COSTS_N_INSNS (2), /* cost of FABS instruction. */
693 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
694 COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
695
696 /* AMDFAM10 has an optimized REP instruction for medium sized blocks, but for
697 very small blocks it is better to use a loop. For large blocks, a libcall can
698 do nontemporal accesses and beat inline code considerably. */
699 {{libcall, {{6, loop}, {14, unrolled_loop}, {-1, rep_prefix_4_byte}}},
700 {libcall, {{16, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
701 {{libcall, {{8, loop}, {24, unrolled_loop},
702 {2048, rep_prefix_4_byte}, {-1, libcall}}},
703 {libcall, {{48, unrolled_loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
704 };
705
706 static const
707 struct processor_costs pentium4_cost = {
708 COSTS_N_INSNS (1), /* cost of an add instruction */
709 COSTS_N_INSNS (3), /* cost of a lea instruction */
710 COSTS_N_INSNS (4), /* variable shift costs */
711 COSTS_N_INSNS (4), /* constant shift costs */
712 {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
713 COSTS_N_INSNS (15), /* HI */
714 COSTS_N_INSNS (15), /* SI */
715 COSTS_N_INSNS (15), /* DI */
716 COSTS_N_INSNS (15)}, /* other */
717 0, /* cost of multiply per each bit set */
718 {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
719 COSTS_N_INSNS (56), /* HI */
720 COSTS_N_INSNS (56), /* SI */
721 COSTS_N_INSNS (56), /* DI */
722 COSTS_N_INSNS (56)}, /* other */
723 COSTS_N_INSNS (1), /* cost of movsx */
724 COSTS_N_INSNS (1), /* cost of movzx */
725 16, /* "large" insn */
726 6, /* MOVE_RATIO */
727 2, /* cost for loading QImode using movzbl */
728 {4, 5, 4}, /* cost of loading integer registers
729 in QImode, HImode and SImode.
730 Relative to reg-reg move (2). */
731 {2, 3, 2}, /* cost of storing integer registers */
732 2, /* cost of reg,reg fld/fst */
733 {2, 2, 6}, /* cost of loading fp registers
734 in SFmode, DFmode and XFmode */
735 {4, 4, 6}, /* cost of storing fp registers
736 in SFmode, DFmode and XFmode */
737 2, /* cost of moving MMX register */
738 {2, 2}, /* cost of loading MMX registers
739 in SImode and DImode */
740 {2, 2}, /* cost of storing MMX registers
741 in SImode and DImode */
742 12, /* cost of moving SSE register */
743 {12, 12, 12}, /* cost of loading SSE registers
744 in SImode, DImode and TImode */
745 {2, 2, 8}, /* cost of storing SSE registers
746 in SImode, DImode and TImode */
747 10, /* MMX or SSE register to integer */
748 8, /* size of l1 cache. */
749 256, /* size of l2 cache. */
750 64, /* size of prefetch block */
751 6, /* number of parallel prefetches */
752 2, /* Branch cost */
753 COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
754 COSTS_N_INSNS (7), /* cost of FMUL instruction. */
755 COSTS_N_INSNS (43), /* cost of FDIV instruction. */
756 COSTS_N_INSNS (2), /* cost of FABS instruction. */
757 COSTS_N_INSNS (2), /* cost of FCHS instruction. */
758 COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
759 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
760 DUMMY_STRINGOP_ALGS},
761 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
762 {-1, libcall}}},
763 DUMMY_STRINGOP_ALGS},
764 };
765
766 static const
767 struct processor_costs nocona_cost = {
768 COSTS_N_INSNS (1), /* cost of an add instruction */
769 COSTS_N_INSNS (1), /* cost of a lea instruction */
770 COSTS_N_INSNS (1), /* variable shift costs */
771 COSTS_N_INSNS (1), /* constant shift costs */
772 {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
773 COSTS_N_INSNS (10), /* HI */
774 COSTS_N_INSNS (10), /* SI */
775 COSTS_N_INSNS (10), /* DI */
776 COSTS_N_INSNS (10)}, /* other */
777 0, /* cost of multiply per each bit set */
778 {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
779 COSTS_N_INSNS (66), /* HI */
780 COSTS_N_INSNS (66), /* SI */
781 COSTS_N_INSNS (66), /* DI */
782 COSTS_N_INSNS (66)}, /* other */
783 COSTS_N_INSNS (1), /* cost of movsx */
784 COSTS_N_INSNS (1), /* cost of movzx */
785 16, /* "large" insn */
786 17, /* MOVE_RATIO */
787 4, /* cost for loading QImode using movzbl */
788 {4, 4, 4}, /* cost of loading integer registers
789 in QImode, HImode and SImode.
790 Relative to reg-reg move (2). */
791 {4, 4, 4}, /* cost of storing integer registers */
792 3, /* cost of reg,reg fld/fst */
793 {12, 12, 12}, /* cost of loading fp registers
794 in SFmode, DFmode and XFmode */
795 {4, 4, 4}, /* cost of storing fp registers
796 in SFmode, DFmode and XFmode */
797 6, /* cost of moving MMX register */
798 {12, 12}, /* cost of loading MMX registers
799 in SImode and DImode */
800 {12, 12}, /* cost of storing MMX registers
801 in SImode and DImode */
802 6, /* cost of moving SSE register */
803 {12, 12, 12}, /* cost of loading SSE registers
804 in SImode, DImode and TImode */
805 {12, 12, 12}, /* cost of storing SSE registers
806 in SImode, DImode and TImode */
807 8, /* MMX or SSE register to integer */
808 8, /* size of l1 cache. */
809 1024, /* size of l2 cache. */
810 128, /* size of prefetch block */
811 8, /* number of parallel prefetches */
812 1, /* Branch cost */
813 COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
814 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
815 COSTS_N_INSNS (40), /* cost of FDIV instruction. */
816 COSTS_N_INSNS (3), /* cost of FABS instruction. */
817 COSTS_N_INSNS (3), /* cost of FCHS instruction. */
818 COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
819 {{libcall, {{12, loop_1_byte}, {-1, rep_prefix_4_byte}}},
820 {libcall, {{32, loop}, {20000, rep_prefix_8_byte},
821 {100000, unrolled_loop}, {-1, libcall}}}},
822 {{libcall, {{6, loop_1_byte}, {48, loop}, {20480, rep_prefix_4_byte},
823 {-1, libcall}}},
824 {libcall, {{24, loop}, {64, unrolled_loop},
825 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
826 };
827
828 static const
829 struct processor_costs core2_cost = {
830 COSTS_N_INSNS (1), /* cost of an add instruction */
831 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
832 COSTS_N_INSNS (1), /* variable shift costs */
833 COSTS_N_INSNS (1), /* constant shift costs */
834 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
835 COSTS_N_INSNS (3), /* HI */
836 COSTS_N_INSNS (3), /* SI */
837 COSTS_N_INSNS (3), /* DI */
838 COSTS_N_INSNS (3)}, /* other */
839 0, /* cost of multiply per each bit set */
840 {COSTS_N_INSNS (22), /* cost of a divide/mod for QI */
841 COSTS_N_INSNS (22), /* HI */
842 COSTS_N_INSNS (22), /* SI */
843 COSTS_N_INSNS (22), /* DI */
844 COSTS_N_INSNS (22)}, /* other */
845 COSTS_N_INSNS (1), /* cost of movsx */
846 COSTS_N_INSNS (1), /* cost of movzx */
847 8, /* "large" insn */
848 16, /* MOVE_RATIO */
849 2, /* cost for loading QImode using movzbl */
850 {6, 6, 6}, /* cost of loading integer registers
851 in QImode, HImode and SImode.
852 Relative to reg-reg move (2). */
853 {4, 4, 4}, /* cost of storing integer registers */
854 2, /* cost of reg,reg fld/fst */
855 {6, 6, 6}, /* cost of loading fp registers
856 in SFmode, DFmode and XFmode */
857 {4, 4, 4}, /* cost of storing fp registers in SFmode, DFmode and XFmode */
858 2, /* cost of moving MMX register */
859 {6, 6}, /* cost of loading MMX registers
860 in SImode and DImode */
861 {4, 4}, /* cost of storing MMX registers
862 in SImode and DImode */
863 2, /* cost of moving SSE register */
864 {6, 6, 6}, /* cost of loading SSE registers
865 in SImode, DImode and TImode */
866 {4, 4, 4}, /* cost of storing SSE registers
867 in SImode, DImode and TImode */
868 2, /* MMX or SSE register to integer */
869 32, /* size of l1 cache. */
870 2048, /* size of l2 cache. */
871 128, /* size of prefetch block */
872 8, /* number of parallel prefetches */
873 3, /* Branch cost */
874 COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
875 COSTS_N_INSNS (5), /* cost of FMUL instruction. */
876 COSTS_N_INSNS (32), /* cost of FDIV instruction. */
877 COSTS_N_INSNS (1), /* cost of FABS instruction. */
878 COSTS_N_INSNS (1), /* cost of FCHS instruction. */
879 COSTS_N_INSNS (58), /* cost of FSQRT instruction. */
880 {{libcall, {{11, loop}, {-1, rep_prefix_4_byte}}},
881 {libcall, {{32, loop}, {64, rep_prefix_4_byte},
882 {8192, rep_prefix_8_byte}, {-1, libcall}}}},
883 {{libcall, {{8, loop}, {15, unrolled_loop},
884 {2048, rep_prefix_4_byte}, {-1, libcall}}},
885 {libcall, {{24, loop}, {32, unrolled_loop},
886 {8192, rep_prefix_8_byte}, {-1, libcall}}}}
887 };
888
889 /* Generic64 should produce code tuned for Nocona and K8. */
890 static const
891 struct processor_costs generic64_cost = {
892 COSTS_N_INSNS (1), /* cost of an add instruction */
893 /* On all chips taken into consideration, lea takes 2 cycles or more. With
894 this cost, however, our current implementation of synth_mult results in
895 the use of unnecessary temporary registers, causing regressions on several
896 SPECfp benchmarks. */
897 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
898 COSTS_N_INSNS (1), /* variable shift costs */
899 COSTS_N_INSNS (1), /* constant shift costs */
900 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
901 COSTS_N_INSNS (4), /* HI */
902 COSTS_N_INSNS (3), /* SI */
903 COSTS_N_INSNS (4), /* DI */
904 COSTS_N_INSNS (2)}, /* other */
905 0, /* cost of multiply per each bit set */
906 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
907 COSTS_N_INSNS (26), /* HI */
908 COSTS_N_INSNS (42), /* SI */
909 COSTS_N_INSNS (74), /* DI */
910 COSTS_N_INSNS (74)}, /* other */
911 COSTS_N_INSNS (1), /* cost of movsx */
912 COSTS_N_INSNS (1), /* cost of movzx */
913 8, /* "large" insn */
914 17, /* MOVE_RATIO */
915 4, /* cost for loading QImode using movzbl */
916 {4, 4, 4}, /* cost of loading integer registers
917 in QImode, HImode and SImode.
918 Relative to reg-reg move (2). */
919 {4, 4, 4}, /* cost of storing integer registers */
920 4, /* cost of reg,reg fld/fst */
921 {12, 12, 12}, /* cost of loading fp registers
922 in SFmode, DFmode and XFmode */
923 {6, 6, 8}, /* cost of storing fp registers
924 in SFmode, DFmode and XFmode */
925 2, /* cost of moving MMX register */
926 {8, 8}, /* cost of loading MMX registers
927 in SImode and DImode */
928 {8, 8}, /* cost of storing MMX registers
929 in SImode and DImode */
930 2, /* cost of moving SSE register */
931 {8, 8, 8}, /* cost of loading SSE registers
932 in SImode, DImode and TImode */
933 {8, 8, 8}, /* cost of storing SSE registers
934 in SImode, DImode and TImode */
935 5, /* MMX or SSE register to integer */
936 32, /* size of l1 cache. */
937 512, /* size of l2 cache. */
938 64, /* size of prefetch block */
939 6, /* number of parallel prefetches */
940 /* Benchmarks show large regressions on the K8 sixtrack benchmark when this value
941 is increased to the perhaps more appropriate value of 5. */
942 3, /* Branch cost */
943 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
944 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
945 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
946 COSTS_N_INSNS (8), /* cost of FABS instruction. */
947 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
948 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
949 {DUMMY_STRINGOP_ALGS,
950 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}},
951 {DUMMY_STRINGOP_ALGS,
952 {libcall, {{32, loop}, {8192, rep_prefix_8_byte}, {-1, libcall}}}}
953 };
954
955 /* Generic32 should produce code tuned for Athlon, PPro, Pentium4, Nocona and K8. */
956 static const
957 struct processor_costs generic32_cost = {
958 COSTS_N_INSNS (1), /* cost of an add instruction */
959 COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
960 COSTS_N_INSNS (1), /* variable shift costs */
961 COSTS_N_INSNS (1), /* constant shift costs */
962 {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
963 COSTS_N_INSNS (4), /* HI */
964 COSTS_N_INSNS (3), /* SI */
965 COSTS_N_INSNS (4), /* DI */
966 COSTS_N_INSNS (2)}, /* other */
967 0, /* cost of multiply per each bit set */
968 {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
969 COSTS_N_INSNS (26), /* HI */
970 COSTS_N_INSNS (42), /* SI */
971 COSTS_N_INSNS (74), /* DI */
972 COSTS_N_INSNS (74)}, /* other */
973 COSTS_N_INSNS (1), /* cost of movsx */
974 COSTS_N_INSNS (1), /* cost of movzx */
975 8, /* "large" insn */
976 17, /* MOVE_RATIO */
977 4, /* cost for loading QImode using movzbl */
978 {4, 4, 4}, /* cost of loading integer registers
979 in QImode, HImode and SImode.
980 Relative to reg-reg move (2). */
981 {4, 4, 4}, /* cost of storing integer registers */
982 4, /* cost of reg,reg fld/fst */
983 {12, 12, 12}, /* cost of loading fp registers
984 in SFmode, DFmode and XFmode */
985 {6, 6, 8}, /* cost of storing fp registers
986 in SFmode, DFmode and XFmode */
987 2, /* cost of moving MMX register */
988 {8, 8}, /* cost of loading MMX registers
989 in SImode and DImode */
990 {8, 8}, /* cost of storing MMX registers
991 in SImode and DImode */
992 2, /* cost of moving SSE register */
993 {8, 8, 8}, /* cost of loading SSE registers
994 in SImode, DImode and TImode */
995 {8, 8, 8}, /* cost of storing SSE registers
996 in SImode, DImode and TImode */
997 5, /* MMX or SSE register to integer */
998 32, /* size of l1 cache. */
999 256, /* size of l2 cache. */
1000 64, /* size of prefetch block */
1001 6, /* number of parallel prefetches */
1002 3, /* Branch cost */
1003 COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
1004 COSTS_N_INSNS (8), /* cost of FMUL instruction. */
1005 COSTS_N_INSNS (20), /* cost of FDIV instruction. */
1006 COSTS_N_INSNS (8), /* cost of FABS instruction. */
1007 COSTS_N_INSNS (8), /* cost of FCHS instruction. */
1008 COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
1009 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1010 DUMMY_STRINGOP_ALGS},
1011 {{libcall, {{32, loop}, {8192, rep_prefix_4_byte}, {-1, libcall}}},
1012 DUMMY_STRINGOP_ALGS},
1013 };
1014
1015 const struct processor_costs *ix86_cost = &pentium_cost;
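/* ix86_cost is repointed during option processing at the cost table of the
   CPU selected by -mtune, and the rtx cost hooks consult it when weighing
   instruction selection decisions.  The per-mode arrays are indexed via
   MODE_INDEX; a division cost lookup, for example, would look roughly like

     COSTS_N_INSNS (ix86_cost->divide[MODE_INDEX (mode)])

   where the field names follow struct processor_costs in i386.h.  */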
1016
1017 /* Processor feature/optimization bitmasks. */
1018 #define m_386 (1<<PROCESSOR_I386)
1019 #define m_486 (1<<PROCESSOR_I486)
1020 #define m_PENT (1<<PROCESSOR_PENTIUM)
1021 #define m_PPRO (1<<PROCESSOR_PENTIUMPRO)
1022 #define m_PENT4 (1<<PROCESSOR_PENTIUM4)
1023 #define m_NOCONA (1<<PROCESSOR_NOCONA)
1024 #define m_CORE2 (1<<PROCESSOR_CORE2)
1025
1026 #define m_GEODE (1<<PROCESSOR_GEODE)
1027 #define m_K6 (1<<PROCESSOR_K6)
1028 #define m_K6_GEODE (m_K6 | m_GEODE)
1029 #define m_K8 (1<<PROCESSOR_K8)
1030 #define m_ATHLON (1<<PROCESSOR_ATHLON)
1031 #define m_ATHLON_K8 (m_K8 | m_ATHLON)
1032 #define m_AMDFAM10 (1<<PROCESSOR_AMDFAM10)
1033 #define m_ATHLON_K8_AMDFAM10 (m_K8 | m_ATHLON | m_AMDFAM10)
1034
1035 #define m_GENERIC32 (1<<PROCESSOR_GENERIC32)
1036 #define m_GENERIC64 (1<<PROCESSOR_GENERIC64)
1037
1038 /* Generic instruction choice should be the common subset of supported CPUs
1039 (PPro/PENT4/NOCONA/CORE2/Athlon/K8). */
1040 #define m_GENERIC (m_GENERIC32 | m_GENERIC64)
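/* Each entry in the tuning and arch feature tables below is a bitmask built
   from the m_* processor bits above; a feature applies to the CPU selected by
   -mtune (or -march for the arch table) when that CPU's bit is set.  Since
   each m_* bit is (1 << PROCESSOR_*), the test is conceptually just

     if (ix86_tune_features[X86_TUNE_USE_LEAVE] & (1 << ix86_tune))
       ...emit leave in the epilogue...

   though the rest of the backend reaches these masks through the TARGET_*
   convenience macros in i386.h rather than indexing the arrays directly.  */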
1041
1042 /* Feature tests against the various tunings. */
1043 unsigned int ix86_tune_features[X86_TUNE_LAST] = {
1044 /* X86_TUNE_USE_LEAVE: Leave does not affect Nocona SPEC2000 results
1045 negatively, so enabling it for Generic64 seems like a good code size
1046 tradeoff. We can't enable it for 32bit generic because it does not
1047 work well with PPro based chips. */
1048 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC64,
1049
1050 /* X86_TUNE_PUSH_MEMORY */
1051 m_386 | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4
1052 | m_NOCONA | m_CORE2 | m_GENERIC,
1053
1054 /* X86_TUNE_ZERO_EXTEND_WITH_AND */
1055 m_486 | m_PENT,
1056
1057 /* X86_TUNE_USE_BIT_TEST */
1058 m_386,
1059
1060 /* X86_TUNE_UNROLL_STRLEN */
1061 m_486 | m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6 | m_CORE2 | m_GENERIC,
1062
1063 /* X86_TUNE_DEEP_BRANCH_PREDICTION */
1064 m_PPRO | m_K6_GEODE | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_GENERIC,
1065
1066 /* X86_TUNE_BRANCH_PREDICTION_HINTS: Branch hints were put in P4 based
1067 on simulation results. But after P4 was made, no performance benefit
1068 was observed with branch hints; they also increase the code size.
1069 As a result, icc never generates branch hints. */
1070 0,
1071
1072 /* X86_TUNE_DOUBLE_WITH_ADD */
1073 ~m_386,
1074
1075 /* X86_TUNE_USE_SAHF */
1076 m_PPRO | m_K6_GEODE | m_K8 | m_AMDFAM10 | m_PENT4
1077 | m_NOCONA | m_CORE2 | m_GENERIC,
1078
1079 /* X86_TUNE_MOVX: Enable to zero extend integer registers to avoid
1080 partial dependencies. */
1081 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA
1082 | m_CORE2 | m_GENERIC | m_GEODE /* m_386 | m_K6 */,
1083
1084 /* X86_TUNE_PARTIAL_REG_STALL: We probably ought to watch for partial
1085 register stalls on the Generic32 compilation setting as well. However,
1086 in the current implementation the partial register stalls are not eliminated
1087 very well - they can be introduced via subregs synthesized by combine
1088 and can happen in caller/callee saving sequences. Because this option
1089 pays back little on PPro based chips and conflicts with the partial reg
1090 dependencies used by Athlon/P4 based chips, it is better to leave it off
1091 for generic32 for now. */
1092 m_PPRO,
1093
1094 /* X86_TUNE_PARTIAL_FLAG_REG_STALL */
1095 m_CORE2 | m_GENERIC,
1096
1097 /* X86_TUNE_USE_HIMODE_FIOP */
1098 m_386 | m_486 | m_K6_GEODE,
1099
1100 /* X86_TUNE_USE_SIMODE_FIOP */
1101 ~(m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT | m_CORE2 | m_GENERIC),
1102
1103 /* X86_TUNE_USE_MOV0 */
1104 m_K6,
1105
1106 /* X86_TUNE_USE_CLTD */
1107 ~(m_PENT | m_K6 | m_CORE2 | m_GENERIC),
1108
1109 /* X86_TUNE_USE_XCHGB: Use xchgb %rh,%rl instead of rolw/rorw $8,rx. */
1110 m_PENT4,
1111
1112 /* X86_TUNE_SPLIT_LONG_MOVES */
1113 m_PPRO,
1114
1115 /* X86_TUNE_READ_MODIFY_WRITE */
1116 ~m_PENT,
1117
1118 /* X86_TUNE_READ_MODIFY */
1119 ~(m_PENT | m_PPRO),
1120
1121 /* X86_TUNE_PROMOTE_QIMODE */
1122 m_K6_GEODE | m_PENT | m_386 | m_486 | m_ATHLON_K8_AMDFAM10 | m_CORE2
1123 | m_GENERIC /* | m_PENT4 ? */,
1124
1125 /* X86_TUNE_FAST_PREFIX */
1126 ~(m_PENT | m_486 | m_386),
1127
1128 /* X86_TUNE_SINGLE_STRINGOP */
1129 m_386 | m_PENT4 | m_NOCONA,
1130
1131 /* X86_TUNE_QIMODE_MATH */
1132 ~0,
1133
1134 /* X86_TUNE_HIMODE_MATH: On PPro this flag is meant to avoid partial
1135 register stalls. Just like X86_TUNE_PARTIAL_REG_STALL, this option
1136 might be considered for Generic32 if our scheme for avoiding partial
1137 stalls were more effective. */
1138 ~m_PPRO,
1139
1140 /* X86_TUNE_PROMOTE_QI_REGS */
1141 0,
1142
1143 /* X86_TUNE_PROMOTE_HI_REGS */
1144 m_PPRO,
1145
1146 /* X86_TUNE_ADD_ESP_4: Enable if add/sub is preferred over 1/2 push/pop. */
1147 m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1148
1149 /* X86_TUNE_ADD_ESP_8 */
1150 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_K6_GEODE | m_386
1151 | m_486 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1152
1153 /* X86_TUNE_SUB_ESP_4 */
1154 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1155
1156 /* X86_TUNE_SUB_ESP_8 */
1157 m_ATHLON_K8_AMDFAM10 | m_PPRO | m_386 | m_486
1158 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1159
1160 /* X86_TUNE_INTEGER_DFMODE_MOVES: Enable if integer moves are preferred
1161 for DFmode copies */
1162 ~(m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2
1163 | m_GENERIC | m_GEODE),
1164
1165 /* X86_TUNE_PARTIAL_REG_DEPENDENCY */
1166 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1167
1168 /* X86_TUNE_SSE_PARTIAL_REG_DEPENDENCY: In the Generic model we have a
1169 conflict here between PPro/Pentium4 based chips that treat 128bit
1170 SSE registers as single units versus K8 based chips that divide SSE
1171 registers into two 64bit halves. This knob promotes all store destinations
1172 to be 128bit to allow register renaming on 128bit SSE units, but usually
1173 results in one extra microop on 64bit SSE units. Experimental results
1174 show that disabling this option on P4 brings an over 20% SPECfp regression,
1175 while enabling it on K8 brings a roughly 2.4% regression that can be partly
1176 masked by careful scheduling of moves. */
1177 m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC | m_AMDFAM10,
1178
1179 /* X86_TUNE_SSE_UNALIGNED_MOVE_OPTIMAL */
1180 m_AMDFAM10,
1181
1182 /* X86_TUNE_SSE_SPLIT_REGS: Set for machines where the type and dependencies
1183 are resolved on SSE register parts instead of whole registers, so we may
1184 maintain just the lower part of scalar values in the proper format, leaving the
1185 upper part undefined. */
1186 m_ATHLON_K8,
1187
1188 /* X86_TUNE_SSE_TYPELESS_STORES */
1189 m_ATHLON_K8_AMDFAM10,
1190
1191 /* X86_TUNE_SSE_LOAD0_BY_PXOR */
1192 m_PPRO | m_PENT4 | m_NOCONA,
1193
1194 /* X86_TUNE_MEMORY_MISMATCH_STALL */
1195 m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1196
1197 /* X86_TUNE_PROLOGUE_USING_MOVE */
1198 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1199
1200 /* X86_TUNE_EPILOGUE_USING_MOVE */
1201 m_ATHLON_K8 | m_PPRO | m_CORE2 | m_GENERIC,
1202
1203 /* X86_TUNE_SHIFT1 */
1204 ~m_486,
1205
1206 /* X86_TUNE_USE_FFREEP */
1207 m_ATHLON_K8_AMDFAM10,
1208
1209 /* X86_TUNE_INTER_UNIT_MOVES */
1210 ~(m_ATHLON_K8_AMDFAM10 | m_GENERIC),
1211
1212 /* X86_TUNE_FOUR_JUMP_LIMIT: Some CPU cores are not able to predict more
1213 than 4 branch instructions in the 16 byte window. */
1214 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_CORE2 | m_GENERIC,
1215
1216 /* X86_TUNE_SCHEDULE */
1217 m_PPRO | m_ATHLON_K8_AMDFAM10 | m_K6_GEODE | m_PENT | m_CORE2 | m_GENERIC,
1218
1219 /* X86_TUNE_USE_BT */
1220 m_ATHLON_K8_AMDFAM10,
1221
1222 /* X86_TUNE_USE_INCDEC */
1223 ~(m_PENT4 | m_NOCONA | m_GENERIC),
1224
1225 /* X86_TUNE_PAD_RETURNS */
1226 m_ATHLON_K8_AMDFAM10 | m_CORE2 | m_GENERIC,
1227
1228 /* X86_TUNE_EXT_80387_CONSTANTS */
1229 m_K6_GEODE | m_ATHLON_K8 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC,
1230
1231 /* X86_TUNE_SHORTEN_X87_SSE */
1232 ~m_K8,
1233
1234 /* X86_TUNE_AVOID_VECTOR_DECODE */
1235 m_K8 | m_GENERIC64,
1236
1237 /* X86_TUNE_PROMOTE_HIMODE_IMUL: Modern CPUs have the same latency for HImode
1238 and SImode multiplies, but the 386 and 486 do HImode multiplies faster. */
1239 ~(m_386 | m_486),
1240
1241 /* X86_TUNE_SLOW_IMUL_IMM32_MEM: Imul of a 32-bit constant and memory is
1242 a vector path instruction on AMD machines. */
1243 m_K8 | m_GENERIC64 | m_AMDFAM10,
1244
1245 /* X86_TUNE_SLOW_IMUL_IMM8: Imul of an 8-bit constant is a vector path
1246 instruction on AMD machines. */
1247 m_K8 | m_GENERIC64 | m_AMDFAM10,
1248
1249 /* X86_TUNE_MOVE_M1_VIA_OR: On Pentiums, it is faster to load -1 via OR
1250 than via a MOV. */
1251 m_PENT,
1252
1253 /* X86_TUNE_NOT_UNPAIRABLE: NOT is not pairable on Pentium, while XOR is,
1254 but one byte longer. */
1255 m_PENT,
1256
1257 /* X86_TUNE_NOT_VECTORMODE: On AMD K6, NOT is vector decoded with a memory
1258 operand that cannot be represented using a modRM byte. The XOR
1259 replacement is long decoded, so this split helps here as well. */
1260 m_K6,
1261 };
1262
1263 /* Feature tests against the various architecture variations. */
1264 unsigned int ix86_arch_features[X86_ARCH_LAST] = {
1265 /* X86_ARCH_CMOVE: Conditional move was added for pentiumpro. */
1266 ~(m_386 | m_486 | m_PENT | m_K6),
1267
1268 /* X86_ARCH_CMPXCHG: Compare and exchange was added for 80486. */
1269 ~m_386,
1270
1271 /* X86_ARCH_CMPXCHG8B: Compare and exchange 8 bytes was added for pentium. */
1272 ~(m_386 | m_486),
1273
1274 /* X86_ARCH_XADD: Exchange and add was added for 80486. */
1275 ~m_386,
1276
1277 /* X86_ARCH_BSWAP: Byteswap was added for 80486. */
1278 ~m_386,
1279 };
1280
1281 static const unsigned int x86_accumulate_outgoing_args
1282 = m_ATHLON_K8_AMDFAM10 | m_PENT4 | m_NOCONA | m_PPRO | m_CORE2 | m_GENERIC;
1283
1284 static const unsigned int x86_arch_always_fancy_math_387
1285 = m_PENT | m_PPRO | m_ATHLON_K8_AMDFAM10 | m_PENT4
1286 | m_NOCONA | m_CORE2 | m_GENERIC;
1287
1288 static enum stringop_alg stringop_alg = no_stringop;
1289
1290 /* In case the average insn count for a single function invocation is
1291 lower than this constant, emit fast (but longer) prologue and
1292 epilogue code. */
1293 #define FAST_PROLOGUE_INSN_COUNT 20
1294
1295 /* Names for 8 (low), 8 (high), and 16-bit registers, respectively. */
1296 static const char *const qi_reg_name[] = QI_REGISTER_NAMES;
1297 static const char *const qi_high_reg_name[] = QI_HIGH_REGISTER_NAMES;
1298 static const char *const hi_reg_name[] = HI_REGISTER_NAMES;
1299
1300 /* Array of the smallest class containing reg number REGNO, indexed by
1301 REGNO. Used by REGNO_REG_CLASS in i386.h. */
1302
1303 enum reg_class const regclass_map[FIRST_PSEUDO_REGISTER] =
1304 {
1305 /* ax, dx, cx, bx */
1306 AREG, DREG, CREG, BREG,
1307 /* si, di, bp, sp */
1308 SIREG, DIREG, NON_Q_REGS, NON_Q_REGS,
1309 /* FP registers */
1310 FP_TOP_REG, FP_SECOND_REG, FLOAT_REGS, FLOAT_REGS,
1311 FLOAT_REGS, FLOAT_REGS, FLOAT_REGS, FLOAT_REGS,
1312 /* arg pointer */
1313 NON_Q_REGS,
1314 /* flags, fpsr, fpcr, frame */
1315 NO_REGS, NO_REGS, NO_REGS, NON_Q_REGS,
1316 /* SSE registers */
1317 SSE_FIRST_REG, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1318 SSE_REGS, SSE_REGS,
1319 /* MMX registers */
1320 MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS, MMX_REGS,
1321 MMX_REGS, MMX_REGS,
1322 /* REX registers */
1323 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1324 NON_Q_REGS, NON_Q_REGS, NON_Q_REGS, NON_Q_REGS,
1325 /* SSE REX registers */
1326 SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS, SSE_REGS,
1327 SSE_REGS, SSE_REGS,
1328 };
1329
1330 /* The "default" register map used in 32bit mode. */
1331
1332 int const dbx_register_map[FIRST_PSEUDO_REGISTER] =
1333 {
1334 0, 2, 1, 3, 6, 7, 4, 5, /* general regs */
1335 12, 13, 14, 15, 16, 17, 18, 19, /* fp regs */
1336 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1337 21, 22, 23, 24, 25, 26, 27, 28, /* SSE */
1338 29, 30, 31, 32, 33, 34, 35, 36, /* MMX */
1339 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1340 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1341 };
1342
1343 static int const x86_64_int_parameter_registers[6] =
1344 {
1345 5 /*RDI*/, 4 /*RSI*/, 1 /*RDX*/, 2 /*RCX*/,
1346 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1347 };
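/* The small parameter and return register tables here use the i386 backend's
   hard register numbers (ax = 0, dx = 1, cx = 2, bx = 3, si = 4, di = 5,
   bp = 6, sp = 7, with FIRST_REX_INT_REG starting the r8..r15 block), so
   x86_64_int_parameter_registers spells out the SysV AMD64 argument order
   RDI, RSI, RDX, RCX, R8, R9, and the MS ABI table the order RCX, RDX, R8, R9.  */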
1348
1349 static int const x86_64_ms_abi_int_parameter_registers[4] =
1350 {
1351 2 /*RCX*/, 1 /*RDX*/,
1352 FIRST_REX_INT_REG /*R8 */, FIRST_REX_INT_REG + 1 /*R9 */
1353 };
1354
1355 static int const x86_64_int_return_registers[4] =
1356 {
1357 0 /*RAX*/, 1 /*RDX*/, 5 /*RDI*/, 4 /*RSI*/
1358 };
1359
1360 /* The "default" register map used in 64bit mode. */
1361 int const dbx64_register_map[FIRST_PSEUDO_REGISTER] =
1362 {
1363 0, 1, 2, 3, 4, 5, 6, 7, /* general regs */
1364 33, 34, 35, 36, 37, 38, 39, 40, /* fp regs */
1365 -1, -1, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1366 17, 18, 19, 20, 21, 22, 23, 24, /* SSE */
1367 41, 42, 43, 44, 45, 46, 47, 48, /* MMX */
1368 8,9,10,11,12,13,14,15, /* extended integer registers */
1369 25, 26, 27, 28, 29, 30, 31, 32, /* extended SSE registers */
1370 };
1371
1372 /* Define the register numbers to be used in Dwarf debugging information.
1373 The SVR4 reference port C compiler uses the following register numbers
1374 in its Dwarf output code:
1375 0 for %eax (gcc regno = 0)
1376 1 for %ecx (gcc regno = 2)
1377 2 for %edx (gcc regno = 1)
1378 3 for %ebx (gcc regno = 3)
1379 4 for %esp (gcc regno = 7)
1380 5 for %ebp (gcc regno = 6)
1381 6 for %esi (gcc regno = 4)
1382 7 for %edi (gcc regno = 5)
1383 The following three DWARF register numbers are never generated by
1384 the SVR4 C compiler or by the GNU compilers, but SDB on x86/svr4
1385 believes these numbers have these meanings.
1386 8 for %eip (no gcc equivalent)
1387 9 for %eflags (gcc regno = 17)
1388 10 for %trapno (no gcc equivalent)
1389 It is not at all clear how we should number the FP stack registers
1390 for the x86 architecture. If the version of SDB on x86/svr4 were
1391 a bit less brain dead with respect to floating-point then we would
1392 have a precedent to follow with respect to DWARF register numbers
1393 for x86 FP registers, but the SDB on x86/svr4 is so completely
1394 broken with respect to FP registers that it is hardly worth thinking
1395 of it as something to strive for compatibility with.
1396 The version of x86/svr4 SDB I have at the moment does (partially)
1397 seem to believe that DWARF register number 11 is associated with
1398 the x86 register %st(0), but that's about all. Higher DWARF
1399 register numbers don't seem to be associated with anything in
1400 particular, and even for DWARF regno 11, SDB only seems to under-
1401 stand that it should say that a variable lives in %st(0) (when
1402 asked via an `=' command) if we said it was in DWARF regno 11,
1403 but SDB still prints garbage when asked for the value of the
1404 variable in question (via a `/' command).
1405 (Also note that the labels SDB prints for various FP stack regs
1406 when doing an `x' command are all wrong.)
1407 Note that these problems generally don't affect the native SVR4
1408 C compiler because it doesn't allow the use of -O with -g and
1409 because when it is *not* optimizing, it allocates a memory
1410 location for each floating-point variable, and the memory
1411 location is what gets described in the DWARF AT_location
1412 attribute for the variable in question.
1413 Regardless of the severe mental illness of the x86/svr4 SDB, we
1414 do something sensible here and we use the following DWARF
1415 register numbers. Note that these are all stack-top-relative
1416 numbers.
1417 11 for %st(0) (gcc regno = 8)
1418 12 for %st(1) (gcc regno = 9)
1419 13 for %st(2) (gcc regno = 10)
1420 14 for %st(3) (gcc regno = 11)
1421 15 for %st(4) (gcc regno = 12)
1422 16 for %st(5) (gcc regno = 13)
1423 17 for %st(6) (gcc regno = 14)
1424 18 for %st(7) (gcc regno = 15)
1425 */
1426 int const svr4_dbx_register_map[FIRST_PSEUDO_REGISTER] =
1427 {
1428 0, 2, 1, 3, 6, 7, 5, 4, /* general regs */
1429 11, 12, 13, 14, 15, 16, 17, 18, /* fp regs */
1430 -1, 9, -1, -1, -1, /* arg, flags, fpsr, fpcr, frame */
1431 21, 22, 23, 24, 25, 26, 27, 28, /* SSE registers */
1432 29, 30, 31, 32, 33, 34, 35, 36, /* MMX registers */
1433 -1, -1, -1, -1, -1, -1, -1, -1, /* extended integer registers */
1434 -1, -1, -1, -1, -1, -1, -1, -1, /* extended SSE registers */
1435 };
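
/* Illustrative note (exposition only, not part of the original sources):
   each of the maps above is indexed by gcc's hard register number and
   yields the debugger register number.  For instance,
   svr4_dbx_register_map[4] == 6 records that %esi (gcc regno 4) is DWARF
   register 6, matching the "6 for %esi (gcc regno = 4)" line in the
   preceding comment, and a -1 entry means the register has no debugger
   encoding.  */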
1436
1437 /* Test and compare insns in i386.md store the information needed to
1438 generate branch and scc insns here. */
1439
1440 rtx ix86_compare_op0 = NULL_RTX;
1441 rtx ix86_compare_op1 = NULL_RTX;
1442 rtx ix86_compare_emitted = NULL_RTX;
1443
1444 /* Size of the register save area. */
1445 #define X86_64_VARARGS_SIZE (REGPARM_MAX * UNITS_PER_WORD + SSE_REGPARM_MAX * 16)
1446
1447 /* Define the structure for the machine field in struct function. */
1448
1449 struct stack_local_entry GTY(())
1450 {
1451 unsigned short mode;
1452 unsigned short n;
1453 rtx rtl;
1454 struct stack_local_entry *next;
1455 };
1456
1457 /* Structure describing stack frame layout.
1458 Stack grows downward:
1459
1460 [arguments]
1461 <- ARG_POINTER
1462 saved pc
1463
1464 saved frame pointer if frame_pointer_needed
1465 <- HARD_FRAME_POINTER
1466 [saved regs]
1467
1468 [padding1] \
1469 )
1470 [va_arg registers] (
1471 > to_allocate <- FRAME_POINTER
1472 [frame] (
1473 )
1474 [padding2] /
1475 */
1476 struct ix86_frame
1477 {
1478 int nregs;
1479 int padding1;
1480 int va_arg_size;
1481 HOST_WIDE_INT frame;
1482 int padding2;
1483 int outgoing_arguments_size;
1484 int red_zone_size;
1485
1486 HOST_WIDE_INT to_allocate;
1487 /* The offsets relative to ARG_POINTER. */
1488 HOST_WIDE_INT frame_pointer_offset;
1489 HOST_WIDE_INT hard_frame_pointer_offset;
1490 HOST_WIDE_INT stack_pointer_offset;
1491
1492 /* When save_regs_using_mov is set, emit prologue using
1493 move instead of push instructions. */
1494 bool save_regs_using_mov;
1495 };
1496
1497 /* Code model option. */
1498 enum cmodel ix86_cmodel;
1499 /* Asm dialect. */
1500 enum asm_dialect ix86_asm_dialect = ASM_ATT;
1501 /* TLS dialects. */
1502 enum tls_dialect ix86_tls_dialect = TLS_DIALECT_GNU;
1503
1504 /* Which unit we are generating floating point math for. */
1505 enum fpmath_unit ix86_fpmath;
1506
1507 /* Which cpu are we scheduling for. */
1508 enum processor_type ix86_tune;
1509
1510 /* Which instruction set architecture to use. */
1511 enum processor_type ix86_arch;
1512
1513 /* True if the SSE prefetch instruction is not a NOP. */
1514 int x86_prefetch_sse;
1515
1516 /* ix86_regparm_string as a number */
1517 static int ix86_regparm;
1518
1519 /* -mstackrealign option */
1520 extern int ix86_force_align_arg_pointer;
1521 static const char ix86_force_align_arg_pointer_string[] = "force_align_arg_pointer";
1522
1523 /* Preferred alignment for stack boundary in bits. */
1524 unsigned int ix86_preferred_stack_boundary;
1525
1526 /* Values 1-5: see jump.c */
1527 int ix86_branch_cost;
1528
1529 /* Variables which are this size or smaller are put in the data/bss
1530 or ldata/lbss sections. */
1531
1532 int ix86_section_threshold = 65536;
1533
1534 /* Prefix built by ASM_GENERATE_INTERNAL_LABEL. */
1535 char internal_label_prefix[16];
1536 int internal_label_prefix_len;
1537
1538 /* Fence to use after loop using movnt. */
1539 tree x86_mfence;
1540
1541 /* Register class used for passing a given 64-bit part of the argument.
1542 These represent classes as documented by the psABI, with the exception of
1543 the SSESF and SSEDF classes, which are basically the SSE class; gcc just
1544 uses SF or DFmode moves instead of DImode ones to avoid reformatting penalties.
1545
1546 Similarly we play games with INTEGERSI_CLASS to use cheaper SImode moves
1547 whenever possible (the upper half then contains only padding). */
1548 enum x86_64_reg_class
1549 {
1550 X86_64_NO_CLASS,
1551 X86_64_INTEGER_CLASS,
1552 X86_64_INTEGERSI_CLASS,
1553 X86_64_SSE_CLASS,
1554 X86_64_SSESF_CLASS,
1555 X86_64_SSEDF_CLASS,
1556 X86_64_SSEUP_CLASS,
1557 X86_64_X87_CLASS,
1558 X86_64_X87UP_CLASS,
1559 X86_64_COMPLEX_X87_CLASS,
1560 X86_64_MEMORY_CLASS
1561 };
1562 static const char * const x86_64_reg_class_name[] =
1563 {
1564 "no", "integer", "integerSI", "sse", "sseSF", "sseDF",
1565 "sseup", "x87", "x87up", "cplx87", "no"
1566 };
1567
1568 #define MAX_CLASSES 4
1569
1570 /* Table of constants used by fldpi, fldln2, etc.... */
1571 static REAL_VALUE_TYPE ext_80387_constants_table [5];
1572 static bool ext_80387_constants_init = 0;
1573
1574 \f
1575 static struct machine_function * ix86_init_machine_status (void);
1576 static rtx ix86_function_value (const_tree, const_tree, bool);
1577 static int ix86_function_regparm (const_tree, const_tree);
1578 static void ix86_compute_frame_layout (struct ix86_frame *);
1579 static bool ix86_expand_vector_init_one_nonzero (bool, enum machine_mode,
1580 rtx, rtx, int);
1581
1582 \f
1583 /* The svr4 ABI for the i386 says that records and unions are returned
1584 in memory. */
1585 #ifndef DEFAULT_PCC_STRUCT_RETURN
1586 #define DEFAULT_PCC_STRUCT_RETURN 1
1587 #endif
1588
1589 /* Bit flags that specify the ISA we are compiling for. */
1590 int ix86_isa_flags = TARGET_64BIT_DEFAULT | TARGET_SUBTARGET_ISA_DEFAULT;
1591
1592 /* A mask of ix86_isa_flags that includes bit X if X
1593 was set or cleared on the command line. */
1594 static int ix86_isa_flags_explicit;
1595
1596 /* For each ISA option, define the set of ISAs that must also be disabled
1597 when that ISA is disabled. MMX and SSE ISAs are handled separately. */
1598
1599 #define OPTION_MASK_ISA_MMX_UNSET \
1600 (OPTION_MASK_ISA_3DNOW | OPTION_MASK_ISA_3DNOW_UNSET)
1601 #define OPTION_MASK_ISA_3DNOW_UNSET OPTION_MASK_ISA_3DNOW_A
1602
1603 #define OPTION_MASK_ISA_SSE_UNSET \
1604 (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE2_UNSET)
1605 #define OPTION_MASK_ISA_SSE2_UNSET \
1606 (OPTION_MASK_ISA_SSE3 | OPTION_MASK_ISA_SSE3_UNSET)
1607 #define OPTION_MASK_ISA_SSE3_UNSET \
1608 (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_SSSE3_UNSET)
1609 #define OPTION_MASK_ISA_SSSE3_UNSET \
1610 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_1_UNSET)
1611 #define OPTION_MASK_ISA_SSE4_1_UNSET \
1612 (OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_SSE4_2_UNSET)
1613 #define OPTION_MASK_ISA_SSE4_2_UNSET OPTION_MASK_ISA_SSE4A
1614
1615 /* SSE4 includes both SSE4.1 and SSE4.2. -msse4 should be the same
1616 as -msse4.1 -msse4.2. -mno-sse4 should be the same as -mno-sse4.1. */
1617 #define OPTION_MASK_ISA_SSE4 \
1618 (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_SSE4_2)
1619 #define OPTION_MASK_ISA_SSE4_UNSET OPTION_MASK_ISA_SSE4_1_UNSET
1620
1621 #define OPTION_MASK_ISA_SSE4A_UNSET OPTION_MASK_ISA_SSE4
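
/* Worked example (exposition only): OPTION_MASK_ISA_SSE2_UNSET expands,
   through the chain of definitions above, to
   SSE3 | SSSE3 | SSE4.1 | SSE4.2 | SSE4A, so clearing SSE2 with
   -mno-sse2 also clears every later SSE extension while leaving plain
   SSE and MMX alone (see the OPT_msse2 case in ix86_handle_option
   below).  */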
1622
1623 /* Vectorization library interface and handlers. */
1624 tree (*ix86_veclib_handler)(enum built_in_function, tree, tree) = NULL;
1625 static tree ix86_veclibabi_acml (enum built_in_function, tree, tree);
1626
1627 /* Implement TARGET_HANDLE_OPTION. */
1628
1629 static bool
1630 ix86_handle_option (size_t code, const char *arg ATTRIBUTE_UNUSED, int value)
1631 {
1632 switch (code)
1633 {
1634 case OPT_mmmx:
1635 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX;
1636 if (!value)
1637 {
1638 ix86_isa_flags &= ~OPTION_MASK_ISA_MMX_UNSET;
1639 ix86_isa_flags_explicit |= OPTION_MASK_ISA_MMX_UNSET;
1640 }
1641 return true;
1642
1643 case OPT_m3dnow:
1644 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW;
1645 if (!value)
1646 {
1647 ix86_isa_flags &= ~OPTION_MASK_ISA_3DNOW_UNSET;
1648 ix86_isa_flags_explicit |= OPTION_MASK_ISA_3DNOW_UNSET;
1649 }
1650 return true;
1651
1652 case OPT_m3dnowa:
1653 return false;
1654
1655 case OPT_msse:
1656 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE;
1657 if (!value)
1658 {
1659 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE_UNSET;
1660 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE_UNSET;
1661 }
1662 return true;
1663
1664 case OPT_msse2:
1665 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2;
1666 if (!value)
1667 {
1668 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE2_UNSET;
1669 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE2_UNSET;
1670 }
1671 return true;
1672
1673 case OPT_msse3:
1674 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3;
1675 if (!value)
1676 {
1677 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE3_UNSET;
1678 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE3_UNSET;
1679 }
1680 return true;
1681
1682 case OPT_mssse3:
1683 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3;
1684 if (!value)
1685 {
1686 ix86_isa_flags &= ~OPTION_MASK_ISA_SSSE3_UNSET;
1687 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSSE3_UNSET;
1688 }
1689 return true;
1690
1691 case OPT_msse4_1:
1692 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1;
1693 if (!value)
1694 {
1695 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_1_UNSET;
1696 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_1_UNSET;
1697 }
1698 return true;
1699
1700 case OPT_msse4_2:
1701 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2;
1702 if (!value)
1703 {
1704 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_2_UNSET;
1705 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_2_UNSET;
1706 }
1707 return true;
1708
1709 case OPT_msse4:
1710 ix86_isa_flags |= OPTION_MASK_ISA_SSE4;
1711 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4;
1712 return true;
1713
1714 case OPT_mno_sse4:
1715 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4_UNSET;
1716 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4_UNSET;
1717 return true;
1718
1719 case OPT_msse4a:
1720 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A;
1721 if (!value)
1722 {
1723 ix86_isa_flags &= ~OPTION_MASK_ISA_SSE4A_UNSET;
1724 ix86_isa_flags_explicit |= OPTION_MASK_ISA_SSE4A_UNSET;
1725 }
1726 return true;
1727
1728 default:
1729 return true;
1730 }
1731 }
1732
1733 /* Sometimes certain combinations of command options do not make
1734 sense on a particular target machine. You can define a macro
1735 `OVERRIDE_OPTIONS' to take account of this. This macro, if
1736 defined, is executed once just after all the command options have
1737 been parsed.
1738
1739 Don't use this macro to turn on various extra optimizations for
1740 `-O'. That is what `OPTIMIZATION_OPTIONS' is for. */
1741
1742 void
1743 override_options (void)
1744 {
1745 int i;
1746 int ix86_tune_defaulted = 0;
1747 int ix86_arch_specified = 0;
1748 unsigned int ix86_arch_mask, ix86_tune_mask;
1749
1750 /* Comes from final.c -- no real reason to change it. */
1751 #define MAX_CODE_ALIGN 16
1752
1753 static struct ptt
1754 {
1755 const struct processor_costs *cost; /* Processor costs */
1756 const int align_loop; /* Default alignments. */
1757 const int align_loop_max_skip;
1758 const int align_jump;
1759 const int align_jump_max_skip;
1760 const int align_func;
1761 }
1762 const processor_target_table[PROCESSOR_max] =
1763 {
1764 {&i386_cost, 4, 3, 4, 3, 4},
1765 {&i486_cost, 16, 15, 16, 15, 16},
1766 {&pentium_cost, 16, 7, 16, 7, 16},
1767 {&pentiumpro_cost, 16, 15, 16, 10, 16},
1768 {&geode_cost, 0, 0, 0, 0, 0},
1769 {&k6_cost, 32, 7, 32, 7, 32},
1770 {&athlon_cost, 16, 7, 16, 7, 16},
1771 {&pentium4_cost, 0, 0, 0, 0, 0},
1772 {&k8_cost, 16, 7, 16, 7, 16},
1773 {&nocona_cost, 0, 0, 0, 0, 0},
1774 {&core2_cost, 16, 10, 16, 10, 16},
1775 {&generic32_cost, 16, 7, 16, 7, 16},
1776 {&generic64_cost, 16, 10, 16, 10, 16},
1777 {&amdfam10_cost, 32, 24, 32, 7, 32}
1778 };
1779
1780 static const char * const cpu_names[] = TARGET_CPU_DEFAULT_NAMES;
1781 enum pta_flags
1782 {
1783 PTA_SSE = 1 << 0,
1784 PTA_SSE2 = 1 << 1,
1785 PTA_SSE3 = 1 << 2,
1786 PTA_MMX = 1 << 3,
1787 PTA_PREFETCH_SSE = 1 << 4,
1788 PTA_3DNOW = 1 << 5,
1789 PTA_3DNOW_A = 1 << 6,
1790 PTA_64BIT = 1 << 7,
1791 PTA_SSSE3 = 1 << 8,
1792 PTA_CX16 = 1 << 9,
1793 PTA_POPCNT = 1 << 10,
1794 PTA_ABM = 1 << 11,
1795 PTA_SSE4A = 1 << 12,
1796 PTA_NO_SAHF = 1 << 13,
1797 PTA_SSE4_1 = 1 << 14,
1798 PTA_SSE4_2 = 1 << 15
1799 };
1800
1801 static struct pta
1802 {
1803 const char *const name; /* processor name or nickname. */
1804 const enum processor_type processor;
1805 const unsigned /*enum pta_flags*/ flags;
1806 }
1807 const processor_alias_table[] =
1808 {
1809 {"i386", PROCESSOR_I386, 0},
1810 {"i486", PROCESSOR_I486, 0},
1811 {"i586", PROCESSOR_PENTIUM, 0},
1812 {"pentium", PROCESSOR_PENTIUM, 0},
1813 {"pentium-mmx", PROCESSOR_PENTIUM, PTA_MMX},
1814 {"winchip-c6", PROCESSOR_I486, PTA_MMX},
1815 {"winchip2", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1816 {"c3", PROCESSOR_I486, PTA_MMX | PTA_3DNOW},
1817 {"c3-2", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1818 {"i686", PROCESSOR_PENTIUMPRO, 0},
1819 {"pentiumpro", PROCESSOR_PENTIUMPRO, 0},
1820 {"pentium2", PROCESSOR_PENTIUMPRO, PTA_MMX},
1821 {"pentium3", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1822 {"pentium3m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE},
1823 {"pentium-m", PROCESSOR_PENTIUMPRO, PTA_MMX | PTA_SSE | PTA_SSE2},
1824 {"pentium4", PROCESSOR_PENTIUM4, PTA_MMX |PTA_SSE | PTA_SSE2},
1825 {"pentium4m", PROCESSOR_PENTIUM4, PTA_MMX | PTA_SSE | PTA_SSE2},
1826 {"prescott", PROCESSOR_NOCONA, PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3},
1827 {"nocona", PROCESSOR_NOCONA, (PTA_64BIT
1828 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1829 | PTA_CX16 | PTA_NO_SAHF)},
1830 {"core2", PROCESSOR_CORE2, (PTA_64BIT
1831 | PTA_MMX | PTA_SSE | PTA_SSE2 | PTA_SSE3
1832 | PTA_SSSE3
1833 | PTA_CX16)},
1834 {"geode", PROCESSOR_GEODE, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1835 | PTA_PREFETCH_SSE)},
1836 {"k6", PROCESSOR_K6, PTA_MMX},
1837 {"k6-2", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1838 {"k6-3", PROCESSOR_K6, PTA_MMX | PTA_3DNOW},
1839 {"athlon", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1840 | PTA_PREFETCH_SSE)},
1841 {"athlon-tbird", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1842 | PTA_PREFETCH_SSE)},
1843 {"athlon-4", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1844 | PTA_SSE)},
1845 {"athlon-xp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1846 | PTA_SSE)},
1847 {"athlon-mp", PROCESSOR_ATHLON, (PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1848 | PTA_SSE)},
1849 {"x86-64", PROCESSOR_K8, (PTA_64BIT
1850 | PTA_MMX | PTA_SSE | PTA_SSE2
1851 | PTA_NO_SAHF)},
1852 {"k8", PROCESSOR_K8, (PTA_64BIT
1853 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1854 | PTA_SSE | PTA_SSE2
1855 | PTA_NO_SAHF)},
1856 {"k8-sse3", PROCESSOR_K8, (PTA_64BIT
1857 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1858 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1859 | PTA_NO_SAHF)},
1860 {"opteron", PROCESSOR_K8, (PTA_64BIT
1861 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1862 | PTA_SSE | PTA_SSE2
1863 | PTA_NO_SAHF)},
1864 {"opteron-sse3", PROCESSOR_K8, (PTA_64BIT
1865 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1866 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1867 | PTA_NO_SAHF)},
1868 {"athlon64", PROCESSOR_K8, (PTA_64BIT
1869 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1870 | PTA_SSE | PTA_SSE2
1871 | PTA_NO_SAHF)},
1872 {"athlon64-sse3", PROCESSOR_K8, (PTA_64BIT
1873 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1874 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1875 | PTA_NO_SAHF)},
1876 {"athlon-fx", PROCESSOR_K8, (PTA_64BIT
1877 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1878 | PTA_SSE | PTA_SSE2
1879 | PTA_NO_SAHF)},
1880 {"amdfam10", PROCESSOR_AMDFAM10, (PTA_64BIT
1881 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1882 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1883 | PTA_SSE4A
1884 | PTA_CX16 | PTA_ABM)},
1885 {"barcelona", PROCESSOR_AMDFAM10, (PTA_64BIT
1886 | PTA_MMX | PTA_3DNOW | PTA_3DNOW_A
1887 | PTA_SSE | PTA_SSE2 | PTA_SSE3
1888 | PTA_SSE4A
1889 | PTA_CX16 | PTA_ABM)},
1890 {"generic32", PROCESSOR_GENERIC32, 0 /* flags are only used for -march switch. */ },
1891 {"generic64", PROCESSOR_GENERIC64, PTA_64BIT /* flags are only used for -march switch. */ },
1892 };
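
/* Illustrative example (exposition only): with -march=k8 the loop
   further below finds the "k8" entry above, sets ix86_arch (and, absent
   an explicit -mtune, ix86_tune) to PROCESSOR_K8, and turns on the MMX,
   3DNow!, 3DNow!-A, SSE and SSE2 ISA flags unless the user set or
   cleared them explicitly on the command line.  */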
1893
1894 int const pta_size = ARRAY_SIZE (processor_alias_table);
1895
1896 #ifdef SUBTARGET_OVERRIDE_OPTIONS
1897 SUBTARGET_OVERRIDE_OPTIONS;
1898 #endif
1899
1900 #ifdef SUBSUBTARGET_OVERRIDE_OPTIONS
1901 SUBSUBTARGET_OVERRIDE_OPTIONS;
1902 #endif
1903
1904 /* -fPIC is the default for x86_64. */
1905 if (TARGET_MACHO && TARGET_64BIT)
1906 flag_pic = 2;
1907
1908 /* Set the default values for switches whose default depends on TARGET_64BIT
1909 in case they weren't overwritten by command line options. */
1910 if (TARGET_64BIT)
1911 {
1912 /* Mach-O doesn't support omitting the frame pointer for now. */
1913 if (flag_omit_frame_pointer == 2)
1914 flag_omit_frame_pointer = (TARGET_MACHO ? 0 : 1);
1915 if (flag_asynchronous_unwind_tables == 2)
1916 flag_asynchronous_unwind_tables = 1;
1917 if (flag_pcc_struct_return == 2)
1918 flag_pcc_struct_return = 0;
1919 }
1920 else
1921 {
1922 if (flag_omit_frame_pointer == 2)
1923 flag_omit_frame_pointer = 0;
1924 if (flag_asynchronous_unwind_tables == 2)
1925 flag_asynchronous_unwind_tables = 0;
1926 if (flag_pcc_struct_return == 2)
1927 flag_pcc_struct_return = DEFAULT_PCC_STRUCT_RETURN;
1928 }
1929
1930 /* Need to check -mtune=generic first. */
1931 if (ix86_tune_string)
1932 {
1933 if (!strcmp (ix86_tune_string, "generic")
1934 || !strcmp (ix86_tune_string, "i686")
1935 /* As special support for cross compilers we read -mtune=native
1936 as -mtune=generic. With native compilers we won't see the
1937 -mtune=native, as it was changed by the driver. */
1938 || !strcmp (ix86_tune_string, "native"))
1939 {
1940 if (TARGET_64BIT)
1941 ix86_tune_string = "generic64";
1942 else
1943 ix86_tune_string = "generic32";
1944 }
1945 else if (!strncmp (ix86_tune_string, "generic", 7))
1946 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
1947 }
1948 else
1949 {
1950 if (ix86_arch_string)
1951 ix86_tune_string = ix86_arch_string;
1952 if (!ix86_tune_string)
1953 {
1954 ix86_tune_string = cpu_names [TARGET_CPU_DEFAULT];
1955 ix86_tune_defaulted = 1;
1956 }
1957
1958 /* ix86_tune_string is set to ix86_arch_string or defaulted. We
1959 need to use a sensible tune option. */
1960 if (!strcmp (ix86_tune_string, "generic")
1961 || !strcmp (ix86_tune_string, "x86-64")
1962 || !strcmp (ix86_tune_string, "i686"))
1963 {
1964 if (TARGET_64BIT)
1965 ix86_tune_string = "generic64";
1966 else
1967 ix86_tune_string = "generic32";
1968 }
1969 }
1970 if (ix86_stringop_string)
1971 {
1972 if (!strcmp (ix86_stringop_string, "rep_byte"))
1973 stringop_alg = rep_prefix_1_byte;
1974 else if (!strcmp (ix86_stringop_string, "libcall"))
1975 stringop_alg = libcall;
1976 else if (!strcmp (ix86_stringop_string, "rep_4byte"))
1977 stringop_alg = rep_prefix_4_byte;
1978 else if (!strcmp (ix86_stringop_string, "rep_8byte"))
1979 stringop_alg = rep_prefix_8_byte;
1980 else if (!strcmp (ix86_stringop_string, "byte_loop"))
1981 stringop_alg = loop_1_byte;
1982 else if (!strcmp (ix86_stringop_string, "loop"))
1983 stringop_alg = loop;
1984 else if (!strcmp (ix86_stringop_string, "unrolled_loop"))
1985 stringop_alg = unrolled_loop;
1986 else
1987 error ("bad value (%s) for -mstringop-strategy= switch", ix86_stringop_string);
1988 }
1989 if (!strcmp (ix86_tune_string, "x86-64"))
1990 warning (OPT_Wdeprecated, "-mtune=x86-64 is deprecated. Use -mtune=k8 or "
1991 "-mtune=generic instead as appropriate.");
1992
1993 if (!ix86_arch_string)
1994 ix86_arch_string = TARGET_64BIT ? "x86-64" : "i386";
1995 else
1996 ix86_arch_specified = 1;
1997
1998 if (!strcmp (ix86_arch_string, "generic"))
1999 error ("generic CPU can be used only for -mtune= switch");
2000 if (!strncmp (ix86_arch_string, "generic", 7))
2001 error ("bad value (%s) for -march= switch", ix86_arch_string);
2002
2003 if (ix86_cmodel_string != 0)
2004 {
2005 if (!strcmp (ix86_cmodel_string, "small"))
2006 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2007 else if (!strcmp (ix86_cmodel_string, "medium"))
2008 ix86_cmodel = flag_pic ? CM_MEDIUM_PIC : CM_MEDIUM;
2009 else if (!strcmp (ix86_cmodel_string, "large"))
2010 ix86_cmodel = flag_pic ? CM_LARGE_PIC : CM_LARGE;
2011 else if (flag_pic)
2012 error ("code model %s does not support PIC mode", ix86_cmodel_string);
2013 else if (!strcmp (ix86_cmodel_string, "32"))
2014 ix86_cmodel = CM_32;
2015 else if (!strcmp (ix86_cmodel_string, "kernel") && !flag_pic)
2016 ix86_cmodel = CM_KERNEL;
2017 else
2018 error ("bad value (%s) for -mcmodel= switch", ix86_cmodel_string);
2019 }
2020 else
2021 {
2022 /* For TARGET_64BIT_MS_ABI, force pic on, in order to enable the
2023 use of rip-relative addressing. This eliminates fixups that
2024 would otherwise be needed if this object is to be placed in a
2025 DLL, and is essentially just as efficient as direct addressing. */
2026 if (TARGET_64BIT_MS_ABI)
2027 ix86_cmodel = CM_SMALL_PIC, flag_pic = 1;
2028 else if (TARGET_64BIT)
2029 ix86_cmodel = flag_pic ? CM_SMALL_PIC : CM_SMALL;
2030 else
2031 ix86_cmodel = CM_32;
2032 }
2033 if (ix86_asm_string != 0)
2034 {
2035 if (! TARGET_MACHO
2036 && !strcmp (ix86_asm_string, "intel"))
2037 ix86_asm_dialect = ASM_INTEL;
2038 else if (!strcmp (ix86_asm_string, "att"))
2039 ix86_asm_dialect = ASM_ATT;
2040 else
2041 error ("bad value (%s) for -masm= switch", ix86_asm_string);
2042 }
2043 if ((TARGET_64BIT == 0) != (ix86_cmodel == CM_32))
2044 error ("code model %qs not supported in the %s bit mode",
2045 ix86_cmodel_string, TARGET_64BIT ? "64" : "32");
2046 if ((TARGET_64BIT != 0) != ((ix86_isa_flags & OPTION_MASK_ISA_64BIT) != 0))
2047 sorry ("%i-bit mode not compiled in",
2048 (ix86_isa_flags & OPTION_MASK_ISA_64BIT) ? 64 : 32);
2049
2050 for (i = 0; i < pta_size; i++)
2051 if (! strcmp (ix86_arch_string, processor_alias_table[i].name))
2052 {
2053 ix86_arch = processor_alias_table[i].processor;
2054 /* Default cpu tuning to the architecture. */
2055 ix86_tune = ix86_arch;
2056
2057 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2058 error ("CPU you selected does not support x86-64 "
2059 "instruction set");
2060
2061 if (processor_alias_table[i].flags & PTA_MMX
2062 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_MMX))
2063 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2064 if (processor_alias_table[i].flags & PTA_3DNOW
2065 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW))
2066 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW;
2067 if (processor_alias_table[i].flags & PTA_3DNOW_A
2068 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_3DNOW_A))
2069 ix86_isa_flags |= OPTION_MASK_ISA_3DNOW_A;
2070 if (processor_alias_table[i].flags & PTA_SSE
2071 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE))
2072 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2073 if (processor_alias_table[i].flags & PTA_SSE2
2074 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE2))
2075 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2076 if (processor_alias_table[i].flags & PTA_SSE3
2077 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE3))
2078 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2079 if (processor_alias_table[i].flags & PTA_SSSE3
2080 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSSE3))
2081 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2082 if (processor_alias_table[i].flags & PTA_SSE4_1
2083 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_1))
2084 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2085 if (processor_alias_table[i].flags & PTA_SSE4_2
2086 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4_2))
2087 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_2;
2088 if (processor_alias_table[i].flags & PTA_SSE4A
2089 && !(ix86_isa_flags_explicit & OPTION_MASK_ISA_SSE4A))
2090 ix86_isa_flags |= OPTION_MASK_ISA_SSE4A;
2091
2092 if (processor_alias_table[i].flags & PTA_ABM)
2093 x86_abm = true;
2094 if (processor_alias_table[i].flags & PTA_CX16)
2095 x86_cmpxchg16b = true;
2096 if (processor_alias_table[i].flags & (PTA_POPCNT | PTA_ABM))
2097 x86_popcnt = true;
2098 if (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE))
2099 x86_prefetch_sse = true;
2100 if (!(TARGET_64BIT && (processor_alias_table[i].flags & PTA_NO_SAHF)))
2101 x86_sahf = true;
2102
2103 break;
2104 }
2105
2106 if (i == pta_size)
2107 error ("bad value (%s) for -march= switch", ix86_arch_string);
2108
2109 ix86_arch_mask = 1u << ix86_arch;
2110 for (i = 0; i < X86_ARCH_LAST; ++i)
2111 ix86_arch_features[i] &= ix86_arch_mask;
2112
2113 for (i = 0; i < pta_size; i++)
2114 if (! strcmp (ix86_tune_string, processor_alias_table[i].name))
2115 {
2116 ix86_tune = processor_alias_table[i].processor;
2117 if (TARGET_64BIT && !(processor_alias_table[i].flags & PTA_64BIT))
2118 {
2119 if (ix86_tune_defaulted)
2120 {
2121 ix86_tune_string = "x86-64";
2122 for (i = 0; i < pta_size; i++)
2123 if (! strcmp (ix86_tune_string,
2124 processor_alias_table[i].name))
2125 break;
2126 ix86_tune = processor_alias_table[i].processor;
2127 }
2128 else
2129 error ("CPU you selected does not support x86-64 "
2130 "instruction set");
2131 }
2132 /* Intel CPUs have always interpreted SSE prefetch instructions as
2133 NOPs; so, we can enable SSE prefetch instructions even when
2134 -mtune (rather than -march) points us to a processor that has them.
2135 However, the VIA C3 gives a SIGILL, so we only do that for i686 and
2136 higher processors. */
2137 if (TARGET_CMOVE
2138 && (processor_alias_table[i].flags & (PTA_PREFETCH_SSE | PTA_SSE)))
2139 x86_prefetch_sse = true;
2140 break;
2141 }
2142 if (i == pta_size)
2143 error ("bad value (%s) for -mtune= switch", ix86_tune_string);
2144
2145 ix86_tune_mask = 1u << ix86_tune;
2146 for (i = 0; i < X86_TUNE_LAST; ++i)
2147 ix86_tune_features[i] &= ix86_tune_mask;
2148
2149 if (optimize_size)
2150 ix86_cost = &size_cost;
2151 else
2152 ix86_cost = processor_target_table[ix86_tune].cost;
2153
2154 /* Arrange to set up i386_stack_locals for all functions. */
2155 init_machine_status = ix86_init_machine_status;
2156
2157 /* Validate -mregparm= value. */
2158 if (ix86_regparm_string)
2159 {
2160 if (TARGET_64BIT)
2161 warning (0, "-mregparm is ignored in 64-bit mode");
2162 i = atoi (ix86_regparm_string);
2163 if (i < 0 || i > REGPARM_MAX)
2164 error ("-mregparm=%d is not between 0 and %d", i, REGPARM_MAX);
2165 else
2166 ix86_regparm = i;
2167 }
2168 if (TARGET_64BIT)
2169 ix86_regparm = REGPARM_MAX;
2170
2171 /* If the user has provided any of the -malign-* options,
2172 warn and use that value only if -falign-* is not set.
2173 Remove this code in GCC 3.2 or later. */
2174 if (ix86_align_loops_string)
2175 {
2176 warning (0, "-malign-loops is obsolete, use -falign-loops");
2177 if (align_loops == 0)
2178 {
2179 i = atoi (ix86_align_loops_string);
2180 if (i < 0 || i > MAX_CODE_ALIGN)
2181 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2182 else
2183 align_loops = 1 << i;
2184 }
2185 }
2186
2187 if (ix86_align_jumps_string)
2188 {
2189 warning (0, "-malign-jumps is obsolete, use -falign-jumps");
2190 if (align_jumps == 0)
2191 {
2192 i = atoi (ix86_align_jumps_string);
2193 if (i < 0 || i > MAX_CODE_ALIGN)
2194 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2195 else
2196 align_jumps = 1 << i;
2197 }
2198 }
2199
2200 if (ix86_align_funcs_string)
2201 {
2202 warning (0, "-malign-functions is obsolete, use -falign-functions");
2203 if (align_functions == 0)
2204 {
2205 i = atoi (ix86_align_funcs_string);
2206 if (i < 0 || i > MAX_CODE_ALIGN)
2207 error ("-malign-loops=%d is not between 0 and %d", i, MAX_CODE_ALIGN);
2208 else
2209 align_functions = 1 << i;
2210 }
2211 }
2212
2213 /* Default align_* from the processor table. */
2214 if (align_loops == 0)
2215 {
2216 align_loops = processor_target_table[ix86_tune].align_loop;
2217 align_loops_max_skip = processor_target_table[ix86_tune].align_loop_max_skip;
2218 }
2219 if (align_jumps == 0)
2220 {
2221 align_jumps = processor_target_table[ix86_tune].align_jump;
2222 align_jumps_max_skip = processor_target_table[ix86_tune].align_jump_max_skip;
2223 }
2224 if (align_functions == 0)
2225 {
2226 align_functions = processor_target_table[ix86_tune].align_func;
2227 }
2228
2229 /* Validate -mbranch-cost= value, or provide default. */
2230 ix86_branch_cost = ix86_cost->branch_cost;
2231 if (ix86_branch_cost_string)
2232 {
2233 i = atoi (ix86_branch_cost_string);
2234 if (i < 0 || i > 5)
2235 error ("-mbranch-cost=%d is not between 0 and 5", i);
2236 else
2237 ix86_branch_cost = i;
2238 }
2239 if (ix86_section_threshold_string)
2240 {
2241 i = atoi (ix86_section_threshold_string);
2242 if (i < 0)
2243 error ("-mlarge-data-threshold=%d is negative", i);
2244 else
2245 ix86_section_threshold = i;
2246 }
2247
2248 if (ix86_tls_dialect_string)
2249 {
2250 if (strcmp (ix86_tls_dialect_string, "gnu") == 0)
2251 ix86_tls_dialect = TLS_DIALECT_GNU;
2252 else if (strcmp (ix86_tls_dialect_string, "gnu2") == 0)
2253 ix86_tls_dialect = TLS_DIALECT_GNU2;
2254 else if (strcmp (ix86_tls_dialect_string, "sun") == 0)
2255 ix86_tls_dialect = TLS_DIALECT_SUN;
2256 else
2257 error ("bad value (%s) for -mtls-dialect= switch",
2258 ix86_tls_dialect_string);
2259 }
2260
2261 if (ix87_precision_string)
2262 {
2263 i = atoi (ix87_precision_string);
2264 if (i != 32 && i != 64 && i != 80)
2265 error ("pc%d is not valid precision setting (32, 64 or 80)", i);
2266 }
2267
2268 if (TARGET_64BIT)
2269 {
2270 target_flags |= TARGET_SUBTARGET64_DEFAULT & ~target_flags_explicit;
2271
2272 /* Enable by default the SSE and MMX builtins. Do allow the user to
2273 explicitly disable any of these. In particular, disabling SSE and
2274 MMX for kernel code is extremely useful. */
2275 if (!ix86_arch_specified)
2276 ix86_isa_flags
2277 |= ((OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX
2278 | TARGET_SUBTARGET64_ISA_DEFAULT) & ~ix86_isa_flags_explicit);
2279
2280 if (TARGET_RTD)
2281 warning (0, "-mrtd is ignored in 64bit mode");
2282 }
2283 else
2284 {
2285 target_flags |= TARGET_SUBTARGET32_DEFAULT & ~target_flags_explicit;
2286
2287 if (!ix86_arch_specified)
2288 ix86_isa_flags
2289 |= TARGET_SUBTARGET32_ISA_DEFAULT & ~ix86_isa_flags_explicit;
2290
2291 /* The i386 ABI does not specify a red zone. It still makes sense to use
2292 one when the programmer takes care to keep the stack from being destroyed. */
2293 if (!(target_flags_explicit & MASK_NO_RED_ZONE))
2294 target_flags |= MASK_NO_RED_ZONE;
2295 }
2296
2297 /* Keep nonleaf frame pointers. */
2298 if (flag_omit_frame_pointer)
2299 target_flags &= ~MASK_OMIT_LEAF_FRAME_POINTER;
2300 else if (TARGET_OMIT_LEAF_FRAME_POINTER)
2301 flag_omit_frame_pointer = 1;
2302
2303 /* If we're doing fast math, we don't care about comparison order
2304 wrt NaNs. This lets us use a shorter comparison sequence. */
2305 if (flag_finite_math_only)
2306 target_flags &= ~MASK_IEEE_FP;
2307
2308 /* If the architecture always has an FPU, turn off NO_FANCY_MATH_387,
2309 since the insns won't need emulation. */
2310 if (x86_arch_always_fancy_math_387 & ix86_arch_mask)
2311 target_flags &= ~MASK_NO_FANCY_MATH_387;
2312
2313 /* Likewise, if the target doesn't have a 387, or we've specified
2314 software floating point, don't use 387 inline intrinsics. */
2315 if (!TARGET_80387)
2316 target_flags |= MASK_NO_FANCY_MATH_387;
2317
2318 /* Turn on SSE4.1 builtins for -msse4.2. */
2319 if (TARGET_SSE4_2)
2320 ix86_isa_flags |= OPTION_MASK_ISA_SSE4_1;
2321
2322 /* Turn on SSSE3 builtins for -msse4.1. */
2323 if (TARGET_SSE4_1)
2324 ix86_isa_flags |= OPTION_MASK_ISA_SSSE3;
2325
2326 /* Turn on SSE3 builtins for -mssse3. */
2327 if (TARGET_SSSE3)
2328 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2329
2330 /* Turn on SSE3 builtins for -msse4a. */
2331 if (TARGET_SSE4A)
2332 ix86_isa_flags |= OPTION_MASK_ISA_SSE3;
2333
2334 /* Turn on SSE2 builtins for -msse3. */
2335 if (TARGET_SSE3)
2336 ix86_isa_flags |= OPTION_MASK_ISA_SSE2;
2337
2338 /* Turn on SSE builtins for -msse2. */
2339 if (TARGET_SSE2)
2340 ix86_isa_flags |= OPTION_MASK_ISA_SSE;
2341
2342 /* Turn on MMX builtins for -msse. */
2343 if (TARGET_SSE)
2344 {
2345 ix86_isa_flags |= OPTION_MASK_ISA_MMX & ~ix86_isa_flags_explicit;
2346 x86_prefetch_sse = true;
2347 }
2348
2349 /* Turn on MMX builtins for 3Dnow. */
2350 if (TARGET_3DNOW)
2351 ix86_isa_flags |= OPTION_MASK_ISA_MMX;
2352
2353 /* Turn on popcnt instruction for -msse4.2 or -mabm. */
2354 if (TARGET_SSE4_2 || TARGET_ABM)
2355 x86_popcnt = true;
2356
2357 /* Validate -mpreferred-stack-boundary= value, or provide default.
2358 The default of 128 bits is for the Pentium III's SSE __m128. We can't
2359 lower it just because of optimize_size, since otherwise we could not
2360 mix object files compiled with -Os and -On. */
2361 ix86_preferred_stack_boundary = 128;
2362 if (ix86_preferred_stack_boundary_string)
2363 {
2364 i = atoi (ix86_preferred_stack_boundary_string);
2365 if (i < (TARGET_64BIT ? 4 : 2) || i > 12)
2366 error ("-mpreferred-stack-boundary=%d is not between %d and 12", i,
2367 TARGET_64BIT ? 4 : 2);
2368 else
2369 ix86_preferred_stack_boundary = (1 << i) * BITS_PER_UNIT;
2370 }
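
/* Worked example (exposition only): -mpreferred-stack-boundary=4 gives
   i == 4, so ix86_preferred_stack_boundary becomes
   (1 << 4) * BITS_PER_UNIT == 128 bits, i.e. the 16-byte preferred
   stack alignment that is also the built-in default above.  */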
2371
2372 /* Accept -msseregparm only if at least SSE support is enabled. */
2373 if (TARGET_SSEREGPARM
2374 && ! TARGET_SSE)
2375 error ("-msseregparm used without SSE enabled");
2376
2377 ix86_fpmath = TARGET_FPMATH_DEFAULT;
2378 if (ix86_fpmath_string != 0)
2379 {
2380 if (! strcmp (ix86_fpmath_string, "387"))
2381 ix86_fpmath = FPMATH_387;
2382 else if (! strcmp (ix86_fpmath_string, "sse"))
2383 {
2384 if (!TARGET_SSE)
2385 {
2386 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2387 ix86_fpmath = FPMATH_387;
2388 }
2389 else
2390 ix86_fpmath = FPMATH_SSE;
2391 }
2392 else if (! strcmp (ix86_fpmath_string, "387,sse")
2393 || ! strcmp (ix86_fpmath_string, "sse,387"))
2394 {
2395 if (!TARGET_SSE)
2396 {
2397 warning (0, "SSE instruction set disabled, using 387 arithmetics");
2398 ix86_fpmath = FPMATH_387;
2399 }
2400 else if (!TARGET_80387)
2401 {
2402 warning (0, "387 instruction set disabled, using SSE arithmetics");
2403 ix86_fpmath = FPMATH_SSE;
2404 }
2405 else
2406 ix86_fpmath = (enum fpmath_unit) (FPMATH_SSE | FPMATH_387);
2407 }
2408 else
2409 error ("bad value (%s) for -mfpmath= switch", ix86_fpmath_string);
2410 }
2411
2412 /* If the i387 is disabled, then do not return values in it. */
2413 if (!TARGET_80387)
2414 target_flags &= ~MASK_FLOAT_RETURNS;
2415
2416 /* Use an external vectorized library for vectorizing intrinsics. */
2417 if (ix86_veclibabi_string)
2418 {
2419 if (strcmp (ix86_veclibabi_string, "acml") == 0)
2420 ix86_veclib_handler = ix86_veclibabi_acml;
2421 else
2422 error ("unknown vectorization library ABI type (%s) for "
2423 "-mveclibabi= switch", ix86_veclibabi_string);
2424 }
2425
2426 if ((x86_accumulate_outgoing_args & ix86_tune_mask)
2427 && !(target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2428 && !optimize_size)
2429 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2430
2431 /* ??? Unwind info is not correct around the CFG unless either a frame
2432 pointer is present or M_A_O_A is set. Fixing this requires rewriting
2433 unwind info generation to be aware of the CFG and propagating states
2434 around edges. */
2435 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
2436 || flag_exceptions || flag_non_call_exceptions)
2437 && flag_omit_frame_pointer
2438 && !(target_flags & MASK_ACCUMULATE_OUTGOING_ARGS))
2439 {
2440 if (target_flags_explicit & MASK_ACCUMULATE_OUTGOING_ARGS)
2441 warning (0, "unwind tables currently require either a frame pointer "
2442 "or -maccumulate-outgoing-args for correctness");
2443 target_flags |= MASK_ACCUMULATE_OUTGOING_ARGS;
2444 }
2445
2446 /* For sane SSE instruction set generation we need the fcomi instruction.
2447 It is safe to enable all CMOVE instructions. */
2448 if (TARGET_SSE)
2449 TARGET_CMOVE = 1;
2450
2451 /* Figure out what ASM_GENERATE_INTERNAL_LABEL builds as a prefix. */
2452 {
2453 char *p;
2454 ASM_GENERATE_INTERNAL_LABEL (internal_label_prefix, "LX", 0);
2455 p = strchr (internal_label_prefix, 'X');
2456 internal_label_prefix_len = p - internal_label_prefix;
2457 *p = '\0';
2458 }
2459
2460 /* When the scheduling description is not available, disable the scheduler
2461 pass so it won't slow down the compilation and make x87 code slower. */
2462 if (!TARGET_SCHEDULE)
2463 flag_schedule_insns_after_reload = flag_schedule_insns = 0;
2464
2465 if (!PARAM_SET_P (PARAM_SIMULTANEOUS_PREFETCHES))
2466 set_param_value ("simultaneous-prefetches",
2467 ix86_cost->simultaneous_prefetches);
2468 if (!PARAM_SET_P (PARAM_L1_CACHE_LINE_SIZE))
2469 set_param_value ("l1-cache-line-size", ix86_cost->prefetch_block);
2470 if (!PARAM_SET_P (PARAM_L1_CACHE_SIZE))
2471 set_param_value ("l1-cache-size", ix86_cost->l1_cache_size);
2472 if (!PARAM_SET_P (PARAM_L2_CACHE_SIZE))
2473 set_param_value ("l2-cache-size", ix86_cost->l2_cache_size);
2474 }
2475 \f
2476 /* Return true if this goes in large data/bss. */
2477
2478 static bool
2479 ix86_in_large_data_p (tree exp)
2480 {
2481 if (ix86_cmodel != CM_MEDIUM && ix86_cmodel != CM_MEDIUM_PIC)
2482 return false;
2483
2484 /* Functions are never large data. */
2485 if (TREE_CODE (exp) == FUNCTION_DECL)
2486 return false;
2487
2488 if (TREE_CODE (exp) == VAR_DECL && DECL_SECTION_NAME (exp))
2489 {
2490 const char *section = TREE_STRING_POINTER (DECL_SECTION_NAME (exp));
2491 if (strcmp (section, ".ldata") == 0
2492 || strcmp (section, ".lbss") == 0)
2493 return true;
2494 return false;
2495 }
2496 else
2497 {
2498 HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (exp));
2499
2500 /* If this is an incomplete type with size 0, then we can't put it
2501 in data because it might be too big when completed. */
2502 if (!size || size > ix86_section_threshold)
2503 return true;
2504 }
2505
2506 return false;
2507 }
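
/* Illustrative example (exposition only): with -mcmodel=medium and the
   default -mlarge-data-threshold of 65536, a hypothetical definition
   such as

     static char big_buffer[100000];

   is classified as large data by ix86_in_large_data_p (100000 > 65536)
   and is placed in the .lbss/.ldata family of sections by the hooks
   below, while smaller objects stay in the ordinary sections.  */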
2508
2509 /* Switch to the appropriate section for output of DECL.
2510 DECL is either a `VAR_DECL' node or a constant of some sort.
2511 RELOC indicates whether forming the initial value of DECL requires
2512 link-time relocations. */
2513
2514 static section * x86_64_elf_select_section (tree, int, unsigned HOST_WIDE_INT)
2515 ATTRIBUTE_UNUSED;
2516
2517 static section *
2518 x86_64_elf_select_section (tree decl, int reloc,
2519 unsigned HOST_WIDE_INT align)
2520 {
2521 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2522 && ix86_in_large_data_p (decl))
2523 {
2524 const char *sname = NULL;
2525 unsigned int flags = SECTION_WRITE;
2526 switch (categorize_decl_for_section (decl, reloc))
2527 {
2528 case SECCAT_DATA:
2529 sname = ".ldata";
2530 break;
2531 case SECCAT_DATA_REL:
2532 sname = ".ldata.rel";
2533 break;
2534 case SECCAT_DATA_REL_LOCAL:
2535 sname = ".ldata.rel.local";
2536 break;
2537 case SECCAT_DATA_REL_RO:
2538 sname = ".ldata.rel.ro";
2539 break;
2540 case SECCAT_DATA_REL_RO_LOCAL:
2541 sname = ".ldata.rel.ro.local";
2542 break;
2543 case SECCAT_BSS:
2544 sname = ".lbss";
2545 flags |= SECTION_BSS;
2546 break;
2547 case SECCAT_RODATA:
2548 case SECCAT_RODATA_MERGE_STR:
2549 case SECCAT_RODATA_MERGE_STR_INIT:
2550 case SECCAT_RODATA_MERGE_CONST:
2551 sname = ".lrodata";
2552 flags = 0;
2553 break;
2554 case SECCAT_SRODATA:
2555 case SECCAT_SDATA:
2556 case SECCAT_SBSS:
2557 gcc_unreachable ();
2558 case SECCAT_TEXT:
2559 case SECCAT_TDATA:
2560 case SECCAT_TBSS:
2561 /* We don't split these for the medium model. Place them into
2562 default sections and hope for the best. */
2563 break;
2564 }
2565 if (sname)
2566 {
2567 /* We might get called with string constants, but get_named_section
2568 doesn't like them as they are not DECLs. Also, we need to set
2569 flags in that case. */
2570 if (!DECL_P (decl))
2571 return get_section (sname, flags, NULL);
2572 return get_named_section (decl, sname, reloc);
2573 }
2574 }
2575 return default_elf_select_section (decl, reloc, align);
2576 }
2577
2578 /* Build up a unique section name, expressed as a
2579 STRING_CST node, and assign it to DECL_SECTION_NAME (decl).
2580 RELOC indicates whether the initial value of EXP requires
2581 link-time relocations. */
2582
2583 static void ATTRIBUTE_UNUSED
2584 x86_64_elf_unique_section (tree decl, int reloc)
2585 {
2586 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2587 && ix86_in_large_data_p (decl))
2588 {
2589 const char *prefix = NULL;
2590 /* We only need to use .gnu.linkonce if we don't have COMDAT groups. */
2591 bool one_only = DECL_ONE_ONLY (decl) && !HAVE_COMDAT_GROUP;
2592
2593 switch (categorize_decl_for_section (decl, reloc))
2594 {
2595 case SECCAT_DATA:
2596 case SECCAT_DATA_REL:
2597 case SECCAT_DATA_REL_LOCAL:
2598 case SECCAT_DATA_REL_RO:
2599 case SECCAT_DATA_REL_RO_LOCAL:
2600 prefix = one_only ? ".gnu.linkonce.ld." : ".ldata.";
2601 break;
2602 case SECCAT_BSS:
2603 prefix = one_only ? ".gnu.linkonce.lb." : ".lbss.";
2604 break;
2605 case SECCAT_RODATA:
2606 case SECCAT_RODATA_MERGE_STR:
2607 case SECCAT_RODATA_MERGE_STR_INIT:
2608 case SECCAT_RODATA_MERGE_CONST:
2609 prefix = one_only ? ".gnu.linkonce.lr." : ".lrodata.";
2610 break;
2611 case SECCAT_SRODATA:
2612 case SECCAT_SDATA:
2613 case SECCAT_SBSS:
2614 gcc_unreachable ();
2615 case SECCAT_TEXT:
2616 case SECCAT_TDATA:
2617 case SECCAT_TBSS:
2618 /* We don't split these for the medium model. Place them into
2619 default sections and hope for the best. */
2620 break;
2621 }
2622 if (prefix)
2623 {
2624 const char *name;
2625 size_t nlen, plen;
2626 char *string;
2627 plen = strlen (prefix);
2628
2629 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
2630 name = targetm.strip_name_encoding (name);
2631 nlen = strlen (name);
2632
2633 string = (char *) alloca (nlen + plen + 1);
2634 memcpy (string, prefix, plen);
2635 memcpy (string + plen, name, nlen + 1);
2636
2637 DECL_SECTION_NAME (decl) = build_string (nlen + plen, string);
2638 return;
2639 }
2640 }
2641 default_unique_section (decl, reloc);
2642 }
2643
2644 #ifdef COMMON_ASM_OP
2645 /* This says how to output assembler code to declare an
2646 uninitialized external linkage data object.
2647
2648 For the medium model on x86-64 we need to use the .largecomm directive
2649 for large objects. */
2650 void
2651 x86_elf_aligned_common (FILE *file,
2652 const char *name, unsigned HOST_WIDE_INT size,
2653 int align)
2654 {
2655 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2656 && size > (unsigned int)ix86_section_threshold)
2657 fprintf (file, ".largecomm\t");
2658 else
2659 fprintf (file, "%s", COMMON_ASM_OP);
2660 assemble_name (file, name);
2661 fprintf (file, ","HOST_WIDE_INT_PRINT_UNSIGNED",%u\n",
2662 size, align / BITS_PER_UNIT);
2663 }
2664 #endif
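
/* Illustrative output (exposition only): under -mcmodel=medium, a
   hypothetical 100000-byte common symbol `buf' with 32-byte (256-bit)
   alignment exceeds ix86_section_threshold, so the function above emits

     .largecomm	buf,100000,32

   while smaller commons get the ordinary COMMON_ASM_OP (.comm)
   directive.  */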
2665
2666 /* Utility function for targets to use in implementing
2667 ASM_OUTPUT_ALIGNED_BSS. */
2668
2669 void
2670 x86_output_aligned_bss (FILE *file, tree decl ATTRIBUTE_UNUSED,
2671 const char *name, unsigned HOST_WIDE_INT size,
2672 int align)
2673 {
2674 if ((ix86_cmodel == CM_MEDIUM || ix86_cmodel == CM_MEDIUM_PIC)
2675 && size > (unsigned int)ix86_section_threshold)
2676 switch_to_section (get_named_section (decl, ".lbss", 0));
2677 else
2678 switch_to_section (bss_section);
2679 ASM_OUTPUT_ALIGN (file, floor_log2 (align / BITS_PER_UNIT));
2680 #ifdef ASM_DECLARE_OBJECT_NAME
2681 last_assemble_variable_decl = decl;
2682 ASM_DECLARE_OBJECT_NAME (file, name, decl);
2683 #else
2684 /* The standard thing is just to output a label for the object. */
2685 ASM_OUTPUT_LABEL (file, name);
2686 #endif /* ASM_DECLARE_OBJECT_NAME */
2687 ASM_OUTPUT_SKIP (file, size ? size : 1);
2688 }
2689 \f
2690 void
2691 optimization_options (int level, int size ATTRIBUTE_UNUSED)
2692 {
2693 /* For -O2 and beyond, turn off -fschedule-insns by default. It tends to
2694 make the problem with not enough registers even worse. */
2695 #ifdef INSN_SCHEDULING
2696 if (level > 1)
2697 flag_schedule_insns = 0;
2698 #endif
2699
2700 if (TARGET_MACHO)
2701 /* The Darwin libraries never set errno, so we might as well
2702 avoid calling them when that's the only reason we would. */
2703 flag_errno_math = 0;
2704
2705 /* The default values of these switches depend on TARGET_64BIT, which is
2706 not known at this point. Mark these values with 2 and let the user
2707 override them. In case there is no command line option specifying them,
2708 we will set the defaults in override_options. */
2709 if (optimize >= 1)
2710 flag_omit_frame_pointer = 2;
2711 flag_pcc_struct_return = 2;
2712 flag_asynchronous_unwind_tables = 2;
2713 #ifdef SUBTARGET_OPTIMIZATION_OPTIONS
2714 SUBTARGET_OPTIMIZATION_OPTIONS;
2715 #endif
2716 }
2717 \f
2718 /* Decide whether we can make a sibling call to a function. DECL is the
2719 declaration of the function being targeted by the call and EXP is the
2720 CALL_EXPR representing the call. */
2721
2722 static bool
2723 ix86_function_ok_for_sibcall (tree decl, tree exp)
2724 {
2725 tree func;
2726 rtx a, b;
2727
2728 /* If we are generating position-independent code, we cannot sibcall
2729 optimize any indirect call, or a direct call to a global function,
2730 as the PLT requires %ebx be live. */
2731 if (!TARGET_64BIT && flag_pic && (!decl || !targetm.binds_local_p (decl)))
2732 return false;
2733
2734 if (decl)
2735 func = decl;
2736 else
2737 {
2738 func = TREE_TYPE (CALL_EXPR_FN (exp));
2739 if (POINTER_TYPE_P (func))
2740 func = TREE_TYPE (func);
2741 }
2742
2743 /* Check that the return value locations are the same. For example,
2744 if we are returning floats on the 80387 register stack, we cannot
2745 make a sibcall from a function that doesn't return a float to a
2746 function that does or, conversely, from a function that does return
2747 a float to a function that doesn't; the necessary stack adjustment
2748 would not be executed. This is also the place we notice
2749 differences in the return value ABI. Note that it is ok for one
2750 of the functions to have void return type as long as the return
2751 value of the other is passed in a register. */
2752 a = ix86_function_value (TREE_TYPE (exp), func, false);
2753 b = ix86_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
2754 cfun->decl, false);
2755 if (STACK_REG_P (a) || STACK_REG_P (b))
2756 {
2757 if (!rtx_equal_p (a, b))
2758 return false;
2759 }
2760 else if (VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
2761 ;
2762 else if (!rtx_equal_p (a, b))
2763 return false;
2764
2765 /* If this call is indirect, we'll need to be able to use a call-clobbered
2766 register for the address of the target function. Make sure that all
2767 such registers are not used for passing parameters. */
2768 if (!decl && !TARGET_64BIT)
2769 {
2770 tree type;
2771
2772 /* We're looking at the CALL_EXPR, we need the type of the function. */
2773 type = CALL_EXPR_FN (exp); /* pointer expression */
2774 type = TREE_TYPE (type); /* pointer type */
2775 type = TREE_TYPE (type); /* function type */
2776
2777 if (ix86_function_regparm (type, NULL) >= 3)
2778 {
2779 /* ??? Need to count the actual number of registers to be used,
2780 not the possible number of registers. Fix later. */
2781 return false;
2782 }
2783 }
2784
2785 /* Dllimport'd functions are also called indirectly. */
2786 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
2787 && decl && DECL_DLLIMPORT_P (decl)
2788 && ix86_function_regparm (TREE_TYPE (decl), NULL) >= 3)
2789 return false;
2790
2791 /* If we force-aligned the stack, then sibcalling would unalign the
2792 stack, which may break the called function. */
2793 if (cfun->machine->force_align_arg_pointer)
2794 return false;
2795
2796 /* Otherwise okay. That also includes certain types of indirect calls. */
2797 return true;
2798 }
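
/* Illustrative example (hypothetical user code, exposition only):

     extern int callee (int);
     int caller (int x) { return callee (x); }

   On a 32-bit target compiled with -fPIC, CALLEE does not normally bind
   locally, so the first check in ix86_function_ok_for_sibcall rejects
   the tail call: it would have to go through the PLT, which requires
   %ebx to be live.  */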
2799
2800 /* Handle "cdecl", "stdcall", "fastcall", "regparm" and "sseregparm"
2801 calling convention attributes;
2802 arguments as in struct attribute_spec.handler. */
2803
2804 static tree
2805 ix86_handle_cconv_attribute (tree *node, tree name,
2806 tree args,
2807 int flags ATTRIBUTE_UNUSED,
2808 bool *no_add_attrs)
2809 {
2810 if (TREE_CODE (*node) != FUNCTION_TYPE
2811 && TREE_CODE (*node) != METHOD_TYPE
2812 && TREE_CODE (*node) != FIELD_DECL
2813 && TREE_CODE (*node) != TYPE_DECL)
2814 {
2815 warning (OPT_Wattributes, "%qs attribute only applies to functions",
2816 IDENTIFIER_POINTER (name));
2817 *no_add_attrs = true;
2818 return NULL_TREE;
2819 }
2820
2821 /* Can combine regparm with all attributes but fastcall. */
2822 if (is_attribute_p ("regparm", name))
2823 {
2824 tree cst;
2825
2826 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2827 {
2828 error ("fastcall and regparm attributes are not compatible");
2829 }
2830
2831 cst = TREE_VALUE (args);
2832 if (TREE_CODE (cst) != INTEGER_CST)
2833 {
2834 warning (OPT_Wattributes,
2835 "%qs attribute requires an integer constant argument",
2836 IDENTIFIER_POINTER (name));
2837 *no_add_attrs = true;
2838 }
2839 else if (compare_tree_int (cst, REGPARM_MAX) > 0)
2840 {
2841 warning (OPT_Wattributes, "argument to %qs attribute larger than %d",
2842 IDENTIFIER_POINTER (name), REGPARM_MAX);
2843 *no_add_attrs = true;
2844 }
2845
2846 if (!TARGET_64BIT
2847 && lookup_attribute (ix86_force_align_arg_pointer_string,
2848 TYPE_ATTRIBUTES (*node))
2849 && compare_tree_int (cst, REGPARM_MAX-1))
2850 {
2851 error ("%s functions limited to %d register parameters",
2852 ix86_force_align_arg_pointer_string, REGPARM_MAX-1);
2853 }
2854
2855 return NULL_TREE;
2856 }
2857
2858 if (TARGET_64BIT)
2859 {
2860 /* Do not warn when emulating the MS ABI. */
2861 if (!TARGET_64BIT_MS_ABI)
2862 warning (OPT_Wattributes, "%qs attribute ignored",
2863 IDENTIFIER_POINTER (name));
2864 *no_add_attrs = true;
2865 return NULL_TREE;
2866 }
2867
2868 /* Can combine fastcall with stdcall (redundant) and sseregparm. */
2869 if (is_attribute_p ("fastcall", name))
2870 {
2871 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2872 {
2873 error ("fastcall and cdecl attributes are not compatible");
2874 }
2875 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2876 {
2877 error ("fastcall and stdcall attributes are not compatible");
2878 }
2879 if (lookup_attribute ("regparm", TYPE_ATTRIBUTES (*node)))
2880 {
2881 error ("fastcall and regparm attributes are not compatible");
2882 }
2883 }
2884
2885 /* Can combine stdcall with fastcall (redundant), regparm and
2886 sseregparm. */
2887 else if (is_attribute_p ("stdcall", name))
2888 {
2889 if (lookup_attribute ("cdecl", TYPE_ATTRIBUTES (*node)))
2890 {
2891 error ("stdcall and cdecl attributes are not compatible");
2892 }
2893 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2894 {
2895 error ("stdcall and fastcall attributes are not compatible");
2896 }
2897 }
2898
2899 /* Can combine cdecl with regparm and sseregparm. */
2900 else if (is_attribute_p ("cdecl", name))
2901 {
2902 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (*node)))
2903 {
2904 error ("stdcall and cdecl attributes are not compatible");
2905 }
2906 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (*node)))
2907 {
2908 error ("fastcall and cdecl attributes are not compatible");
2909 }
2910 }
2911
2912 /* Can combine sseregparm with all attributes. */
2913
2914 return NULL_TREE;
2915 }
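
/* Illustrative usage (hypothetical declarations, exposition only):

     int __attribute__ ((regparm (2))) f (int a, int b);
     int __attribute__ ((fastcall, regparm (3))) g (int a);

   For F, the handler above checks that the regparm argument is an
   integer constant no larger than REGPARM_MAX and accepts it; for G it
   reports that the fastcall and regparm attributes are not
   compatible.  */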
2916
2917 /* Return 0 if the attributes for two types are incompatible, 1 if they
2918 are compatible, and 2 if they are nearly compatible (which causes a
2919 warning to be generated). */
2920
2921 static int
2922 ix86_comp_type_attributes (const_tree type1, const_tree type2)
2923 {
2924 /* Check for mismatch of non-default calling convention. */
2925 const char *const rtdstr = TARGET_RTD ? "cdecl" : "stdcall";
2926
2927 if (TREE_CODE (type1) != FUNCTION_TYPE)
2928 return 1;
2929
2930 /* Check for mismatched fastcall/regparm types. */
2931 if ((!lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type1))
2932 != !lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type2)))
2933 || (ix86_function_regparm (type1, NULL)
2934 != ix86_function_regparm (type2, NULL)))
2935 return 0;
2936
2937 /* Check for mismatched sseregparm types. */
2938 if (!lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type1))
2939 != !lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type2)))
2940 return 0;
2941
2942 /* Check for mismatched return types (cdecl vs stdcall). */
2943 if (!lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type1))
2944 != !lookup_attribute (rtdstr, TYPE_ATTRIBUTES (type2)))
2945 return 0;
2946
2947 return 1;
2948 }
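
/* Illustrative example (hypothetical types, exposition only): a pointer
   to `int __attribute__ ((fastcall)) (int)' is not compatible with a
   pointer to plain `int (int)'; the fastcall lookups above differ
   between the two types, so ix86_comp_type_attributes returns 0 and the
   front end reports the mismatch.  */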
2949 \f
2950 /* Return the regparm value for a function with the indicated TYPE and DECL.
2951 DECL may be NULL when calling function indirectly
2952 or considering a libcall. */
2953
2954 static int
2955 ix86_function_regparm (const_tree type, const_tree decl)
2956 {
2957 tree attr;
2958 int regparm = ix86_regparm;
2959
2960 if (TARGET_64BIT)
2961 return regparm;
2962
2963 attr = lookup_attribute ("regparm", TYPE_ATTRIBUTES (type));
2964 if (attr)
2965 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (attr)));
2966
2967 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
2968 return 2;
2969
2970 /* Use register calling convention for local functions when possible. */
2971 if (decl && TREE_CODE (decl) == FUNCTION_DECL
2972 && flag_unit_at_a_time && !profile_flag)
2973 {
2974 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
2975 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
2976 if (i && i->local)
2977 {
2978 int local_regparm, globals = 0, regno;
2979 struct function *f;
2980
2981 /* Make sure no regparm register is taken by a
2982 global register variable. */
2983 for (local_regparm = 0; local_regparm < 3; local_regparm++)
2984 if (global_regs[local_regparm])
2985 break;
2986
2987 /* We can't use regparm(3) for nested functions, as these pass the
2988 static chain pointer in the third argument register. */
2989 if (local_regparm == 3
2990 && (decl_function_context (decl)
2991 || ix86_force_align_arg_pointer)
2992 && !DECL_NO_STATIC_CHAIN (decl))
2993 local_regparm = 2;
2994
2995 /* If the function realigns its stack pointer, the prologue will
2996 clobber %ecx. If we've already generated code for the callee,
2997 the callee DECL_STRUCT_FUNCTION is gone, so we fall back to
2998 scanning the attributes for the self-realigning property. */
2999 f = DECL_STRUCT_FUNCTION (decl);
3000 if (local_regparm == 3
3001 && (f ? !!f->machine->force_align_arg_pointer
3002 : !!lookup_attribute (ix86_force_align_arg_pointer_string,
3003 TYPE_ATTRIBUTES (TREE_TYPE (decl)))))
3004 local_regparm = 2;
3005
3006 /* Each global register variable increases register pressure, so
3007 the more global reg vars there are, the fewer registers regparm
3008 optimization can use, unless requested by the user explicitly. */
3009 for (regno = 0; regno < 6; regno++)
3010 if (global_regs[regno])
3011 globals++;
3012 local_regparm
3013 = globals < local_regparm ? local_regparm - globals : 0;
3014
3015 if (local_regparm > regparm)
3016 regparm = local_regparm;
3017 }
3018 }
3019
3020 return regparm;
3021 }
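
/* Illustrative example (exposition only): a static function that is
   only called directly, compiled with -O2 -funit-at-a-time and without
   profiling, is marked local by cgraph.  Provided that no global
   register variable claims %eax, %edx or %ecx, that the function needs
   no static chain, and that it does not realign its stack, the code
   above raises its effective regparm value to 3, so its first three
   integer arguments are passed in registers.  */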
3022
3023 /* Return 1 or 2, if we can pass up to SSE_REGPARM_MAX SFmode (1) and
3024 DFmode (2) arguments in SSE registers for a function with the
3025 indicated TYPE and DECL. DECL may be NULL when calling function
3026 indirectly or considering a libcall. Otherwise return 0. */
3027
3028 static int
3029 ix86_function_sseregparm (const_tree type, const_tree decl)
3030 {
3031 gcc_assert (!TARGET_64BIT);
3032
3033 /* Use SSE registers to pass SFmode and DFmode arguments if requested
3034 by the sseregparm attribute. */
3035 if (TARGET_SSEREGPARM
3036 || (type && lookup_attribute ("sseregparm", TYPE_ATTRIBUTES (type))))
3037 {
3038 if (!TARGET_SSE)
3039 {
3040 if (decl)
3041 error ("Calling %qD with attribute sseregparm without "
3042 "SSE/SSE2 enabled", decl);
3043 else
3044 error ("Calling %qT with attribute sseregparm without "
3045 "SSE/SSE2 enabled", type);
3046 return 0;
3047 }
3048
3049 return 2;
3050 }
3051
3052 /* For local functions, pass up to SSE_REGPARM_MAX SFmode
3053 (and DFmode for SSE2) arguments in SSE registers. */
3054 if (decl && TARGET_SSE_MATH && flag_unit_at_a_time && !profile_flag)
3055 {
3056 /* FIXME: remove this CONST_CAST when cgraph.[ch] is constified. */
3057 struct cgraph_local_info *i = cgraph_local_info (CONST_CAST_TREE(decl));
3058 if (i && i->local)
3059 return TARGET_SSE2 ? 2 : 1;
3060 }
3061
3062 return 0;
3063 }
3064
3065 /* Return true if EAX is live at the start of the function. Used by
3066 ix86_expand_prologue to determine if we need special help before
3067 calling allocate_stack_worker. */
3068
3069 static bool
3070 ix86_eax_live_at_start_p (void)
3071 {
3072 /* Cheat. Don't bother working forward from ix86_function_regparm
3073 to the function type to whether an actual argument is located in
3074 eax. Instead just look at cfg info, which is still close enough
3075 to correct at this point. This gives false positives for broken
3076 functions that might use uninitialized data that happens to be
3077 allocated in eax, but who cares? */
3078 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR), 0);
3079 }
3080
3081 /* Return true if TYPE has a variable argument list. */
3082
3083 static bool
3084 type_has_variadic_args_p (tree type)
3085 {
3086 tree n, t = TYPE_ARG_TYPES (type);
3087
3088 if (t == NULL)
3089 return false;
3090
3091 while ((n = TREE_CHAIN (t)) != NULL)
3092 t = n;
3093
3094 return TREE_VALUE (t) != void_type_node;
3095 }
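
/* Examples (illustrative only, hypothetical prototypes):

     int f (int, ...);   variadic - the last argument type is not void
     int g (int);        not variadic - the argument list ends in void
     int h ();           unprototyped - TYPE_ARG_TYPES is NULL, so the
                         function above returns false.  */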
3096
3097 /* Value is the number of bytes of arguments automatically
3098 popped when returning from a subroutine call.
3099 FUNDECL is the declaration node of the function (as a tree),
3100 FUNTYPE is the data type of the function (as a tree),
3101 or for a library call it is an identifier node for the subroutine name.
3102 SIZE is the number of bytes of arguments passed on the stack.
3103
3104 On the 80386, the RTD insn may be used to pop them if the number
3105 of args is fixed, but if the number is variable then the caller
3106 must pop them all. RTD can't be used for library calls now
3107 because the library is compiled with the Unix compiler.
3108 Use of RTD is a selectable option, since it is incompatible with
3109 standard Unix calling sequences. If the option is not selected,
3110 the caller must always pop the args.
3111
3112 The attribute stdcall is equivalent to RTD on a per module basis. */
3113
3114 int
3115 ix86_return_pops_args (tree fundecl, tree funtype, int size)
3116 {
3117 int rtd;
3118
3119 /* None of the 64-bit ABIs pop arguments. */
3120 if (TARGET_64BIT)
3121 return 0;
3122
3123 rtd = TARGET_RTD && (!fundecl || TREE_CODE (fundecl) != IDENTIFIER_NODE);
3124
3125 /* Cdecl functions override -mrtd, and never pop the stack. */
3126 if (! lookup_attribute ("cdecl", TYPE_ATTRIBUTES (funtype)))
3127 {
3128 /* Stdcall and fastcall functions will pop the stack if not
3129 variable args. */
3130 if (lookup_attribute ("stdcall", TYPE_ATTRIBUTES (funtype))
3131 || lookup_attribute ("fastcall", TYPE_ATTRIBUTES (funtype)))
3132 rtd = 1;
3133
3134 if (rtd && ! type_has_variadic_args_p (funtype))
3135 return size;
3136 }
3137
3138 /* Lose any fake structure return argument if it is passed on the stack. */
3139 if (aggregate_value_p (TREE_TYPE (funtype), fundecl)
3140 && !KEEP_AGGREGATE_RETURN_POINTER)
3141 {
3142 int nregs = ix86_function_regparm (funtype, fundecl);
3143 if (nregs == 0)
3144 return GET_MODE_SIZE (Pmode);
3145 }
3146
3147 return 0;
3148 }
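
/* Example (illustrative only, hypothetical declaration): for

     int __attribute__ ((stdcall)) f (int a, int b);

   SIZE is 8 and the callee pops those 8 bytes itself, so 8 is returned.
   The same prototype marked cdecl, or any variadic function, leaves the
   cleanup to the caller and 0 is returned.  */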
3149 \f
3150 /* Argument support functions. */
3151
3152 /* Return true when register may be used to pass function parameters. */
3153 bool
3154 ix86_function_arg_regno_p (int regno)
3155 {
3156 int i;
3157 const int *parm_regs;
3158
3159 if (!TARGET_64BIT)
3160 {
3161 if (TARGET_MACHO)
3162 return (regno < REGPARM_MAX
3163 || (TARGET_SSE && SSE_REGNO_P (regno) && !fixed_regs[regno]));
3164 else
3165 return (regno < REGPARM_MAX
3166 || (TARGET_MMX && MMX_REGNO_P (regno)
3167 && (regno < FIRST_MMX_REG + MMX_REGPARM_MAX))
3168 || (TARGET_SSE && SSE_REGNO_P (regno)
3169 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX)));
3170 }
3171
3172 if (TARGET_MACHO)
3173 {
3174 if (SSE_REGNO_P (regno) && TARGET_SSE)
3175 return true;
3176 }
3177 else
3178 {
3179 if (TARGET_SSE && SSE_REGNO_P (regno)
3180 && (regno < FIRST_SSE_REG + SSE_REGPARM_MAX))
3181 return true;
3182 }
3183
3184 /* RAX is used as hidden argument to va_arg functions. */
3185 if (!TARGET_64BIT_MS_ABI && regno == 0)
3186 return true;
3187
3188 if (TARGET_64BIT_MS_ABI)
3189 parm_regs = x86_64_ms_abi_int_parameter_registers;
3190 else
3191 parm_regs = x86_64_int_parameter_registers;
3192 for (i = 0; i < REGPARM_MAX; i++)
3193 if (regno == parm_regs[i])
3194 return true;
3195 return false;
3196 }
3197
3198 /* Return true if we do not know how to pass TYPE solely in registers. */
3199
3200 static bool
3201 ix86_must_pass_in_stack (enum machine_mode mode, const_tree type)
3202 {
3203 if (must_pass_in_stack_var_size_or_pad (mode, type))
3204 return true;
3205
3206 /* For 32-bit, we want TImode aggregates to go on the stack. But watch out!
3207 The layout_type routine is crafty and tries to trick us into passing
3208 currently unsupported vector types on the stack by using TImode. */
3209 return (!TARGET_64BIT && mode == TImode
3210 && type && TREE_CODE (type) != VECTOR_TYPE);
3211 }
3212
3213 /* Initialize a variable CUM of type CUMULATIVE_ARGS
3214 for a call to a function whose data type is FNTYPE.
3215 For a library call, FNTYPE is 0. */
3216
3217 void
3218 init_cumulative_args (CUMULATIVE_ARGS *cum, /* Argument info to initialize */
3219 tree fntype, /* tree ptr for function decl */
3220 rtx libname, /* SYMBOL_REF of library name or 0 */
3221 tree fndecl)
3222 {
3223 memset (cum, 0, sizeof (*cum));
3224
3225 /* Set up the number of registers to use for passing arguments. */
3226 cum->nregs = ix86_regparm;
3227 if (TARGET_SSE)
3228 cum->sse_nregs = SSE_REGPARM_MAX;
3229 if (TARGET_MMX)
3230 cum->mmx_nregs = MMX_REGPARM_MAX;
3231 cum->warn_sse = true;
3232 cum->warn_mmx = true;
3233 cum->maybe_vaarg = (fntype
3234 ? (!TYPE_ARG_TYPES (fntype)
3235 || type_has_variadic_args_p (fntype))
3236 : !libname);
3237
3238 if (!TARGET_64BIT)
3239 {
3240 /* If there are variable arguments, then we won't pass anything
3241 in registers in 32-bit mode. */
3242 if (cum->maybe_vaarg)
3243 {
3244 cum->nregs = 0;
3245 cum->sse_nregs = 0;
3246 cum->mmx_nregs = 0;
3247 cum->warn_sse = 0;
3248 cum->warn_mmx = 0;
3249 return;
3250 }
3251
3252 /* Use ecx and edx registers if function has fastcall attribute,
3253 else look for regparm information. */
3254 if (fntype)
3255 {
3256 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (fntype)))
3257 {
3258 cum->nregs = 2;
3259 cum->fastcall = 1;
3260 }
3261 else
3262 cum->nregs = ix86_function_regparm (fntype, fndecl);
3263 }
3264
3265 /* Set up the number of SSE registers used for passing SFmode
3266 and DFmode arguments. Warn for mismatching ABI. */
3267 cum->float_in_sse = ix86_function_sseregparm (fntype, fndecl);
3268 }
3269 }
3270
3271 /* Return the "natural" mode for TYPE. In most cases, this is just TYPE_MODE.
3272 But in the case of vector types, it is some vector mode.
3273
3274 When we have only some of our vector isa extensions enabled, then there
3275 are some modes for which vector_mode_supported_p is false. For these
3276 modes, the generic vector support in gcc will choose some non-vector mode
3277 in order to implement the type. By computing the natural mode, we'll
3278 select the proper ABI location for the operand and not depend on whatever
3279 the middle-end decides to do with these vector types. */
3280
3281 static enum machine_mode
3282 type_natural_mode (const_tree type)
3283 {
3284 enum machine_mode mode = TYPE_MODE (type);
3285
3286 if (TREE_CODE (type) == VECTOR_TYPE && !VECTOR_MODE_P (mode))
3287 {
3288 HOST_WIDE_INT size = int_size_in_bytes (type);
3289 if ((size == 8 || size == 16)
3290 /* ??? Generic code allows us to create width 1 vectors. Ignore. */
3291 && TYPE_VECTOR_SUBPARTS (type) > 1)
3292 {
3293 enum machine_mode innermode = TYPE_MODE (TREE_TYPE (type));
3294
3295 if (TREE_CODE (TREE_TYPE (type)) == REAL_TYPE)
3296 mode = MIN_MODE_VECTOR_FLOAT;
3297 else
3298 mode = MIN_MODE_VECTOR_INT;
3299
3300 /* Get the mode which has this inner mode and number of units. */
3301 for (; mode != VOIDmode; mode = GET_MODE_WIDER_MODE (mode))
3302 if (GET_MODE_NUNITS (mode) == TYPE_VECTOR_SUBPARTS (type)
3303 && GET_MODE_INNER (mode) == innermode)
3304 return mode;
3305
3306 gcc_unreachable ();
3307 }
3308 }
3309
3310 return mode;
3311 }
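
/* Example (illustrative only, hypothetical typedef): for

     typedef int v2si __attribute__ ((vector_size (8)));

   TYPE_MODE may fall back to DImode when MMX is disabled, but the function
   above still reports V2SImode, so the ABI location of such an argument
   does not depend on -mmmx/-mno-mmx.  */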
3312
3313 /* We want to pass a value in REGNO whose "natural" mode is MODE. However,
3314 this may not agree with the mode that the type system has chosen for the
3315 register, which is ORIG_MODE. If ORIG_MODE is not BLKmode, then we can
3316 go ahead and use it. Otherwise we have to build a PARALLEL instead. */
3317
3318 static rtx
3319 gen_reg_or_parallel (enum machine_mode mode, enum machine_mode orig_mode,
3320 unsigned int regno)
3321 {
3322 rtx tmp;
3323
3324 if (orig_mode != BLKmode)
3325 tmp = gen_rtx_REG (orig_mode, regno);
3326 else
3327 {
3328 tmp = gen_rtx_REG (mode, regno);
3329 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp, const0_rtx);
3330 tmp = gen_rtx_PARALLEL (orig_mode, gen_rtvec (1, tmp));
3331 }
3332
3333 return tmp;
3334 }
3335
3336 /* x86-64 register passing implementation. See the x86-64 ABI for details.
3337 The goal of this code is to classify each 8 bytes of an incoming argument
3338 by register class and assign registers accordingly. */
3339
3340 /* Return the union class of CLASS1 and CLASS2.
3341 See the x86-64 PS ABI for details. */
3342
3343 static enum x86_64_reg_class
3344 merge_classes (enum x86_64_reg_class class1, enum x86_64_reg_class class2)
3345 {
3346 /* Rule #1: If both classes are equal, this is the resulting class. */
3347 if (class1 == class2)
3348 return class1;
3349
3350 /* Rule #2: If one of the classes is NO_CLASS, the resulting class is
3351 the other class. */
3352 if (class1 == X86_64_NO_CLASS)
3353 return class2;
3354 if (class2 == X86_64_NO_CLASS)
3355 return class1;
3356
3357 /* Rule #3: If one of the classes is MEMORY, the result is MEMORY. */
3358 if (class1 == X86_64_MEMORY_CLASS || class2 == X86_64_MEMORY_CLASS)
3359 return X86_64_MEMORY_CLASS;
3360
3361 /* Rule #4: If one of the classes is INTEGER, the result is INTEGER. */
3362 if ((class1 == X86_64_INTEGERSI_CLASS && class2 == X86_64_SSESF_CLASS)
3363 || (class2 == X86_64_INTEGERSI_CLASS && class1 == X86_64_SSESF_CLASS))
3364 return X86_64_INTEGERSI_CLASS;
3365 if (class1 == X86_64_INTEGER_CLASS || class1 == X86_64_INTEGERSI_CLASS
3366 || class2 == X86_64_INTEGER_CLASS || class2 == X86_64_INTEGERSI_CLASS)
3367 return X86_64_INTEGER_CLASS;
3368
3369 /* Rule #5: If one of the classes is X87, X87UP, or COMPLEX_X87 class,
3370 MEMORY is used. */
3371 if (class1 == X86_64_X87_CLASS
3372 || class1 == X86_64_X87UP_CLASS
3373 || class1 == X86_64_COMPLEX_X87_CLASS
3374 || class2 == X86_64_X87_CLASS
3375 || class2 == X86_64_X87UP_CLASS
3376 || class2 == X86_64_COMPLEX_X87_CLASS)
3377 return X86_64_MEMORY_CLASS;
3378
3379 /* Rule #6: Otherwise class SSE is used. */
3380 return X86_64_SSE_CLASS;
3381 }
3382
3383 /* Classify the argument of type TYPE and mode MODE.
3384 CLASSES will be filled by the register class used to pass each word
3385 of the operand. The number of words is returned. In case the parameter
3386 should be passed in memory, 0 is returned. As a special case for zero
3387 sized containers, classes[0] will be NO_CLASS and 1 is returned.
3388
3389 BIT_OFFSET is used internally for handling records and specifies the
3390 offset in bits modulo 256 to avoid overflow cases.
3391
3392 See the x86-64 PS ABI for details.
3393 */
3394
3395 static int
3396 classify_argument (enum machine_mode mode, const_tree type,
3397 enum x86_64_reg_class classes[MAX_CLASSES], int bit_offset)
3398 {
3399 HOST_WIDE_INT bytes =
3400 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3401 int words = (bytes + (bit_offset % 64) / 8 + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
3402
3403 /* Variable sized entities are always passed/returned in memory. */
3404 if (bytes < 0)
3405 return 0;
3406
3407 if (mode != VOIDmode
3408 && targetm.calls.must_pass_in_stack (mode, type))
3409 return 0;
3410
3411 if (type && AGGREGATE_TYPE_P (type))
3412 {
3413 int i;
3414 tree field;
3415 enum x86_64_reg_class subclasses[MAX_CLASSES];
3416
3417 /* On x86-64 we pass structures larger than 16 bytes on the stack. */
3418 if (bytes > 16)
3419 return 0;
3420
3421 for (i = 0; i < words; i++)
3422 classes[i] = X86_64_NO_CLASS;
3423
3424 /* Zero sized arrays or structures are NO_CLASS. We return 0 to
3425 signal the memory class, so handle it as a special case. */
3426 if (!words)
3427 {
3428 classes[0] = X86_64_NO_CLASS;
3429 return 1;
3430 }
3431
3432 /* Classify each field of record and merge classes. */
3433 switch (TREE_CODE (type))
3434 {
3435 case RECORD_TYPE:
3436 /* And now merge the fields of structure. */
3437 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3438 {
3439 if (TREE_CODE (field) == FIELD_DECL)
3440 {
3441 int num;
3442
3443 if (TREE_TYPE (field) == error_mark_node)
3444 continue;
3445
3446 /* Bitfields are always classified as integer. Handle them
3447 early, since later code would consider them to be
3448 misaligned integers. */
3449 if (DECL_BIT_FIELD (field))
3450 {
3451 for (i = (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3452 i < ((int_bit_position (field) + (bit_offset % 64))
3453 + tree_low_cst (DECL_SIZE (field), 0)
3454 + 63) / 8 / 8; i++)
3455 classes[i] =
3456 merge_classes (X86_64_INTEGER_CLASS,
3457 classes[i]);
3458 }
3459 else
3460 {
3461 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3462 TREE_TYPE (field), subclasses,
3463 (int_bit_position (field)
3464 + bit_offset) % 256);
3465 if (!num)
3466 return 0;
3467 for (i = 0; i < num; i++)
3468 {
3469 int pos =
3470 (int_bit_position (field) + (bit_offset % 64)) / 8 / 8;
3471 classes[i + pos] =
3472 merge_classes (subclasses[i], classes[i + pos]);
3473 }
3474 }
3475 }
3476 }
3477 break;
3478
3479 case ARRAY_TYPE:
3480 /* Arrays are handled as small records. */
3481 {
3482 int num;
3483 num = classify_argument (TYPE_MODE (TREE_TYPE (type)),
3484 TREE_TYPE (type), subclasses, bit_offset);
3485 if (!num)
3486 return 0;
3487
3488 /* The partial classes are now full classes. */
3489 if (subclasses[0] == X86_64_SSESF_CLASS && bytes != 4)
3490 subclasses[0] = X86_64_SSE_CLASS;
3491 if (subclasses[0] == X86_64_INTEGERSI_CLASS && bytes != 4)
3492 subclasses[0] = X86_64_INTEGER_CLASS;
3493
3494 for (i = 0; i < words; i++)
3495 classes[i] = subclasses[i % num];
3496
3497 break;
3498 }
3499 case UNION_TYPE:
3500 case QUAL_UNION_TYPE:
3501 /* Unions are similar to RECORD_TYPE but offset is always 0.
3502 */
3503 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
3504 {
3505 if (TREE_CODE (field) == FIELD_DECL)
3506 {
3507 int num;
3508
3509 if (TREE_TYPE (field) == error_mark_node)
3510 continue;
3511
3512 num = classify_argument (TYPE_MODE (TREE_TYPE (field)),
3513 TREE_TYPE (field), subclasses,
3514 bit_offset);
3515 if (!num)
3516 return 0;
3517 for (i = 0; i < num; i++)
3518 classes[i] = merge_classes (subclasses[i], classes[i]);
3519 }
3520 }
3521 break;
3522
3523 default:
3524 gcc_unreachable ();
3525 }
3526
3527 /* Final merger cleanup. */
3528 for (i = 0; i < words; i++)
3529 {
3530 /* If one class is MEMORY, everything should be passed in
3531 memory. */
3532 if (classes[i] == X86_64_MEMORY_CLASS)
3533 return 0;
3534
3535 /* The X86_64_SSEUP_CLASS should be always preceded by
3536 X86_64_SSE_CLASS. */
3537 if (classes[i] == X86_64_SSEUP_CLASS
3538 && (i == 0 || classes[i - 1] != X86_64_SSE_CLASS))
3539 classes[i] = X86_64_SSE_CLASS;
3540
3541 /* X86_64_X87UP_CLASS should be preceded by X86_64_X87_CLASS. */
3542 if (classes[i] == X86_64_X87UP_CLASS
3543 && (i == 0 || classes[i - 1] != X86_64_X87_CLASS))
3544 classes[i] = X86_64_SSE_CLASS;
3545 }
3546 return words;
3547 }
3548
3549 /* Compute the alignment needed. We align all types to their natural
3550 boundaries, with the exception of XFmode, which is aligned to 64 bits. */
3551 if (mode != VOIDmode && mode != BLKmode)
3552 {
3553 int mode_alignment = GET_MODE_BITSIZE (mode);
3554
3555 if (mode == XFmode)
3556 mode_alignment = 128;
3557 else if (mode == XCmode)
3558 mode_alignment = 256;
3559 if (COMPLEX_MODE_P (mode))
3560 mode_alignment /= 2;
3561 /* Misaligned fields are always returned in memory. */
3562 if (bit_offset % mode_alignment)
3563 return 0;
3564 }
3565
3566 /* for V1xx modes, just use the base mode */
3567 if (VECTOR_MODE_P (mode)
3568 && GET_MODE_SIZE (GET_MODE_INNER (mode)) == bytes)
3569 mode = GET_MODE_INNER (mode);
3570
3571 /* Classification of atomic types. */
3572 switch (mode)
3573 {
3574 case SDmode:
3575 case DDmode:
3576 classes[0] = X86_64_SSE_CLASS;
3577 return 1;
3578 case TDmode:
3579 classes[0] = X86_64_SSE_CLASS;
3580 classes[1] = X86_64_SSEUP_CLASS;
3581 return 2;
3582 case DImode:
3583 case SImode:
3584 case HImode:
3585 case QImode:
3586 case CSImode:
3587 case CHImode:
3588 case CQImode:
3589 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3590 classes[0] = X86_64_INTEGERSI_CLASS;
3591 else
3592 classes[0] = X86_64_INTEGER_CLASS;
3593 return 1;
3594 case CDImode:
3595 case TImode:
3596 classes[0] = classes[1] = X86_64_INTEGER_CLASS;
3597 return 2;
3598 case CTImode:
3599 return 0;
3600 case SFmode:
3601 if (!(bit_offset % 64))
3602 classes[0] = X86_64_SSESF_CLASS;
3603 else
3604 classes[0] = X86_64_SSE_CLASS;
3605 return 1;
3606 case DFmode:
3607 classes[0] = X86_64_SSEDF_CLASS;
3608 return 1;
3609 case XFmode:
3610 classes[0] = X86_64_X87_CLASS;
3611 classes[1] = X86_64_X87UP_CLASS;
3612 return 2;
3613 case TFmode:
3614 classes[0] = X86_64_SSE_CLASS;
3615 classes[1] = X86_64_SSEUP_CLASS;
3616 return 2;
3617 case SCmode:
3618 classes[0] = X86_64_SSE_CLASS;
3619 return 1;
3620 case DCmode:
3621 classes[0] = X86_64_SSEDF_CLASS;
3622 classes[1] = X86_64_SSEDF_CLASS;
3623 return 2;
3624 case XCmode:
3625 classes[0] = X86_64_COMPLEX_X87_CLASS;
3626 return 1;
3627 case TCmode:
3628 /* This mode is larger than 16 bytes. */
3629 return 0;
3630 case V4SFmode:
3631 case V4SImode:
3632 case V16QImode:
3633 case V8HImode:
3634 case V2DFmode:
3635 case V2DImode:
3636 classes[0] = X86_64_SSE_CLASS;
3637 classes[1] = X86_64_SSEUP_CLASS;
3638 return 2;
3639 case V2SFmode:
3640 case V2SImode:
3641 case V4HImode:
3642 case V8QImode:
3643 classes[0] = X86_64_SSE_CLASS;
3644 return 1;
3645 case BLKmode:
3646 case VOIDmode:
3647 return 0;
3648 default:
3649 gcc_assert (VECTOR_MODE_P (mode));
3650
3651 if (bytes > 16)
3652 return 0;
3653
3654 gcc_assert (GET_MODE_CLASS (GET_MODE_INNER (mode)) == MODE_INT);
3655
3656 if (bit_offset + GET_MODE_BITSIZE (mode) <= 32)
3657 classes[0] = X86_64_INTEGERSI_CLASS;
3658 else
3659 classes[0] = X86_64_INTEGER_CLASS;
3660 classes[1] = X86_64_INTEGER_CLASS;
3661 return 1 + (bytes > 8);
3662 }
3663 }
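
/* Example (illustrative only, hypothetical struct): for

     struct s { double d; int i; };

   two eightbytes are classified: the first as X86_64_SSEDF_CLASS (the
   double) and the second as an integer class (the int), so the struct is
   passed in one SSE and one integer register.  A struct larger than 16
   bytes, or one containing a long double, ends up in memory instead.  */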
3664
3665 /* Examine the argument and set the number of registers required in each
3666 class. Return 0 iff the parameter should be passed in memory. */
3667 static int
3668 examine_argument (enum machine_mode mode, const_tree type, int in_return,
3669 int *int_nregs, int *sse_nregs)
3670 {
3671 enum x86_64_reg_class regclass[MAX_CLASSES];
3672 int n = classify_argument (mode, type, regclass, 0);
3673
3674 *int_nregs = 0;
3675 *sse_nregs = 0;
3676 if (!n)
3677 return 0;
3678 for (n--; n >= 0; n--)
3679 switch (regclass[n])
3680 {
3681 case X86_64_INTEGER_CLASS:
3682 case X86_64_INTEGERSI_CLASS:
3683 (*int_nregs)++;
3684 break;
3685 case X86_64_SSE_CLASS:
3686 case X86_64_SSESF_CLASS:
3687 case X86_64_SSEDF_CLASS:
3688 (*sse_nregs)++;
3689 break;
3690 case X86_64_NO_CLASS:
3691 case X86_64_SSEUP_CLASS:
3692 break;
3693 case X86_64_X87_CLASS:
3694 case X86_64_X87UP_CLASS:
3695 if (!in_return)
3696 return 0;
3697 break;
3698 case X86_64_COMPLEX_X87_CLASS:
3699 return in_return ? 2 : 0;
3700 case X86_64_MEMORY_CLASS:
3701 gcc_unreachable ();
3702 }
3703 return 1;
3704 }
3705
3706 /* Construct container for the argument used by GCC interface. See
3707 FUNCTION_ARG for the detailed description. */
3708
3709 static rtx
3710 construct_container (enum machine_mode mode, enum machine_mode orig_mode,
3711 const_tree type, int in_return, int nintregs, int nsseregs,
3712 const int *intreg, int sse_regno)
3713 {
3714 /* The following variables hold the static issued_error state. */
3715 static bool issued_sse_arg_error;
3716 static bool issued_sse_ret_error;
3717 static bool issued_x87_ret_error;
3718
3719 enum machine_mode tmpmode;
3720 int bytes =
3721 (mode == BLKmode) ? int_size_in_bytes (type) : (int) GET_MODE_SIZE (mode);
3722 enum x86_64_reg_class regclass[MAX_CLASSES];
3723 int n;
3724 int i;
3725 int nexps = 0;
3726 int needed_sseregs, needed_intregs;
3727 rtx exp[MAX_CLASSES];
3728 rtx ret;
3729
3730 n = classify_argument (mode, type, regclass, 0);
3731 if (!n)
3732 return NULL;
3733 if (!examine_argument (mode, type, in_return, &needed_intregs,
3734 &needed_sseregs))
3735 return NULL;
3736 if (needed_intregs > nintregs || needed_sseregs > nsseregs)
3737 return NULL;
3738
3739 /* We allowed the user to turn off SSE for kernel mode. Don't crash if
3740 some less clueful developer tries to use floating-point anyway. */
3741 if (needed_sseregs && !TARGET_SSE)
3742 {
3743 if (in_return)
3744 {
3745 if (!issued_sse_ret_error)
3746 {
3747 error ("SSE register return with SSE disabled");
3748 issued_sse_ret_error = true;
3749 }
3750 }
3751 else if (!issued_sse_arg_error)
3752 {
3753 error ("SSE register argument with SSE disabled");
3754 issued_sse_arg_error = true;
3755 }
3756 return NULL;
3757 }
3758
3759 /* Likewise, error if the ABI requires us to return values in the
3760 x87 registers and the user specified -mno-80387. */
3761 if (!TARGET_80387 && in_return)
3762 for (i = 0; i < n; i++)
3763 if (regclass[i] == X86_64_X87_CLASS
3764 || regclass[i] == X86_64_X87UP_CLASS
3765 || regclass[i] == X86_64_COMPLEX_X87_CLASS)
3766 {
3767 if (!issued_x87_ret_error)
3768 {
3769 error ("x87 register return with x87 disabled");
3770 issued_x87_ret_error = true;
3771 }
3772 return NULL;
3773 }
3774
3775 /* First construct simple cases. Avoid SCmode, since we want to use a
3776 single register to pass this type. */
3777 if (n == 1 && mode != SCmode)
3778 switch (regclass[0])
3779 {
3780 case X86_64_INTEGER_CLASS:
3781 case X86_64_INTEGERSI_CLASS:
3782 return gen_rtx_REG (mode, intreg[0]);
3783 case X86_64_SSE_CLASS:
3784 case X86_64_SSESF_CLASS:
3785 case X86_64_SSEDF_CLASS:
3786 return gen_reg_or_parallel (mode, orig_mode, SSE_REGNO (sse_regno));
3787 case X86_64_X87_CLASS:
3788 case X86_64_COMPLEX_X87_CLASS:
3789 return gen_rtx_REG (mode, FIRST_STACK_REG);
3790 case X86_64_NO_CLASS:
3791 /* Zero sized array, struct or class. */
3792 return NULL;
3793 default:
3794 gcc_unreachable ();
3795 }
3796 if (n == 2 && regclass[0] == X86_64_SSE_CLASS
3797 && regclass[1] == X86_64_SSEUP_CLASS && mode != BLKmode)
3798 return gen_rtx_REG (mode, SSE_REGNO (sse_regno));
3799
3800 if (n == 2
3801 && regclass[0] == X86_64_X87_CLASS && regclass[1] == X86_64_X87UP_CLASS)
3802 return gen_rtx_REG (XFmode, FIRST_STACK_REG);
3803 if (n == 2 && regclass[0] == X86_64_INTEGER_CLASS
3804 && regclass[1] == X86_64_INTEGER_CLASS
3805 && (mode == CDImode || mode == TImode || mode == TFmode)
3806 && intreg[0] + 1 == intreg[1])
3807 return gen_rtx_REG (mode, intreg[0]);
3808
3809 /* Otherwise figure out the entries of the PARALLEL. */
3810 for (i = 0; i < n; i++)
3811 {
3812 switch (regclass[i])
3813 {
3814 case X86_64_NO_CLASS:
3815 break;
3816 case X86_64_INTEGER_CLASS:
3817 case X86_64_INTEGERSI_CLASS:
3818 /* Merge TImodes on aligned occasions here too. */
3819 if (i * 8 + 8 > bytes)
3820 tmpmode = mode_for_size ((bytes - i * 8) * BITS_PER_UNIT, MODE_INT, 0);
3821 else if (regclass[i] == X86_64_INTEGERSI_CLASS)
3822 tmpmode = SImode;
3823 else
3824 tmpmode = DImode;
3825 /* We've requested 24 bytes for which we don't have a mode. Use DImode. */
3826 if (tmpmode == BLKmode)
3827 tmpmode = DImode;
3828 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3829 gen_rtx_REG (tmpmode, *intreg),
3830 GEN_INT (i*8));
3831 intreg++;
3832 break;
3833 case X86_64_SSESF_CLASS:
3834 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3835 gen_rtx_REG (SFmode,
3836 SSE_REGNO (sse_regno)),
3837 GEN_INT (i*8));
3838 sse_regno++;
3839 break;
3840 case X86_64_SSEDF_CLASS:
3841 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3842 gen_rtx_REG (DFmode,
3843 SSE_REGNO (sse_regno)),
3844 GEN_INT (i*8));
3845 sse_regno++;
3846 break;
3847 case X86_64_SSE_CLASS:
3848 if (i < n - 1 && regclass[i + 1] == X86_64_SSEUP_CLASS)
3849 tmpmode = TImode;
3850 else
3851 tmpmode = DImode;
3852 exp [nexps++] = gen_rtx_EXPR_LIST (VOIDmode,
3853 gen_rtx_REG (tmpmode,
3854 SSE_REGNO (sse_regno)),
3855 GEN_INT (i*8));
3856 if (tmpmode == TImode)
3857 i++;
3858 sse_regno++;
3859 break;
3860 default:
3861 gcc_unreachable ();
3862 }
3863 }
3864
3865 /* Empty aligned struct, union or class. */
3866 if (nexps == 0)
3867 return NULL;
3868
3869 ret = gen_rtx_PARALLEL (mode, rtvec_alloc (nexps));
3870 for (i = 0; i < nexps; i++)
3871 XVECEXP (ret, 0, i) = exp [i];
3872 return ret;
3873 }
3874
3875 /* Update the data in CUM to advance over an argument of mode MODE
3876 and data type TYPE. (TYPE is null for libcalls where that information
3877 may not be available.) */
3878
3879 static void
3880 function_arg_advance_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3881 tree type, HOST_WIDE_INT bytes, HOST_WIDE_INT words)
3882 {
3883 switch (mode)
3884 {
3885 default:
3886 break;
3887
3888 case BLKmode:
3889 if (bytes < 0)
3890 break;
3891 /* FALLTHRU */
3892
3893 case DImode:
3894 case SImode:
3895 case HImode:
3896 case QImode:
3897 cum->words += words;
3898 cum->nregs -= words;
3899 cum->regno += words;
3900
3901 if (cum->nregs <= 0)
3902 {
3903 cum->nregs = 0;
3904 cum->regno = 0;
3905 }
3906 break;
3907
3908 case DFmode:
3909 if (cum->float_in_sse < 2)
3910 break;
3911 case SFmode:
3912 if (cum->float_in_sse < 1)
3913 break;
3914 /* FALLTHRU */
3915
3916 case TImode:
3917 case V16QImode:
3918 case V8HImode:
3919 case V4SImode:
3920 case V2DImode:
3921 case V4SFmode:
3922 case V2DFmode:
3923 if (!type || !AGGREGATE_TYPE_P (type))
3924 {
3925 cum->sse_words += words;
3926 cum->sse_nregs -= 1;
3927 cum->sse_regno += 1;
3928 if (cum->sse_nregs <= 0)
3929 {
3930 cum->sse_nregs = 0;
3931 cum->sse_regno = 0;
3932 }
3933 }
3934 break;
3935
3936 case V8QImode:
3937 case V4HImode:
3938 case V2SImode:
3939 case V2SFmode:
3940 if (!type || !AGGREGATE_TYPE_P (type))
3941 {
3942 cum->mmx_words += words;
3943 cum->mmx_nregs -= 1;
3944 cum->mmx_regno += 1;
3945 if (cum->mmx_nregs <= 0)
3946 {
3947 cum->mmx_nregs = 0;
3948 cum->mmx_regno = 0;
3949 }
3950 }
3951 break;
3952 }
3953 }
3954
3955 static void
3956 function_arg_advance_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3957 tree type, HOST_WIDE_INT words)
3958 {
3959 int int_nregs, sse_nregs;
3960
3961 if (!examine_argument (mode, type, 0, &int_nregs, &sse_nregs))
3962 cum->words += words;
3963 else if (sse_nregs <= cum->sse_nregs && int_nregs <= cum->nregs)
3964 {
3965 cum->nregs -= int_nregs;
3966 cum->sse_nregs -= sse_nregs;
3967 cum->regno += int_nregs;
3968 cum->sse_regno += sse_nregs;
3969 }
3970 else
3971 cum->words += words;
3972 }
3973
3974 static void
3975 function_arg_advance_ms_64 (CUMULATIVE_ARGS *cum, HOST_WIDE_INT bytes,
3976 HOST_WIDE_INT words)
3977 {
3978 /* Otherwise, this should be passed indirect. */
3979 gcc_assert (bytes == 1 || bytes == 2 || bytes == 4 || bytes == 8);
3980
3981 cum->words += words;
3982 if (cum->nregs > 0)
3983 {
3984 cum->nregs -= 1;
3985 cum->regno += 1;
3986 }
3987 }
3988
3989 void
3990 function_arg_advance (CUMULATIVE_ARGS *cum, enum machine_mode mode,
3991 tree type, int named ATTRIBUTE_UNUSED)
3992 {
3993 HOST_WIDE_INT bytes, words;
3994
3995 if (mode == BLKmode)
3996 bytes = int_size_in_bytes (type);
3997 else
3998 bytes = GET_MODE_SIZE (mode);
3999 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4000
4001 if (type)
4002 mode = type_natural_mode (type);
4003
4004 if (TARGET_64BIT_MS_ABI)
4005 function_arg_advance_ms_64 (cum, bytes, words);
4006 else if (TARGET_64BIT)
4007 function_arg_advance_64 (cum, mode, type, words);
4008 else
4009 function_arg_advance_32 (cum, mode, type, bytes, words);
4010 }
4011
4012 /* Define where to put the arguments to a function.
4013 Value is zero to push the argument on the stack,
4014 or a hard register in which to store the argument.
4015
4016 MODE is the argument's machine mode.
4017 TYPE is the data type of the argument (as a tree).
4018 This is null for libcalls where that information may
4019 not be available.
4020 CUM is a variable of type CUMULATIVE_ARGS which gives info about
4021 the preceding args and about the function being called.
4022 NAMED is nonzero if this argument is a named parameter
4023 (otherwise it is an extra parameter matching an ellipsis). */
4024
4025 static rtx
4026 function_arg_32 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4027 enum machine_mode orig_mode, tree type,
4028 HOST_WIDE_INT bytes, HOST_WIDE_INT words)
4029 {
4030 static bool warnedsse, warnedmmx;
4031
4032 /* Avoid the AL settings for the Unix64 ABI. */
4033 if (mode == VOIDmode)
4034 return constm1_rtx;
4035
4036 switch (mode)
4037 {
4038 default:
4039 break;
4040
4041 case BLKmode:
4042 if (bytes < 0)
4043 break;
4044 /* FALLTHRU */
4045 case DImode:
4046 case SImode:
4047 case HImode:
4048 case QImode:
4049 if (words <= cum->nregs)
4050 {
4051 int regno = cum->regno;
4052
4053 /* Fastcall allocates the first two DWORD (SImode) or
4054 smaller arguments to ECX and EDX. */
4055 if (cum->fastcall)
4056 {
4057 if (mode == BLKmode || mode == DImode)
4058 break;
4059
4060 /* ECX not EAX is the first allocated register. */
4061 if (regno == 0)
4062 regno = 2;
4063 }
4064 return gen_rtx_REG (mode, regno);
4065 }
4066 break;
4067
4068 case DFmode:
4069 if (cum->float_in_sse < 2)
4070 break;
4071 case SFmode:
4072 if (cum->float_in_sse < 1)
4073 break;
4074 /* FALLTHRU */
4075 case TImode:
4076 case V16QImode:
4077 case V8HImode:
4078 case V4SImode:
4079 case V2DImode:
4080 case V4SFmode:
4081 case V2DFmode:
4082 if (!type || !AGGREGATE_TYPE_P (type))
4083 {
4084 if (!TARGET_SSE && !warnedsse && cum->warn_sse)
4085 {
4086 warnedsse = true;
4087 warning (0, "SSE vector argument without SSE enabled "
4088 "changes the ABI");
4089 }
4090 if (cum->sse_nregs)
4091 return gen_reg_or_parallel (mode, orig_mode,
4092 cum->sse_regno + FIRST_SSE_REG);
4093 }
4094 break;
4095
4096 case V8QImode:
4097 case V4HImode:
4098 case V2SImode:
4099 case V2SFmode:
4100 if (!type || !AGGREGATE_TYPE_P (type))
4101 {
4102 if (!TARGET_MMX && !warnedmmx && cum->warn_mmx)
4103 {
4104 warnedmmx = true;
4105 warning (0, "MMX vector argument without MMX enabled "
4106 "changes the ABI");
4107 }
4108 if (cum->mmx_nregs)
4109 return gen_reg_or_parallel (mode, orig_mode,
4110 cum->mmx_regno + FIRST_MMX_REG);
4111 }
4112 break;
4113 }
4114
4115 return NULL_RTX;
4116 }
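
/* Example (illustrative only, hypothetical declaration): for

     int __attribute__ ((fastcall)) f (int a, int b, int c);

   the code above places A in %ecx and B in %edx, while C goes on the
   stack; DImode and BLKmode arguments are never passed in registers for
   fastcall, as handled above.  */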
4117
4118 static rtx
4119 function_arg_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4120 enum machine_mode orig_mode, tree type)
4121 {
4122 /* Handle a hidden AL argument containing number of registers
4123 for varargs x86-64 functions. */
4124 if (mode == VOIDmode)
4125 return GEN_INT (cum->maybe_vaarg
4126 ? (cum->sse_nregs < 0
4127 ? SSE_REGPARM_MAX
4128 : cum->sse_regno)
4129 : -1);
4130
4131 return construct_container (mode, orig_mode, type, 0, cum->nregs,
4132 cum->sse_nregs,
4133 &x86_64_int_parameter_registers [cum->regno],
4134 cum->sse_regno);
4135 }
4136
4137 static rtx
4138 function_arg_ms_64 (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4139 enum machine_mode orig_mode, int named)
4140 {
4141 unsigned int regno;
4142
4143 /* Avoid the AL settings for the Unix64 ABI. */
4144 if (mode == VOIDmode)
4145 return constm1_rtx;
4146
4147 /* If we've run out of registers, it goes on the stack. */
4148 if (cum->nregs == 0)
4149 return NULL_RTX;
4150
4151 regno = x86_64_ms_abi_int_parameter_registers[cum->regno];
4152
4153 /* Only floating point modes are passed in anything but integer regs. */
4154 if (TARGET_SSE && (mode == SFmode || mode == DFmode))
4155 {
4156 if (named)
4157 regno = cum->regno + FIRST_SSE_REG;
4158 else
4159 {
4160 rtx t1, t2;
4161
4162 /* Unnamed floating parameters are passed in both the
4163 SSE and integer registers. */
4164 t1 = gen_rtx_REG (mode, cum->regno + FIRST_SSE_REG);
4165 t2 = gen_rtx_REG (mode, regno);
4166 t1 = gen_rtx_EXPR_LIST (VOIDmode, t1, const0_rtx);
4167 t2 = gen_rtx_EXPR_LIST (VOIDmode, t2, const0_rtx);
4168 return gen_rtx_PARALLEL (mode, gen_rtvec (2, t1, t2));
4169 }
4170 }
4171
4172 return gen_reg_or_parallel (mode, orig_mode, regno);
4173 }
4174
4175 rtx
4176 function_arg (CUMULATIVE_ARGS *cum, enum machine_mode omode,
4177 tree type, int named)
4178 {
4179 enum machine_mode mode = omode;
4180 HOST_WIDE_INT bytes, words;
4181
4182 if (mode == BLKmode)
4183 bytes = int_size_in_bytes (type);
4184 else
4185 bytes = GET_MODE_SIZE (mode);
4186 words = (bytes + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4187
4188 /* To simplify the code below, represent vector types with a vector mode
4189 even if MMX/SSE are not active. */
4190 if (type && TREE_CODE (type) == VECTOR_TYPE)
4191 mode = type_natural_mode (type);
4192
4193 if (TARGET_64BIT_MS_ABI)
4194 return function_arg_ms_64 (cum, mode, omode, named);
4195 else if (TARGET_64BIT)
4196 return function_arg_64 (cum, mode, omode, type);
4197 else
4198 return function_arg_32 (cum, mode, omode, type, bytes, words);
4199 }
4200
4201 /* A C expression that indicates when an argument must be passed by
4202 reference. If nonzero for an argument, a copy of that argument is
4203 made in memory and a pointer to the argument is passed instead of
4204 the argument itself. The pointer is passed in whatever way is
4205 appropriate for passing a pointer to that type. */
4206
4207 static bool
4208 ix86_pass_by_reference (CUMULATIVE_ARGS *cum ATTRIBUTE_UNUSED,
4209 enum machine_mode mode ATTRIBUTE_UNUSED,
4210 const_tree type, bool named ATTRIBUTE_UNUSED)
4211 {
4212 if (TARGET_64BIT_MS_ABI)
4213 {
4214 if (type)
4215 {
4216 /* Arrays are passed by reference. */
4217 if (TREE_CODE (type) == ARRAY_TYPE)
4218 return true;
4219
4220 if (AGGREGATE_TYPE_P (type))
4221 {
4222 /* Structs/unions of sizes other than 8, 16, 32, or 64 bits
4223 are passed by reference. */
4224 int el2 = exact_log2 (int_size_in_bytes (type));
4225 return !(el2 >= 0 && el2 <= 3);
4226 }
4227 }
4228
4229 /* __m128 is passed by reference. */
4230 /* ??? How to handle complex? For now treat them as structs,
4231 and pass them by reference if they're too large. */
4232 if (GET_MODE_SIZE (mode) > 8)
4233 return true;
4234 }
4235 else if (TARGET_64BIT && type && int_size_in_bytes (type) == -1)
4236 return 1;
4237
4238 return 0;
4239 }
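
/* Example (illustrative only, hypothetical struct): under the 64-bit MS ABI

     struct s12 { int a, b, c; };

   is 12 bytes, which is not 1, 2, 4 or 8, so it is passed by reference,
   whereas an 8-byte struct is passed by value in a single register.  */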
4240
4241 /* Return true when TYPE should be 128bit aligned for 32bit argument passing
4242 ABI. Only called if TARGET_SSE. */
4243 static bool
4244 contains_128bit_aligned_vector_p (tree type)
4245 {
4246 enum machine_mode mode = TYPE_MODE (type);
4247 if (SSE_REG_MODE_P (mode)
4248 && (!TYPE_USER_ALIGN (type) || TYPE_ALIGN (type) > 128))
4249 return true;
4250 if (TYPE_ALIGN (type) < 128)
4251 return false;
4252
4253 if (AGGREGATE_TYPE_P (type))
4254 {
4255 /* Walk the aggregates recursively. */
4256 switch (TREE_CODE (type))
4257 {
4258 case RECORD_TYPE:
4259 case UNION_TYPE:
4260 case QUAL_UNION_TYPE:
4261 {
4262 tree field;
4263
4264 /* Walk all the structure fields. */
4265 for (field = TYPE_FIELDS (type); field; field = TREE_CHAIN (field))
4266 {
4267 if (TREE_CODE (field) == FIELD_DECL
4268 && contains_128bit_aligned_vector_p (TREE_TYPE (field)))
4269 return true;
4270 }
4271 break;
4272 }
4273
4274 case ARRAY_TYPE:
4275 /* Just in case some languages pass arrays by value. */
4276 if (contains_128bit_aligned_vector_p (TREE_TYPE (type)))
4277 return true;
4278 break;
4279
4280 default:
4281 gcc_unreachable ();
4282 }
4283 }
4284 return false;
4285 }
4286
4287 /* Gives the alignment boundary, in bits, of an argument with the
4288 specified mode and type. */
4289
4290 int
4291 ix86_function_arg_boundary (enum machine_mode mode, tree type)
4292 {
4293 int align;
4294 if (type)
4295 align = TYPE_ALIGN (type);
4296 else
4297 align = GET_MODE_ALIGNMENT (mode);
4298 if (align < PARM_BOUNDARY)
4299 align = PARM_BOUNDARY;
4300 if (!TARGET_64BIT)
4301 {
4302 /* i386 ABI defines all arguments to be 4 byte aligned. We have to
4303 make an exception for SSE modes since these require 128bit
4304 alignment.
4305
4306 The handling here differs from field_alignment. ICC aligns MMX
4307 arguments to 4 byte boundaries, while structure fields are aligned
4308 to 8 byte boundaries. */
4309 if (!TARGET_SSE)
4310 align = PARM_BOUNDARY;
4311 else if (!type)
4312 {
4313 if (!SSE_REG_MODE_P (mode))
4314 align = PARM_BOUNDARY;
4315 }
4316 else
4317 {
4318 if (!contains_128bit_aligned_vector_p (type))
4319 align = PARM_BOUNDARY;
4320 }
4321 }
4322 if (align > 128)
4323 align = 128;
4324 return align;
4325 }
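
/* Example (illustrative only): on ia32 with SSE enabled an __m128 argument
   is aligned to a 128 bit boundary by the code above, while scalar
   arguments stay at PARM_BOUNDARY; with -mno-sse every argument is aligned
   to PARM_BOUNDARY regardless of its type.  */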
4326
4327 /* Return true if N is a possible register number of function value. */
4328
4329 bool
4330 ix86_function_value_regno_p (int regno)
4331 {
4332 switch (regno)
4333 {
4334 case 0:
4335 return true;
4336
4337 case FIRST_FLOAT_REG:
4338 if (TARGET_64BIT_MS_ABI)
4339 return false;
4340 return TARGET_FLOAT_RETURNS_IN_80387;
4341
4342 case FIRST_SSE_REG:
4343 return TARGET_SSE;
4344
4345 case FIRST_MMX_REG:
4346 if (TARGET_MACHO || TARGET_64BIT)
4347 return false;
4348 return TARGET_MMX;
4349 }
4350
4351 return false;
4352 }
4353
4354 /* Define how to find the value returned by a function.
4355 VALTYPE is the data type of the value (as a tree).
4356 If the precise function being called is known, FUNC is its FUNCTION_DECL;
4357 otherwise, FUNC is 0. */
4358
4359 static rtx
4360 function_value_32 (enum machine_mode orig_mode, enum machine_mode mode,
4361 const_tree fntype, const_tree fn)
4362 {
4363 unsigned int regno;
4364
4365 /* 8-byte vector modes in %mm0. See ix86_return_in_memory for where
4366 we normally prevent this case when mmx is not available. However
4367 some ABIs may require the result to be returned like DImode. */
4368 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4369 regno = TARGET_MMX ? FIRST_MMX_REG : 0;
4370
4371 /* 16-byte vector modes in %xmm0. See ix86_return_in_memory for where
4372 we prevent this case when sse is not available. However some ABIs
4373 may require the result to be returned like integer TImode. */
4374 else if (mode == TImode
4375 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4376 regno = TARGET_SSE ? FIRST_SSE_REG : 0;
4377
4378 /* Floating point return values in %st(0) (unless -mno-fp-ret-in-387). */
4379 else if (X87_FLOAT_MODE_P (mode) && TARGET_FLOAT_RETURNS_IN_80387)
4380 regno = FIRST_FLOAT_REG;
4381 else
4382 /* Most things go in %eax. */
4383 regno = 0;
4384
4385 /* Override FP return register with %xmm0 for local functions when
4386 SSE math is enabled or for functions with sseregparm attribute. */
4387 if ((fn || fntype) && (mode == SFmode || mode == DFmode))
4388 {
4389 int sse_level = ix86_function_sseregparm (fntype, fn);
4390 if ((sse_level >= 1 && mode == SFmode)
4391 || (sse_level == 2 && mode == DFmode))
4392 regno = FIRST_SSE_REG;
4393 }
4394
4395 return gen_rtx_REG (orig_mode, regno);
4396 }
4397
4398 static rtx
4399 function_value_64 (enum machine_mode orig_mode, enum machine_mode mode,
4400 const_tree valtype)
4401 {
4402 rtx ret;
4403
4404 /* Handle libcalls, which don't provide a type node. */
4405 if (valtype == NULL)
4406 {
4407 switch (mode)
4408 {
4409 case SFmode:
4410 case SCmode:
4411 case DFmode:
4412 case DCmode:
4413 case TFmode:
4414 case SDmode:
4415 case DDmode:
4416 case TDmode:
4417 return gen_rtx_REG (mode, FIRST_SSE_REG);
4418 case XFmode:
4419 case XCmode:
4420 return gen_rtx_REG (mode, FIRST_FLOAT_REG);
4421 case TCmode:
4422 return NULL;
4423 default:
4424 return gen_rtx_REG (mode, 0);
4425 }
4426 }
4427
4428 ret = construct_container (mode, orig_mode, valtype, 1,
4429 REGPARM_MAX, SSE_REGPARM_MAX,
4430 x86_64_int_return_registers, 0);
4431
4432 /* For zero sized structures, construct_container returns NULL, but we
4433 need to keep the rest of the compiler happy by returning a meaningful value. */
4434 if (!ret)
4435 ret = gen_rtx_REG (orig_mode, 0);
4436
4437 return ret;
4438 }
4439
4440 static rtx
4441 function_value_ms_64 (enum machine_mode orig_mode, enum machine_mode mode)
4442 {
4443 unsigned int regno = 0;
4444
4445 if (TARGET_SSE)
4446 {
4447 if (mode == SFmode || mode == DFmode)
4448 regno = FIRST_SSE_REG;
4449 else if (VECTOR_MODE_P (mode) || GET_MODE_SIZE (mode) == 16)
4450 regno = FIRST_SSE_REG;
4451 }
4452
4453 return gen_rtx_REG (orig_mode, regno);
4454 }
4455
4456 static rtx
4457 ix86_function_value_1 (const_tree valtype, const_tree fntype_or_decl,
4458 enum machine_mode orig_mode, enum machine_mode mode)
4459 {
4460 const_tree fn, fntype;
4461
4462 fn = NULL_TREE;
4463 if (fntype_or_decl && DECL_P (fntype_or_decl))
4464 fn = fntype_or_decl;
4465 fntype = fn ? TREE_TYPE (fn) : fntype_or_decl;
4466
4467 if (TARGET_64BIT_MS_ABI)
4468 return function_value_ms_64 (orig_mode, mode);
4469 else if (TARGET_64BIT)
4470 return function_value_64 (orig_mode, mode, valtype);
4471 else
4472 return function_value_32 (orig_mode, mode, fntype, fn);
4473 }
4474
4475 static rtx
4476 ix86_function_value (const_tree valtype, const_tree fntype_or_decl,
4477 bool outgoing ATTRIBUTE_UNUSED)
4478 {
4479 enum machine_mode mode, orig_mode;
4480
4481 orig_mode = TYPE_MODE (valtype);
4482 mode = type_natural_mode (valtype);
4483 return ix86_function_value_1 (valtype, fntype_or_decl, orig_mode, mode);
4484 }
4485
4486 rtx
4487 ix86_libcall_value (enum machine_mode mode)
4488 {
4489 return ix86_function_value_1 (NULL, NULL, mode, mode);
4490 }
4491
4492 /* Return true iff type is returned in memory. */
4493
4494 static int
4495 return_in_memory_32 (const_tree type, enum machine_mode mode)
4496 {
4497 HOST_WIDE_INT size;
4498
4499 if (mode == BLKmode)
4500 return 1;
4501
4502 size = int_size_in_bytes (type);
4503
4504 if (MS_AGGREGATE_RETURN && AGGREGATE_TYPE_P (type) && size <= 8)
4505 return 0;
4506
4507 if (VECTOR_MODE_P (mode) || mode == TImode)
4508 {
4509 /* User-created vectors small enough to fit in EAX. */
4510 if (size < 8)
4511 return 0;
4512
4513 /* MMX/3dNow values are returned in MM0,
4514 except when it doesn't exist. */
4515 if (size == 8)
4516 return (TARGET_MMX ? 0 : 1);
4517
4518 /* SSE values are returned in XMM0, except when it doesn't exist. */
4519 if (size == 16)
4520 return (TARGET_SSE ? 0 : 1);
4521 }
4522
4523 if (mode == XFmode)
4524 return 0;
4525
4526 if (mode == TDmode)
4527 return 1;
4528
4529 if (size > 12)
4530 return 1;
4531 return 0;
4532 }
4533
4534 static int
4535 return_in_memory_64 (const_tree type, enum machine_mode mode)
4536 {
4537 int needed_intregs, needed_sseregs;
4538 return !examine_argument (mode, type, 1, &needed_intregs, &needed_sseregs);
4539 }
4540
4541 static int
4542 return_in_memory_ms_64 (const_tree type, enum machine_mode mode)
4543 {
4544 HOST_WIDE_INT size = int_size_in_bytes (type);
4545
4546 /* __m128 and friends are returned in xmm0. */
4547 if (size == 16 && VECTOR_MODE_P (mode))
4548 return 0;
4549
4550 /* Otherwise, the size must be exactly 1, 2, 4 or 8 bytes. */
4551 return (size != 1 && size != 2 && size != 4 && size != 8);
4552 }
4553
4554 int
4555 ix86_return_in_memory (const_tree type)
4556 {
4557 const enum machine_mode mode = type_natural_mode (type);
4558
4559 if (TARGET_64BIT_MS_ABI)
4560 return return_in_memory_ms_64 (type, mode);
4561 else if (TARGET_64BIT)
4562 return return_in_memory_64 (type, mode);
4563 else
4564 return return_in_memory_32 (type, mode);
4565 }
4566
4567 /* Return false iff TYPE is returned in memory. This version is used
4568 on Solaris 10. It is similar to the generic ix86_return_in_memory,
4569 but differs notably in that when MMX is available, 8-byte vectors
4570 are returned in memory, rather than in MMX registers. */
4571
4572 int
4573 ix86_sol10_return_in_memory (const_tree type)
4574 {
4575 int size;
4576 enum machine_mode mode = type_natural_mode (type);
4577
4578 if (TARGET_64BIT)
4579 return return_in_memory_64 (type, mode);
4580
4581 if (mode == BLKmode)
4582 return 1;
4583
4584 size = int_size_in_bytes (type);
4585
4586 if (VECTOR_MODE_P (mode))
4587 {
4588 /* Return in memory only if MMX registers *are* available. This
4589 seems backwards, but it is consistent with the existing
4590 Solaris x86 ABI. */
4591 if (size == 8)
4592 return TARGET_MMX;
4593 if (size == 16)
4594 return !TARGET_SSE;
4595 }
4596 else if (mode == TImode)
4597 return !TARGET_SSE;
4598 else if (mode == XFmode)
4599 return 0;
4600
4601 return size > 12;
4602 }
4603
4604 /* When returning SSE vector types, we have a choice of either
4605 (1) being ABI incompatible with a -march switch, or
4606 (2) generating an error.
4607 Given no good solution, I think the safest thing is one warning.
4608 The user won't be able to use -Werror, but....
4609
4610 Choose the STRUCT_VALUE_RTX hook because that's (at present) only
4611 called in response to actually generating a caller or callee that
4612 uses such a type. As opposed to RETURN_IN_MEMORY, which is called
4613 via aggregate_value_p for general type probing from tree-ssa. */
4614
4615 static rtx
4616 ix86_struct_value_rtx (tree type, int incoming ATTRIBUTE_UNUSED)
4617 {
4618 static bool warnedsse, warnedmmx;
4619
4620 if (!TARGET_64BIT && type)
4621 {
4622 /* Look at the return type of the function, not the function type. */
4623 enum machine_mode mode = TYPE_MODE (TREE_TYPE (type));
4624
4625 if (!TARGET_SSE && !warnedsse)
4626 {
4627 if (mode == TImode
4628 || (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 16))
4629 {
4630 warnedsse = true;
4631 warning (0, "SSE vector return without SSE enabled "
4632 "changes the ABI");
4633 }
4634 }
4635
4636 if (!TARGET_MMX && !warnedmmx)
4637 {
4638 if (VECTOR_MODE_P (mode) && GET_MODE_SIZE (mode) == 8)
4639 {
4640 warnedmmx = true;
4641 warning (0, "MMX vector return without MMX enabled "
4642 "changes the ABI");
4643 }
4644 }
4645 }
4646
4647 return NULL;
4648 }
4649
4650 \f
4651 /* Create the va_list data type. */
4652
4653 static tree
4654 ix86_build_builtin_va_list (void)
4655 {
4656 tree f_gpr, f_fpr, f_ovf, f_sav, record, type_decl;
4657
4658 /* For i386 we use a plain pointer to the argument area. */
4659 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4660 return build_pointer_type (char_type_node);
4661
4662 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
4663 type_decl = build_decl (TYPE_DECL, get_identifier ("__va_list_tag"), record);
4664
4665 f_gpr = build_decl (FIELD_DECL, get_identifier ("gp_offset"),
4666 unsigned_type_node);
4667 f_fpr = build_decl (FIELD_DECL, get_identifier ("fp_offset"),
4668 unsigned_type_node);
4669 f_ovf = build_decl (FIELD_DECL, get_identifier ("overflow_arg_area"),
4670 ptr_type_node);
4671 f_sav = build_decl (FIELD_DECL, get_identifier ("reg_save_area"),
4672 ptr_type_node);
4673
4674 va_list_gpr_counter_field = f_gpr;
4675 va_list_fpr_counter_field = f_fpr;
4676
4677 DECL_FIELD_CONTEXT (f_gpr) = record;
4678 DECL_FIELD_CONTEXT (f_fpr) = record;
4679 DECL_FIELD_CONTEXT (f_ovf) = record;
4680 DECL_FIELD_CONTEXT (f_sav) = record;
4681
4682 TREE_CHAIN (record) = type_decl;
4683 TYPE_NAME (record) = type_decl;
4684 TYPE_FIELDS (record) = f_gpr;
4685 TREE_CHAIN (f_gpr) = f_fpr;
4686 TREE_CHAIN (f_fpr) = f_ovf;
4687 TREE_CHAIN (f_ovf) = f_sav;
4688
4689 layout_type (record);
4690
4691 /* The correct type is an array type of one element. */
4692 return build_array_type (record, build_index_type (size_zero_node));
4693 }
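
/* For reference, the record built above matches the usual x86-64 va_list
   layout, roughly equivalent to

     typedef struct {
       unsigned int gp_offset;
       unsigned int fp_offset;
       void *overflow_arg_area;
       void *reg_save_area;
     } __va_list_tag;
     typedef __va_list_tag va_list[1];

   the array-of-one-element trick is why va_list decays to a pointer when
   passed to another function.  */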
4694
4695 /* Worker function for TARGET_SETUP_INCOMING_VARARGS. */
4696
4697 static void
4698 setup_incoming_varargs_64 (CUMULATIVE_ARGS *cum)
4699 {
4700 rtx save_area, mem;
4701 rtx label;
4702 rtx label_ref;
4703 rtx tmp_reg;
4704 rtx nsse_reg;
4705 alias_set_type set;
4706 int i;
4707
4708 if (! cfun->va_list_gpr_size && ! cfun->va_list_fpr_size)
4709 return;
4710
4711 /* Indicate that we must allocate space on the stack for the varargs save area. */
4712 ix86_save_varrargs_registers = 1;
4713 /* We need 16-byte stack alignment to save SSE registers. If the user
4714 asked for a lower preferred_stack_boundary, let's just hope they know
4715 what they are doing and won't pass SSE values through varargs.
4716 
4717 We may also end up assuming that only 64bit values are stored in an SSE
4718 register, which lets some floating point programs work. */
4719 if (ix86_preferred_stack_boundary >= 128)
4720 cfun->stack_alignment_needed = 128;
4721
4722 save_area = frame_pointer_rtx;
4723 set = get_varargs_alias_set ();
4724
4725 for (i = cum->regno;
4726 i < ix86_regparm
4727 && i < cum->regno + cfun->va_list_gpr_size / UNITS_PER_WORD;
4728 i++)
4729 {
4730 mem = gen_rtx_MEM (Pmode,
4731 plus_constant (save_area, i * UNITS_PER_WORD));
4732 MEM_NOTRAP_P (mem) = 1;
4733 set_mem_alias_set (mem, set);
4734 emit_move_insn (mem, gen_rtx_REG (Pmode,
4735 x86_64_int_parameter_registers[i]));
4736 }
4737
4738 if (cum->sse_nregs && cfun->va_list_fpr_size)
4739 {
4740 /* Now emit code to save SSE registers. The AX parameter contains the
4741 number of SSE parameter registers used to call this function. We use
4742 the sse_prologue_save insn template, which produces a computed jump
4743 across the SSE saves. We need some preparation work to get this working. */
4744
4745 label = gen_label_rtx ();
4746 label_ref = gen_rtx_LABEL_REF (Pmode, label);
4747
4748 /* Compute the address to jump to :
4749 label - 4*eax + nnamed_sse_arguments*4 */
4750 tmp_reg = gen_reg_rtx (Pmode);
4751 nsse_reg = gen_reg_rtx (Pmode);
4752 emit_insn (gen_zero_extendqidi2 (nsse_reg, gen_rtx_REG (QImode, 0)));
4753 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4754 gen_rtx_MULT (Pmode, nsse_reg,
4755 GEN_INT (4))));
4756 if (cum->sse_regno)
4757 emit_move_insn
4758 (nsse_reg,
4759 gen_rtx_CONST (DImode,
4760 gen_rtx_PLUS (DImode,
4761 label_ref,
4762 GEN_INT (cum->sse_regno * 4))));
4763 else
4764 emit_move_insn (nsse_reg, label_ref);
4765 emit_insn (gen_subdi3 (nsse_reg, nsse_reg, tmp_reg));
4766
4767 /* Compute the address of the memory block we save into. We always use a
4768 pointer pointing 127 bytes after the first byte to store - this is needed
4769 to keep the size of each save instruction limited to 4 bytes. */
4770 tmp_reg = gen_reg_rtx (Pmode);
4771 emit_insn (gen_rtx_SET (VOIDmode, tmp_reg,
4772 plus_constant (save_area,
4773 8 * REGPARM_MAX + 127)));
4774 mem = gen_rtx_MEM (BLKmode, plus_constant (tmp_reg, -127));
4775 MEM_NOTRAP_P (mem) = 1;
4776 set_mem_alias_set (mem, set);
4777 set_mem_align (mem, BITS_PER_WORD);
4778
4779 /* And finally do the dirty job! */
4780 emit_insn (gen_sse_prologue_save (mem, nsse_reg,
4781 GEN_INT (cum->sse_regno), label));
4782 }
4783 }
4784
4785 static void
4786 setup_incoming_varargs_ms_64 (CUMULATIVE_ARGS *cum)
4787 {
4788 alias_set_type set = get_varargs_alias_set ();
4789 int i;
4790
4791 for (i = cum->regno; i < REGPARM_MAX; i++)
4792 {
4793 rtx reg, mem;
4794
4795 mem = gen_rtx_MEM (Pmode,
4796 plus_constant (virtual_incoming_args_rtx,
4797 i * UNITS_PER_WORD));
4798 MEM_NOTRAP_P (mem) = 1;
4799 set_mem_alias_set (mem, set);
4800
4801 reg = gen_rtx_REG (Pmode, x86_64_ms_abi_int_parameter_registers[i]);
4802 emit_move_insn (mem, reg);
4803 }
4804 }
4805
4806 static void
4807 ix86_setup_incoming_varargs (CUMULATIVE_ARGS *cum, enum machine_mode mode,
4808 tree type, int *pretend_size ATTRIBUTE_UNUSED,
4809 int no_rtl)
4810 {
4811 CUMULATIVE_ARGS next_cum;
4812 tree fntype;
4813 int stdarg_p;
4814
4815 /* This argument doesn't appear to be used anymore. Which is good,
4816 because the old code here didn't suppress rtl generation. */
4817 gcc_assert (!no_rtl);
4818
4819 if (!TARGET_64BIT)
4820 return;
4821
4822 fntype = TREE_TYPE (current_function_decl);
4823 stdarg_p = (TYPE_ARG_TYPES (fntype) != 0
4824 && (TREE_VALUE (tree_last (TYPE_ARG_TYPES (fntype)))
4825 != void_type_node));
4826
4827 /* For varargs, we do not want to skip the dummy va_dcl argument.
4828 For stdargs, we do want to skip the last named argument. */
4829 next_cum = *cum;
4830 if (stdarg_p)
4831 function_arg_advance (&next_cum, mode, type, 1);
4832
4833 if (TARGET_64BIT_MS_ABI)
4834 setup_incoming_varargs_ms_64 (&next_cum);
4835 else
4836 setup_incoming_varargs_64 (&next_cum);
4837 }
4838
4839 /* Implement va_start. */
4840
4841 void
4842 ix86_va_start (tree valist, rtx nextarg)
4843 {
4844 HOST_WIDE_INT words, n_gpr, n_fpr;
4845 tree f_gpr, f_fpr, f_ovf, f_sav;
4846 tree gpr, fpr, ovf, sav, t;
4847 tree type;
4848
4849 /* Only 64bit target needs something special. */
4850 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4851 {
4852 std_expand_builtin_va_start (valist, nextarg);
4853 return;
4854 }
4855
4856 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4857 f_fpr = TREE_CHAIN (f_gpr);
4858 f_ovf = TREE_CHAIN (f_fpr);
4859 f_sav = TREE_CHAIN (f_ovf);
4860
4861 valist = build1 (INDIRECT_REF, TREE_TYPE (TREE_TYPE (valist)), valist);
4862 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4863 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4864 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4865 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4866
4867 /* Count number of gp and fp argument registers used. */
4868 words = current_function_args_info.words;
4869 n_gpr = current_function_args_info.regno;
4870 n_fpr = current_function_args_info.sse_regno;
4871
4872 if (cfun->va_list_gpr_size)
4873 {
4874 type = TREE_TYPE (gpr);
4875 t = build2 (GIMPLE_MODIFY_STMT, type, gpr,
4876 build_int_cst (type, n_gpr * 8));
4877 TREE_SIDE_EFFECTS (t) = 1;
4878 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4879 }
4880
4881 if (cfun->va_list_fpr_size)
4882 {
4883 type = TREE_TYPE (fpr);
4884 t = build2 (GIMPLE_MODIFY_STMT, type, fpr,
4885 build_int_cst (type, n_fpr * 16 + 8*REGPARM_MAX));
4886 TREE_SIDE_EFFECTS (t) = 1;
4887 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4888 }
4889
4890 /* Find the overflow area. */
4891 type = TREE_TYPE (ovf);
4892 t = make_tree (type, virtual_incoming_args_rtx);
4893 if (words != 0)
4894 t = build2 (POINTER_PLUS_EXPR, type, t,
4895 size_int (words * UNITS_PER_WORD));
4896 t = build2 (GIMPLE_MODIFY_STMT, type, ovf, t);
4897 TREE_SIDE_EFFECTS (t) = 1;
4898 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4899
4900 if (cfun->va_list_gpr_size || cfun->va_list_fpr_size)
4901 {
4902 /* Find the register save area.
4903 The prologue of the function saves it right above the stack frame. */
4904 type = TREE_TYPE (sav);
4905 t = make_tree (type, frame_pointer_rtx);
4906 t = build2 (GIMPLE_MODIFY_STMT, type, sav, t);
4907 TREE_SIDE_EFFECTS (t) = 1;
4908 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
4909 }
4910 }
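
/* Example (illustrative only, hypothetical prototype): for

     void f (int a, double b, ...);

   one general purpose and one SSE register are consumed by the named
   arguments, so the code above sets gp_offset to 8 and fp_offset to
   8 * REGPARM_MAX + 16, and overflow_arg_area points at the first
   stack-passed word.  */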
4911
4912 /* Implement va_arg. */
4913
4914 static tree
4915 ix86_gimplify_va_arg (tree valist, tree type, tree *pre_p, tree *post_p)
4916 {
4917 static const int intreg[6] = { 0, 1, 2, 3, 4, 5 };
4918 tree f_gpr, f_fpr, f_ovf, f_sav;
4919 tree gpr, fpr, ovf, sav, t;
4920 int size, rsize;
4921 tree lab_false, lab_over = NULL_TREE;
4922 tree addr, t2;
4923 rtx container;
4924 int indirect_p = 0;
4925 tree ptrtype;
4926 enum machine_mode nat_mode;
4927
4928 /* Only 64bit target needs something special. */
4929 if (!TARGET_64BIT || TARGET_64BIT_MS_ABI)
4930 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
4931
4932 f_gpr = TYPE_FIELDS (TREE_TYPE (va_list_type_node));
4933 f_fpr = TREE_CHAIN (f_gpr);
4934 f_ovf = TREE_CHAIN (f_fpr);
4935 f_sav = TREE_CHAIN (f_ovf);
4936
4937 valist = build_va_arg_indirect_ref (valist);
4938 gpr = build3 (COMPONENT_REF, TREE_TYPE (f_gpr), valist, f_gpr, NULL_TREE);
4939 fpr = build3 (COMPONENT_REF, TREE_TYPE (f_fpr), valist, f_fpr, NULL_TREE);
4940 ovf = build3 (COMPONENT_REF, TREE_TYPE (f_ovf), valist, f_ovf, NULL_TREE);
4941 sav = build3 (COMPONENT_REF, TREE_TYPE (f_sav), valist, f_sav, NULL_TREE);
4942
4943 indirect_p = pass_by_reference (NULL, TYPE_MODE (type), type, false);
4944 if (indirect_p)
4945 type = build_pointer_type (type);
4946 size = int_size_in_bytes (type);
4947 rsize = (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
4948
4949 nat_mode = type_natural_mode (type);
4950 container = construct_container (nat_mode, TYPE_MODE (type), type, 0,
4951 REGPARM_MAX, SSE_REGPARM_MAX, intreg, 0);
4952
4953 /* Pull the value out of the saved registers. */
4954
4955 addr = create_tmp_var (ptr_type_node, "addr");
4956 DECL_POINTER_ALIAS_SET (addr) = get_varargs_alias_set ();
4957
4958 if (container)
4959 {
4960 int needed_intregs, needed_sseregs;
4961 bool need_temp;
4962 tree int_addr, sse_addr;
4963
4964 lab_false = create_artificial_label ();
4965 lab_over = create_artificial_label ();
4966
4967 examine_argument (nat_mode, type, 0, &needed_intregs, &needed_sseregs);
4968
4969 need_temp = (!REG_P (container)
4970 && ((needed_intregs && TYPE_ALIGN (type) > 64)
4971 || TYPE_ALIGN (type) > 128));
4972
4973 /* In case we are passing a structure, verify that it is a consecutive
4974 block in the register save area. If not, we need to do moves. */
4975 if (!need_temp && !REG_P (container))
4976 {
4977 /* Verify that all registers are strictly consecutive */
4978 if (SSE_REGNO_P (REGNO (XEXP (XVECEXP (container, 0, 0), 0))))
4979 {
4980 int i;
4981
4982 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4983 {
4984 rtx slot = XVECEXP (container, 0, i);
4985 if (REGNO (XEXP (slot, 0)) != FIRST_SSE_REG + (unsigned int) i
4986 || INTVAL (XEXP (slot, 1)) != i * 16)
4987 need_temp = 1;
4988 }
4989 }
4990 else
4991 {
4992 int i;
4993
4994 for (i = 0; i < XVECLEN (container, 0) && !need_temp; i++)
4995 {
4996 rtx slot = XVECEXP (container, 0, i);
4997 if (REGNO (XEXP (slot, 0)) != (unsigned int) i
4998 || INTVAL (XEXP (slot, 1)) != i * 8)
4999 need_temp = 1;
5000 }
5001 }
5002 }
5003 if (!need_temp)
5004 {
5005 int_addr = addr;
5006 sse_addr = addr;
5007 }
5008 else
5009 {
5010 int_addr = create_tmp_var (ptr_type_node, "int_addr");
5011 DECL_POINTER_ALIAS_SET (int_addr) = get_varargs_alias_set ();
5012 sse_addr = create_tmp_var (ptr_type_node, "sse_addr");
5013 DECL_POINTER_ALIAS_SET (sse_addr) = get_varargs_alias_set ();
5014 }
5015
5016 /* First ensure that we fit completely in registers. */
5017 if (needed_intregs)
5018 {
5019 t = build_int_cst (TREE_TYPE (gpr),
5020 (REGPARM_MAX - needed_intregs + 1) * 8);
5021 t = build2 (GE_EXPR, boolean_type_node, gpr, t);
5022 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5023 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5024 gimplify_and_add (t, pre_p);
5025 }
5026 if (needed_sseregs)
5027 {
5028 t = build_int_cst (TREE_TYPE (fpr),
5029 (SSE_REGPARM_MAX - needed_sseregs + 1) * 16
5030 + REGPARM_MAX * 8);
5031 t = build2 (GE_EXPR, boolean_type_node, fpr, t);
5032 t2 = build1 (GOTO_EXPR, void_type_node, lab_false);
5033 t = build3 (COND_EXPR, void_type_node, t, t2, NULL_TREE);
5034 gimplify_and_add (t, pre_p);
5035 }
5036
5037 /* Compute index to start of area used for integer regs. */
5038 if (needed_intregs)
5039 {
5040 /* int_addr = gpr + sav; */
5041 t = fold_convert (sizetype, gpr);
5042 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5043 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, int_addr, t);
5044 gimplify_and_add (t, pre_p);
5045 }
5046 if (needed_sseregs)
5047 {
5048 /* sse_addr = fpr + sav; */
5049 t = fold_convert (sizetype, fpr);
5050 t = build2 (POINTER_PLUS_EXPR, ptr_type_node, sav, t);
5051 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, sse_addr, t);
5052 gimplify_and_add (t, pre_p);
5053 }
5054 if (need_temp)
5055 {
5056 int i;
5057 tree temp = create_tmp_var (type, "va_arg_tmp");
5058
5059 /* addr = &temp; */
5060 t = build1 (ADDR_EXPR, build_pointer_type (type), temp);
5061 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5062 gimplify_and_add (t, pre_p);
5063
5064 for (i = 0; i < XVECLEN (container, 0); i++)
5065 {
5066 rtx slot = XVECEXP (container, 0, i);
5067 rtx reg = XEXP (slot, 0);
5068 enum machine_mode mode = GET_MODE (reg);
5069 tree piece_type = lang_hooks.types.type_for_mode (mode, 1);
5070 tree addr_type = build_pointer_type (piece_type);
5071 tree src_addr, src;
5072 int src_offset;
5073 tree dest_addr, dest;
5074
5075 if (SSE_REGNO_P (REGNO (reg)))
5076 {
5077 src_addr = sse_addr;
5078 src_offset = (REGNO (reg) - FIRST_SSE_REG) * 16;
5079 }
5080 else
5081 {
5082 src_addr = int_addr;
5083 src_offset = REGNO (reg) * 8;
5084 }
5085 src_addr = fold_convert (addr_type, src_addr);
5086 src_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, src_addr,
5087 size_int (src_offset));
5088 src = build_va_arg_indirect_ref (src_addr);
5089
5090 dest_addr = fold_convert (addr_type, addr);
5091 dest_addr = fold_build2 (POINTER_PLUS_EXPR, addr_type, dest_addr,
5092 size_int (INTVAL (XEXP (slot, 1))));
5093 dest = build_va_arg_indirect_ref (dest_addr);
5094
5095 t = build2 (GIMPLE_MODIFY_STMT, void_type_node, dest, src);
5096 gimplify_and_add (t, pre_p);
5097 }
5098 }
5099
5100 if (needed_intregs)
5101 {
5102 t = build2 (PLUS_EXPR, TREE_TYPE (gpr), gpr,
5103 build_int_cst (TREE_TYPE (gpr), needed_intregs * 8));
5104 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (gpr), gpr, t);
5105 gimplify_and_add (t, pre_p);
5106 }
5107 if (needed_sseregs)
5108 {
5109 t = build2 (PLUS_EXPR, TREE_TYPE (fpr), fpr,
5110 build_int_cst (TREE_TYPE (fpr), needed_sseregs * 16));
5111 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (fpr), fpr, t);
5112 gimplify_and_add (t, pre_p);
5113 }
5114
5115 t = build1 (GOTO_EXPR, void_type_node, lab_over);
5116 gimplify_and_add (t, pre_p);
5117
5118 t = build1 (LABEL_EXPR, void_type_node, lab_false);
5119 append_to_statement_list (t, pre_p);
5120 }
5121
5122 /* ... otherwise out of the overflow area. */
5123
5124 /* Care for on-stack alignment if needed. */
5125 if (FUNCTION_ARG_BOUNDARY (VOIDmode, type) <= 64
5126 || integer_zerop (TYPE_SIZE (type)))
5127 t = ovf;
5128 else
5129 {
5130 HOST_WIDE_INT align = FUNCTION_ARG_BOUNDARY (VOIDmode, type) / 8;
5131 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (ovf), ovf,
5132 size_int (align - 1));
5133 t = fold_convert (sizetype, t);
5134 t = build2 (BIT_AND_EXPR, TREE_TYPE (t), t,
5135 size_int (-align));
5136 t = fold_convert (TREE_TYPE (ovf), t);
5137 }
5138 gimplify_expr (&t, pre_p, NULL, is_gimple_val, fb_rvalue);
5139
5140 t2 = build2 (GIMPLE_MODIFY_STMT, void_type_node, addr, t);
5141 gimplify_and_add (t2, pre_p);
5142
5143 t = build2 (POINTER_PLUS_EXPR, TREE_TYPE (t), t,
5144 size_int (rsize * UNITS_PER_WORD));
5145 t = build2 (GIMPLE_MODIFY_STMT, TREE_TYPE (ovf), ovf, t);
5146 gimplify_and_add (t, pre_p);
5147
5148 if (container)
5149 {
5150 t = build1 (LABEL_EXPR, void_type_node, lab_over);
5151 append_to_statement_list (t, pre_p);
5152 }
5153
5154 ptrtype = build_pointer_type (type);
5155 addr = fold_convert (ptrtype, addr);
5156
5157 if (indirect_p)
5158 addr = build_va_arg_indirect_ref (addr);
5159 return build_va_arg_indirect_ref (addr);
5160 }
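/* For illustration (a sketch, assuming the standard x86-64 values
   REGPARM_MAX == 6 and SSE_REGPARM_MAX == 8): the register save area
   addressed through SAV holds 6*8 = 48 bytes of integer registers
   followed by 8*16 = 128 bytes of SSE registers.  va_arg (ap, int)
   therefore tests gpr < 48, reads from sav + gpr and bumps gpr by 8,
   while va_arg (ap, double) tests fpr < 48 + 128, reads from sav + fpr
   and bumps fpr by 16; anything that does not fit is read from the
   overflow area instead.  */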
5161 \f
5162 /* Return nonzero if OPNUM's MEM should be matched
5163 in movabs* patterns. */
5164
5165 int
5166 ix86_check_movabs (rtx insn, int opnum)
5167 {
5168 rtx set, mem;
5169
5170 set = PATTERN (insn);
5171 if (GET_CODE (set) == PARALLEL)
5172 set = XVECEXP (set, 0, 0);
5173 gcc_assert (GET_CODE (set) == SET);
5174 mem = XEXP (set, opnum);
5175 while (GET_CODE (mem) == SUBREG)
5176 mem = SUBREG_REG (mem);
5177 gcc_assert (MEM_P (mem));
5178 return (volatile_ok || !MEM_VOLATILE_P (mem));
5179 }
5180 \f
5181 /* Initialize the table of extra 80387 mathematical constants. */
5182
5183 static void
5184 init_ext_80387_constants (void)
5185 {
5186 static const char * cst[5] =
5187 {
5188 "0.3010299956639811952256464283594894482", /* 0: fldlg2 */
5189 "0.6931471805599453094286904741849753009", /* 1: fldln2 */
5190 "1.4426950408889634073876517827983434472", /* 2: fldl2e */
5191 "3.3219280948873623478083405569094566090", /* 3: fldl2t */
5192 "3.1415926535897932385128089594061862044", /* 4: fldpi */
5193 };
5194 int i;
5195
5196 for (i = 0; i < 5; i++)
5197 {
5198 real_from_string (&ext_80387_constants_table[i], cst[i]);
5199 /* Ensure each constant is rounded to XFmode precision. */
5200 real_convert (&ext_80387_constants_table[i],
5201 XFmode, &ext_80387_constants_table[i]);
5202 }
5203
5204 ext_80387_constants_init = 1;
5205 }
5206
5207 /* Return true if the constant is something that can be loaded with
5208 a special instruction. */
5209
5210 int
5211 standard_80387_constant_p (rtx x)
5212 {
5213 enum machine_mode mode = GET_MODE (x);
5214
5215 REAL_VALUE_TYPE r;
5216
5217 if (!(X87_FLOAT_MODE_P (mode) && (GET_CODE (x) == CONST_DOUBLE)))
5218 return -1;
5219
5220 if (x == CONST0_RTX (mode))
5221 return 1;
5222 if (x == CONST1_RTX (mode))
5223 return 2;
5224
5225 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
5226
5227 /* For XFmode constants, try to find a special 80387 instruction when
5228 optimizing for size or on those CPUs that benefit from them. */
5229 if (mode == XFmode
5230 && (optimize_size || TARGET_EXT_80387_CONSTANTS))
5231 {
5232 int i;
5233
5234 if (! ext_80387_constants_init)
5235 init_ext_80387_constants ();
5236
5237 for (i = 0; i < 5; i++)
5238 if (real_identical (&r, &ext_80387_constants_table[i]))
5239 return i + 3;
5240 }
5241
5242   /* A load of the constant -0.0 or -1.0 will be split into an
5243      fldz;fchs or fld1;fchs sequence.  */
5244 if (real_isnegzero (&r))
5245 return 8;
5246 if (real_identical (&r, &dconstm1))
5247 return 9;
5248
5249 return 0;
5250 }
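/* For illustration (a sketch of the encoding used above): +0.0 is
   classified as 1 (fldz), 1.0 as 2 (fld1), the XFmode constants from the
   table above as 3..7 (fldlg2, fldln2, fldl2e, fldl2t, fldpi) when the
   extended constants are enabled, -0.0 as 8 and -1.0 as 9 (both later
   split into fldz/fld1 followed by fchs); any other constant yields 0,
   and a non-x87 operand yields -1.  */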
5251
5252 /* Return the opcode of the special instruction to be used to load
5253 the constant X. */
5254
5255 const char *
5256 standard_80387_constant_opcode (rtx x)
5257 {
5258 switch (standard_80387_constant_p (x))
5259 {
5260 case 1:
5261 return "fldz";
5262 case 2:
5263 return "fld1";
5264 case 3:
5265 return "fldlg2";
5266 case 4:
5267 return "fldln2";
5268 case 5:
5269 return "fldl2e";
5270 case 6:
5271 return "fldl2t";
5272 case 7:
5273 return "fldpi";
5274 case 8:
5275 case 9:
5276 return "#";
5277 default:
5278 gcc_unreachable ();
5279 }
5280 }
5281
5282 /* Return the CONST_DOUBLE representing the 80387 constant that is
5283 loaded by the specified special instruction. The argument IDX
5284 matches the return value from standard_80387_constant_p. */
5285
5286 rtx
5287 standard_80387_constant_rtx (int idx)
5288 {
5289 int i;
5290
5291 if (! ext_80387_constants_init)
5292 init_ext_80387_constants ();
5293
5294 switch (idx)
5295 {
5296 case 3:
5297 case 4:
5298 case 5:
5299 case 6:
5300 case 7:
5301 i = idx - 3;
5302 break;
5303
5304 default:
5305 gcc_unreachable ();
5306 }
5307
5308 return CONST_DOUBLE_FROM_REAL_VALUE (ext_80387_constants_table[i],
5309 XFmode);
5310 }
5311
5312 /* Return 1 if MODE is a valid mode for SSE.  */
5313 static int
5314 standard_sse_mode_p (enum machine_mode mode)
5315 {
5316 switch (mode)
5317 {
5318 case V16QImode:
5319 case V8HImode:
5320 case V4SImode:
5321 case V2DImode:
5322 case V4SFmode:
5323 case V2DFmode:
5324 return 1;
5325
5326 default:
5327 return 0;
5328 }
5329 }
5330
5331 /* Return 1 if X is an FP constant that we can load into an SSE register
5332    without using memory.  */
5333 int
5334 standard_sse_constant_p (rtx x)
5335 {
5336 enum machine_mode mode = GET_MODE (x);
5337
5338 if (x == const0_rtx || x == CONST0_RTX (GET_MODE (x)))
5339 return 1;
5340 if (vector_all_ones_operand (x, mode)
5341 && standard_sse_mode_p (mode))
5342 return TARGET_SSE2 ? 2 : -1;
5343
5344 return 0;
5345 }
5346
5347 /* Return the opcode of the special instruction to be used to load
5348 the constant X. */
5349
5350 const char *
5351 standard_sse_constant_opcode (rtx insn, rtx x)
5352 {
5353 switch (standard_sse_constant_p (x))
5354 {
5355 case 1:
5356 if (get_attr_mode (insn) == MODE_V4SF)
5357 return "xorps\t%0, %0";
5358 else if (get_attr_mode (insn) == MODE_V2DF)
5359 return "xorpd\t%0, %0";
5360 else
5361 return "pxor\t%0, %0";
5362 case 2:
5363 return "pcmpeqd\t%0, %0";
5364 }
5365 gcc_unreachable ();
5366 }
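/* For illustration (a sketch of the two cases above): an all-zero vector
   constant is classified as 1 and cleared with xorps, xorpd or pxor
   depending on the insn's mode attribute, while an all-ones vector
   constant in one of the standard SSE modes is classified as 2 and
   materialized with pcmpeqd when SSE2 is available (or -1 when it is
   not).  */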
5367
5368 /* Returns 1 if OP contains a symbol reference */
5369
5370 int
5371 symbolic_reference_mentioned_p (rtx op)
5372 {
5373 const char *fmt;
5374 int i;
5375
5376 if (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == LABEL_REF)
5377 return 1;
5378
5379 fmt = GET_RTX_FORMAT (GET_CODE (op));
5380 for (i = GET_RTX_LENGTH (GET_CODE (op)) - 1; i >= 0; i--)
5381 {
5382 if (fmt[i] == 'E')
5383 {
5384 int j;
5385
5386 for (j = XVECLEN (op, i) - 1; j >= 0; j--)
5387 if (symbolic_reference_mentioned_p (XVECEXP (op, i, j)))
5388 return 1;
5389 }
5390
5391 else if (fmt[i] == 'e' && symbolic_reference_mentioned_p (XEXP (op, i)))
5392 return 1;
5393 }
5394
5395 return 0;
5396 }
5397
5398 /* Return 1 if it is appropriate to emit `ret' instructions in the
5399 body of a function. Do this only if the epilogue is simple, needing a
5400 couple of insns. Prior to reloading, we can't tell how many registers
5401 must be saved, so return 0 then. Return 0 if there is no frame
5402 marker to de-allocate. */
5403
5404 int
5405 ix86_can_use_return_insn_p (void)
5406 {
5407 struct ix86_frame frame;
5408
5409 if (! reload_completed || frame_pointer_needed)
5410 return 0;
5411
5412   /* Don't allow more than 32768 bytes of pop, since that's all we can do
5413      with one instruction.  */
5414 if (current_function_pops_args
5415 && current_function_args_size >= 32768)
5416 return 0;
5417
5418 ix86_compute_frame_layout (&frame);
5419 return frame.to_allocate == 0 && frame.nregs == 0;
5420 }
5421 \f
5422 /* Value should be nonzero if functions must have frame pointers.
5423 Zero means the frame pointer need not be set up (and parms may
5424 be accessed via the stack pointer) in functions that seem suitable. */
5425
5426 int
5427 ix86_frame_pointer_required (void)
5428 {
5429 /* If we accessed previous frames, then the generated code expects
5430 to be able to access the saved ebp value in our frame. */
5431 if (cfun->machine->accesses_prev_frame)
5432 return 1;
5433
5434   /* Several x86 OSes need a frame pointer for other reasons,
5435      usually pertaining to setjmp.  */
5436 if (SUBTARGET_FRAME_POINTER_REQUIRED)
5437 return 1;
5438
5439 /* In override_options, TARGET_OMIT_LEAF_FRAME_POINTER turns off
5440 the frame pointer by default. Turn it back on now if we've not
5441 got a leaf function. */
5442 if (TARGET_OMIT_LEAF_FRAME_POINTER
5443 && (!current_function_is_leaf
5444 || ix86_current_function_calls_tls_descriptor))
5445 return 1;
5446
5447 if (current_function_profile)
5448 return 1;
5449
5450 return 0;
5451 }
5452
5453 /* Record that the current function accesses previous call frames. */
5454
5455 void
5456 ix86_setup_frame_addresses (void)
5457 {
5458 cfun->machine->accesses_prev_frame = 1;
5459 }
5460 \f
5461 #if (defined(HAVE_GAS_HIDDEN) && (SUPPORTS_ONE_ONLY - 0)) || TARGET_MACHO
5462 # define USE_HIDDEN_LINKONCE 1
5463 #else
5464 # define USE_HIDDEN_LINKONCE 0
5465 #endif
5466
5467 static int pic_labels_used;
5468
5469 /* Fills in the label name that should be used for a pc thunk for
5470 the given register. */
5471
5472 static void
5473 get_pc_thunk_name (char name[32], unsigned int regno)
5474 {
5475 gcc_assert (!TARGET_64BIT);
5476
5477 if (USE_HIDDEN_LINKONCE)
5478 sprintf (name, "__i686.get_pc_thunk.%s", reg_names[regno]);
5479 else
5480 ASM_GENERATE_INTERNAL_LABEL (name, "LPR", regno);
5481 }
5482
5483
5484 /* At the end of the file, emit the pc thunks used for -fpic: each one
5485    loads its register with the return address of the caller and then returns.  */
5486
5487 void
5488 ix86_file_end (void)
5489 {
5490 rtx xops[2];
5491 int regno;
5492
5493 for (regno = 0; regno < 8; ++regno)
5494 {
5495 char name[32];
5496
5497 if (! ((pic_labels_used >> regno) & 1))
5498 continue;
5499
5500 get_pc_thunk_name (name, regno);
5501
5502 #if TARGET_MACHO
5503 if (TARGET_MACHO)
5504 {
5505 switch_to_section (darwin_sections[text_coal_section]);
5506 fputs ("\t.weak_definition\t", asm_out_file);
5507 assemble_name (asm_out_file, name);
5508 fputs ("\n\t.private_extern\t", asm_out_file);
5509 assemble_name (asm_out_file, name);
5510 fputs ("\n", asm_out_file);
5511 ASM_OUTPUT_LABEL (asm_out_file, name);
5512 }
5513 else
5514 #endif
5515 if (USE_HIDDEN_LINKONCE)
5516 {
5517 tree decl;
5518
5519 decl = build_decl (FUNCTION_DECL, get_identifier (name),
5520 error_mark_node);
5521 TREE_PUBLIC (decl) = 1;
5522 TREE_STATIC (decl) = 1;
5523 DECL_ONE_ONLY (decl) = 1;
5524
5525 (*targetm.asm_out.unique_section) (decl, 0);
5526 switch_to_section (get_named_section (decl, NULL, 0));
5527
5528 (*targetm.asm_out.globalize_label) (asm_out_file, name);
5529 fputs ("\t.hidden\t", asm_out_file);
5530 assemble_name (asm_out_file, name);
5531 fputc ('\n', asm_out_file);
5532 ASM_DECLARE_FUNCTION_NAME (asm_out_file, name, decl);
5533 }
5534 else
5535 {
5536 switch_to_section (text_section);
5537 ASM_OUTPUT_LABEL (asm_out_file, name);
5538 }
5539
5540 xops[0] = gen_rtx_REG (SImode, regno);
5541 xops[1] = gen_rtx_MEM (SImode, stack_pointer_rtx);
5542 output_asm_insn ("mov{l}\t{%1, %0|%0, %1}", xops);
5543 output_asm_insn ("ret", xops);
5544 }
5545
5546 if (NEED_INDICATE_EXEC_STACK)
5547 file_end_indicate_exec_stack ();
5548 }
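/* For illustration (a sketch of what the loop above emits for the %ebx
   thunk in the hidden-linkonce case):

	__i686.get_pc_thunk.bx:
		movl	(%esp), %ebx
		ret

   i.e. the thunk copies its own return address -- the address of the
   instruction following the call -- into the requested register.  */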
5549
5550 /* Emit code for the SET_GOT patterns. */
5551
5552 const char *
5553 output_set_got (rtx dest, rtx label ATTRIBUTE_UNUSED)
5554 {
5555 rtx xops[3];
5556
5557 xops[0] = dest;
5558
5559 if (TARGET_VXWORKS_RTP && flag_pic)
5560 {
5561 /* Load (*VXWORKS_GOTT_BASE) into the PIC register. */
5562 xops[2] = gen_rtx_MEM (Pmode,
5563 gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE));
5564 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5565
5566 /* Load (*VXWORKS_GOTT_BASE)[VXWORKS_GOTT_INDEX] into the PIC register.
5567 Use %P and a local symbol in order to print VXWORKS_GOTT_INDEX as
5568 an unadorned address. */
5569 xops[2] = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
5570 SYMBOL_REF_FLAGS (xops[2]) |= SYMBOL_FLAG_LOCAL;
5571 output_asm_insn ("mov{l}\t{%P2(%0), %0|%0, DWORD PTR %P2[%0]}", xops);
5572 return "";
5573 }
5574
5575 xops[1] = gen_rtx_SYMBOL_REF (Pmode, GOT_SYMBOL_NAME);
5576
5577 if (! TARGET_DEEP_BRANCH_PREDICTION || !flag_pic)
5578 {
5579 xops[2] = gen_rtx_LABEL_REF (Pmode, label ? label : gen_label_rtx ());
5580
5581 if (!flag_pic)
5582 output_asm_insn ("mov{l}\t{%2, %0|%0, %2}", xops);
5583 else
5584 output_asm_insn ("call\t%a2", xops);
5585
5586 #if TARGET_MACHO
5587 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5588 is what will be referenced by the Mach-O PIC subsystem. */
5589 if (!label)
5590 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5591 #endif
5592
5593 (*targetm.asm_out.internal_label) (asm_out_file, "L",
5594 CODE_LABEL_NUMBER (XEXP (xops[2], 0)));
5595
5596 if (flag_pic)
5597 output_asm_insn ("pop{l}\t%0", xops);
5598 }
5599 else
5600 {
5601 char name[32];
5602 get_pc_thunk_name (name, REGNO (dest));
5603 pic_labels_used |= 1 << REGNO (dest);
5604
5605 xops[2] = gen_rtx_SYMBOL_REF (Pmode, ggc_strdup (name));
5606 xops[2] = gen_rtx_MEM (QImode, xops[2]);
5607 output_asm_insn ("call\t%X2", xops);
5608 /* Output the Mach-O "canonical" label name ("Lxx$pb") here too. This
5609 is what will be referenced by the Mach-O PIC subsystem. */
5610 #if TARGET_MACHO
5611 if (!label)
5612 ASM_OUTPUT_LABEL (asm_out_file, machopic_function_base_name ());
5613 else
5614 targetm.asm_out.internal_label (asm_out_file, "L",
5615 CODE_LABEL_NUMBER (label));
5616 #endif
5617 }
5618
5619 if (TARGET_MACHO)
5620 return "";
5621
5622 if (!flag_pic || TARGET_DEEP_BRANCH_PREDICTION)
5623 output_asm_insn ("add{l}\t{%1, %0|%0, %1}", xops);
5624 else
5625 output_asm_insn ("add{l}\t{%1+[.-%a2], %0|%0, %1+(.-%a2)}", xops);
5626
5627 return "";
5628 }
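/* For illustration (a sketch, assuming an ELF target where
   GOT_SYMBOL_NAME is _GLOBAL_OFFSET_TABLE_).  Without deep branch
   prediction the PIC register is set up inline:

	call	.L2
   .L2:	popl	%ebx
	addl	$_GLOBAL_OFFSET_TABLE_+[.-.L2], %ebx

   With deep branch prediction the pop is replaced by a call to the pc
   thunk emitted by ix86_file_end:

	call	__i686.get_pc_thunk.bx
	addl	$_GLOBAL_OFFSET_TABLE_, %ebx  */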
5629
5630 /* Generate a "push" pattern for input ARG.  */
5631
5632 static rtx
5633 gen_push (rtx arg)
5634 {
5635 return gen_rtx_SET (VOIDmode,
5636 gen_rtx_MEM (Pmode,
5637 gen_rtx_PRE_DEC (Pmode,
5638 stack_pointer_rtx)),
5639 arg);
5640 }
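/* For illustration (a sketch, assuming a 32-bit target where Pmode is
   SImode): gen_push (gen_rtx_REG (SImode, 0)) builds roughly

	(set (mem:SI (pre_dec:SI (reg:SI sp))) (reg:SI ax))

   which the insn patterns emit as a single "pushl %eax".  */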
5641
5642 /* Return the number of an unused call-clobbered register if one is
5643    available for the entire function, or INVALID_REGNUM otherwise.  */
5644
5645 static unsigned int
5646 ix86_select_alt_pic_regnum (void)
5647 {
5648 if (current_function_is_leaf && !current_function_profile
5649 && !ix86_current_function_calls_tls_descriptor)
5650 {
5651 int i;
5652 for (i = 2; i >= 0; --i)
5653 if (!df_regs_ever_live_p (i))
5654 return i;
5655 }
5656
5657 return INVALID_REGNUM;
5658 }
5659
5660 /* Return 1 if we need to save REGNO. */
5661 static int
5662 ix86_save_reg (unsigned int regno, int maybe_eh_return)
5663 {
5664 if (pic_offset_table_rtx
5665 && regno == REAL_PIC_OFFSET_TABLE_REGNUM
5666 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
5667 || current_function_profile
5668 || current_function_calls_eh_return
5669 || current_function_uses_const_pool))
5670 {
5671 if (ix86_select_alt_pic_regnum () != INVALID_REGNUM)
5672 return 0;
5673 return 1;
5674 }
5675
5676 if (current_function_calls_eh_return && maybe_eh_return)
5677 {
5678 unsigned i;
5679 for (i = 0; ; i++)
5680 {
5681 unsigned test = EH_RETURN_DATA_REGNO (i);
5682 if (test == INVALID_REGNUM)
5683 break;
5684 if (test == regno)
5685 return 1;
5686 }
5687 }
5688
5689 if (cfun->machine->force_align_arg_pointer
5690 && regno == REGNO (cfun->machine->force_align_arg_pointer))
5691 return 1;
5692
5693 return (df_regs_ever_live_p (regno)
5694 && !call_used_regs[regno]
5695 && !fixed_regs[regno]
5696 && (regno != HARD_FRAME_POINTER_REGNUM || !frame_pointer_needed));
5697 }
5698
5699 /* Return number of registers to be saved on the stack. */
5700
5701 static int
5702 ix86_nsaved_regs (void)
5703 {
5704 int nregs = 0;
5705 int regno;
5706
5707 for (regno = FIRST_PSEUDO_REGISTER - 1; regno >= 0; regno--)
5708 if (ix86_save_reg (regno, true))
5709 nregs++;
5710 return nregs;
5711 }
5712
5713 /* Return the offset between two registers, one to be eliminated, and the other
5714 its replacement, at the start of a routine. */
5715
5716 HOST_WIDE_INT
5717 ix86_initial_elimination_offset (int from, int to)
5718 {
5719 struct ix86_frame frame;
5720 ix86_compute_frame_layout (&frame);
5721
5722 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
5723 return frame.hard_frame_pointer_offset;
5724 else if (from == FRAME_POINTER_REGNUM
5725 && to == HARD_FRAME_POINTER_REGNUM)
5726 return frame.hard_frame_pointer_offset - frame.frame_pointer_offset;
5727 else
5728 {
5729 gcc_assert (to == STACK_POINTER_REGNUM);
5730
5731 if (from == ARG_POINTER_REGNUM)
5732 return frame.stack_pointer_offset;
5733
5734 gcc_assert (from == FRAME_POINTER_REGNUM);
5735 return frame.stack_pointer_offset - frame.frame_pointer_offset;
5736 }
5737 }
5738
5739 /* Fill in the ix86_frame structure describing the frame of the currently compiled function.  */
5740
5741 static void
5742 ix86_compute_frame_layout (struct ix86_frame *frame)
5743 {
5744 HOST_WIDE_INT total_size;
5745 unsigned int stack_alignment_needed;
5746 HOST_WIDE_INT offset;
5747 unsigned int preferred_alignment;
5748 HOST_WIDE_INT size = get_frame_size ();
5749
5750 frame->nregs = ix86_nsaved_regs ();
5751 total_size = size;
5752
5753 stack_alignment_needed = cfun->stack_alignment_needed / BITS_PER_UNIT;
5754 preferred_alignment = cfun->preferred_stack_boundary / BITS_PER_UNIT;
5755
5756   /* During the reload iteration the number of registers saved can change.
5757      Recompute the value as needed.  Do not recompute when the number of registers
5758      didn't change, as reload does multiple calls to this function and does not
5759      expect the decision to change within a single iteration.  */
5760 if (!optimize_size
5761 && cfun->machine->use_fast_prologue_epilogue_nregs != frame->nregs)
5762 {
5763 int count = frame->nregs;
5764
5765 cfun->machine->use_fast_prologue_epilogue_nregs = count;
5766 /* The fast prologue uses move instead of push to save registers. This
5767 is significantly longer, but also executes faster as modern hardware
5768 can execute the moves in parallel, but can't do that for push/pop.
5769
5770	 Be careful about choosing what prologue to emit: when the function takes
5771	 many instructions to execute, we may use the slow version, as well as
5772	 when the function is known to be outside a hot spot (this is known with
5773	 feedback only).  Weigh the size of the function by the number of registers
5774	 to save, as it is cheap to use one or two push instructions but very
5775	 slow to use many of them.  */
5776 if (count)
5777 count = (count - 1) * FAST_PROLOGUE_INSN_COUNT;
5778 if (cfun->function_frequency < FUNCTION_FREQUENCY_NORMAL
5779 || (flag_branch_probabilities
5780 && cfun->function_frequency < FUNCTION_FREQUENCY_HOT))
5781 cfun->machine->use_fast_prologue_epilogue = false;
5782 else
5783 cfun->machine->use_fast_prologue_epilogue
5784 = !expensive_function_p (count);
5785 }
5786 if (TARGET_PROLOGUE_USING_MOVE
5787 && cfun->machine->use_fast_prologue_epilogue)
5788 frame->save_regs_using_mov = true;
5789 else
5790 frame->save_regs_using_mov = false;
5791
5792
5793 /* Skip return address and saved base pointer. */
5794 offset = frame_pointer_needed ? UNITS_PER_WORD * 2 : UNITS_PER_WORD;
5795
5796 frame->hard_frame_pointer_offset = offset;
5797
5798   /* Do some sanity checking of stack_alignment_needed and
5799      preferred_alignment, since the i386 port is the only one using these
5800      features, and they may break easily.  */
5801
5802 gcc_assert (!size || stack_alignment_needed);
5803 gcc_assert (preferred_alignment >= STACK_BOUNDARY / BITS_PER_UNIT);
5804 gcc_assert (preferred_alignment <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5805 gcc_assert (stack_alignment_needed
5806 <= PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT);
5807
5808 if (stack_alignment_needed < STACK_BOUNDARY / BITS_PER_UNIT)
5809 stack_alignment_needed = STACK_BOUNDARY / BITS_PER_UNIT;
5810
5811 /* Register save area */
5812 offset += frame->nregs * UNITS_PER_WORD;
5813
5814 /* Va-arg area */
5815 if (ix86_save_varrargs_registers)
5816 {
5817 offset += X86_64_VARARGS_SIZE;
5818 frame->va_arg_size = X86_64_VARARGS_SIZE;
5819 }
5820 else
5821 frame->va_arg_size = 0;
5822
5823 /* Align start of frame for local function. */
5824 frame->padding1 = ((offset + stack_alignment_needed - 1)
5825 & -stack_alignment_needed) - offset;
5826
5827 offset += frame->padding1;
5828
5829 /* Frame pointer points here. */
5830 frame->frame_pointer_offset = offset;
5831
5832 offset += size;
5833
5834 /* Add outgoing arguments area. Can be skipped if we eliminated
5835 all the function calls as dead code.
5836      Skipping is however impossible when the function calls alloca.  The alloca
5837      expander assumes that the last current_function_outgoing_args_size bytes
5838      of the stack frame are unused.  */
5839 if (ACCUMULATE_OUTGOING_ARGS
5840 && (!current_function_is_leaf || current_function_calls_alloca
5841 || ix86_current_function_calls_tls_descriptor))
5842 {
5843 offset += current_function_outgoing_args_size;
5844 frame->outgoing_arguments_size = current_function_outgoing_args_size;
5845 }
5846 else
5847 frame->outgoing_arguments_size = 0;
5848
5849 /* Align stack boundary. Only needed if we're calling another function
5850 or using alloca. */
5851 if (!current_function_is_leaf || current_function_calls_alloca
5852 || ix86_current_function_calls_tls_descriptor)
5853 frame->padding2 = ((offset + preferred_alignment - 1)
5854 & -preferred_alignment) - offset;
5855 else
5856 frame->padding2 = 0;
5857
5858 offset += frame->padding2;
5859
5860 /* We've reached end of stack frame. */
5861 frame->stack_pointer_offset = offset;
5862
5863 /* Size prologue needs to allocate. */
5864 frame->to_allocate =
5865 (size + frame->padding1 + frame->padding2
5866 + frame->outgoing_arguments_size + frame->va_arg_size);
5867
5868 if ((!frame->to_allocate && frame->nregs <= 1)
5869 || (TARGET_64BIT && frame->to_allocate >= (HOST_WIDE_INT) 0x80000000))
5870 frame->save_regs_using_mov = false;
5871
5872 if (TARGET_RED_ZONE && current_function_sp_is_unchanging
5873 && current_function_is_leaf
5874 && !ix86_current_function_calls_tls_descriptor)
5875 {
5876 frame->red_zone_size = frame->to_allocate;
5877 if (frame->save_regs_using_mov)
5878 frame->red_zone_size += frame->nregs * UNITS_PER_WORD;
5879 if (frame->red_zone_size > RED_ZONE_SIZE - RED_ZONE_RESERVE)
5880 frame->red_zone_size = RED_ZONE_SIZE - RED_ZONE_RESERVE;
5881 }
5882 else
5883 frame->red_zone_size = 0;
5884 frame->to_allocate -= frame->red_zone_size;
5885 frame->stack_pointer_offset -= frame->red_zone_size;
5886 #if 0
5887 fprintf (stderr, "\n");
5888 fprintf (stderr, "nregs: %ld\n", (long)frame->nregs);
5889 fprintf (stderr, "size: %ld\n", (long)size);
5890 fprintf (stderr, "alignment1: %ld\n", (long)stack_alignment_needed);
5891 fprintf (stderr, "padding1: %ld\n", (long)frame->padding1);
5892 fprintf (stderr, "va_arg: %ld\n", (long)frame->va_arg_size);
5893 fprintf (stderr, "padding2: %ld\n", (long)frame->padding2);
5894 fprintf (stderr, "to_allocate: %ld\n", (long)frame->to_allocate);
5895 fprintf (stderr, "red_zone_size: %ld\n", (long)frame->red_zone_size);
5896 fprintf (stderr, "frame_pointer_offset: %ld\n", (long)frame->frame_pointer_offset);
5897 fprintf (stderr, "hard_frame_pointer_offset: %ld\n",
5898 (long)frame->hard_frame_pointer_offset);
5899 fprintf (stderr, "stack_pointer_offset: %ld\n", (long)frame->stack_pointer_offset);
5900 fprintf (stderr, "current_function_is_leaf: %ld\n", (long)current_function_is_leaf);
5901 fprintf (stderr, "current_function_calls_alloca: %ld\n", (long)current_function_calls_alloca);
5902 fprintf (stderr, "x86_current_function_calls_tls_descriptor: %ld\n", (long)ix86_current_function_calls_tls_descriptor);
5903 #endif
5904 }
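/* Worked example (a sketch, assuming a 32-bit target, so UNITS_PER_WORD
   is 4, there is no red zone, and stack_alignment_needed is the default
   4 bytes): a function that needs the frame pointer, saves two call-saved
   registers, has 20 bytes of locals, no varargs save area and no
   outgoing arguments gets

	offset = 8			return address + saved %ebp
	hard_frame_pointer_offset = 8
	offset += 2 * 4			register save area -> 16
	padding1 = 0			frame_pointer_offset = 16
	offset += 20			locals -> 36, padding2 = 0
	stack_pointer_offset = 36	to_allocate = 20

   so the prologue pushes %ebp and the two registers and subtracts 20
   bytes from %esp.  */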
5905
5906 /* Emit code to save registers in the prologue. */
5907
5908 static void
5909 ix86_emit_save_regs (void)
5910 {
5911 unsigned int regno;
5912 rtx insn;
5913
5914 for (regno = FIRST_PSEUDO_REGISTER; regno-- > 0; )
5915 if (ix86_save_reg (regno, true))
5916 {
5917 insn = emit_insn (gen_push (gen_rtx_REG (Pmode, regno)));
5918 RTX_FRAME_RELATED_P (insn) = 1;
5919 }
5920 }
5921
5922 /* Emit code to save registers using MOV insns.  First register
5923    is saved at POINTER + OFFSET.  */
5924 static void
5925 ix86_emit_save_regs_using_mov (rtx pointer, HOST_WIDE_INT offset)
5926 {
5927 unsigned int regno;
5928 rtx insn;
5929
5930 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
5931 if (ix86_save_reg (regno, true))
5932 {
5933 insn = emit_move_insn (adjust_address (gen_rtx_MEM (Pmode, pointer),
5934 Pmode, offset),
5935 gen_rtx_REG (Pmode, regno));
5936 RTX_FRAME_RELATED_P (insn) = 1;
5937 offset += UNITS_PER_WORD;
5938 }
5939 }
5940
5941 /* Expand prologue or epilogue stack adjustment.
5942    The pattern exists to put a dependency on all ebp-based memory accesses.
5943    STYLE should be negative if the instructions should be marked as frame related,
5944    zero if the %r11 register is live and cannot be freely used, and positive
5945    otherwise.  */
5946
5947 static void
5948 pro_epilogue_adjust_stack (rtx dest, rtx src, rtx offset, int style)
5949 {
5950 rtx insn;
5951
5952 if (! TARGET_64BIT)
5953 insn = emit_insn (gen_pro_epilogue_adjust_stack_1 (dest, src, offset));
5954 else if (x86_64_immediate_operand (offset, DImode))
5955 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64 (dest, src, offset));
5956 else
5957 {
5958 rtx r11;
5959 /* r11 is used by indirect sibcall return as well, set before the
5960 epilogue and used after the epilogue. ATM indirect sibcall
5961 shouldn't be used together with huge frame sizes in one
5962 function because of the frame_size check in sibcall.c. */
5963 gcc_assert (style);
5964 r11 = gen_rtx_REG (DImode, R11_REG);
5965 insn = emit_insn (gen_rtx_SET (DImode, r11, offset));
5966 if (style < 0)
5967 RTX_FRAME_RELATED_P (insn) = 1;
5968 insn = emit_insn (gen_pro_epilogue_adjust_stack_rex64_2 (dest, src, r11,
5969 offset));
5970 }
5971 if (style < 0)
5972 RTX_FRAME_RELATED_P (insn) = 1;
5973 }
5974
5975 /* Handle the TARGET_INTERNAL_ARG_POINTER hook. */
5976
5977 static rtx
5978 ix86_internal_arg_pointer (void)
5979 {
5980 bool has_force_align_arg_pointer =
5981 (0 != lookup_attribute (ix86_force_align_arg_pointer_string,
5982 TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))));
5983 if ((FORCE_PREFERRED_STACK_BOUNDARY_IN_MAIN
5984 && DECL_NAME (current_function_decl)
5985 && MAIN_NAME_P (DECL_NAME (current_function_decl))
5986 && DECL_FILE_SCOPE_P (current_function_decl))
5987 || ix86_force_align_arg_pointer
5988 || has_force_align_arg_pointer)
5989 {
5990 /* Nested functions can't realign the stack due to a register
5991 conflict. */
5992 if (DECL_CONTEXT (current_function_decl)
5993 && TREE_CODE (DECL_CONTEXT (current_function_decl)) == FUNCTION_DECL)
5994 {
5995 if (ix86_force_align_arg_pointer)
5996 warning (0, "-mstackrealign ignored for nested functions");
5997 if (has_force_align_arg_pointer)
5998 error ("%s not supported for nested functions",
5999 ix86_force_align_arg_pointer_string);
6000 return virtual_incoming_args_rtx;
6001 }
6002 cfun->machine->force_align_arg_pointer = gen_rtx_REG (Pmode, 2);
6003 return copy_to_reg (cfun->machine->force_align_arg_pointer);
6004 }
6005 else
6006 return virtual_incoming_args_rtx;
6007 }
6008
6009 /* Handle the TARGET_DWARF_HANDLE_FRAME_UNSPEC hook.
6010 This is called from dwarf2out.c to emit call frame instructions
6011 for frame-related insns containing UNSPECs and UNSPEC_VOLATILEs. */
6012 static void
6013 ix86_dwarf_handle_frame_unspec (const char *label, rtx pattern, int index)
6014 {
6015 rtx unspec = SET_SRC (pattern);
6016 gcc_assert (GET_CODE (unspec) == UNSPEC);
6017
6018 switch (index)
6019 {
6020 case UNSPEC_REG_SAVE:
6021 dwarf2out_reg_save_reg (label, XVECEXP (unspec, 0, 0),
6022 SET_DEST (pattern));
6023 break;
6024 case UNSPEC_DEF_CFA:
6025 dwarf2out_def_cfa (label, REGNO (SET_DEST (pattern)),
6026 INTVAL (XVECEXP (unspec, 0, 0)));
6027 break;
6028 default:
6029 gcc_unreachable ();
6030 }
6031 }
6032
6033 /* Expand the prologue into a bunch of separate insns. */
6034
6035 void
6036 ix86_expand_prologue (void)
6037 {
6038 rtx insn;
6039 bool pic_reg_used;
6040 struct ix86_frame frame;
6041 HOST_WIDE_INT allocate;
6042
6043 ix86_compute_frame_layout (&frame);
6044
6045 if (cfun->machine->force_align_arg_pointer)
6046 {
6047 rtx x, y;
6048
6049 /* Grab the argument pointer. */
6050 x = plus_constant (stack_pointer_rtx, 4);
6051 y = cfun->machine->force_align_arg_pointer;
6052 insn = emit_insn (gen_rtx_SET (VOIDmode, y, x));
6053 RTX_FRAME_RELATED_P (insn) = 1;
6054
6055 /* The unwind info consists of two parts: install the fafp as the cfa,
6056 and record the fafp as the "save register" of the stack pointer.
6057	 The latter is there so that the unwinder can see where it
6058	 should restore the stack pointer across the AND insn.  */
6059 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, const0_rtx), UNSPEC_DEF_CFA);
6060 x = gen_rtx_SET (VOIDmode, y, x);
6061 RTX_FRAME_RELATED_P (x) = 1;
6062 y = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, stack_pointer_rtx),
6063 UNSPEC_REG_SAVE);
6064 y = gen_rtx_SET (VOIDmode, cfun->machine->force_align_arg_pointer, y);
6065 RTX_FRAME_RELATED_P (y) = 1;
6066 x = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y));
6067 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6068 REG_NOTES (insn) = x;
6069
6070 /* Align the stack. */
6071 emit_insn (gen_andsi3 (stack_pointer_rtx, stack_pointer_rtx,
6072 GEN_INT (-16)));
6073
6074 /* And here we cheat like madmen with the unwind info. We force the
6075 cfa register back to sp+4, which is exactly what it was at the
6076 start of the function. Re-pushing the return address results in
6077 the return at the same spot relative to the cfa, and thus is
6078 correct wrt the unwind info. */
6079 x = cfun->machine->force_align_arg_pointer;
6080 x = gen_frame_mem (Pmode, plus_constant (x, -4));
6081 insn = emit_insn (gen_push (x));
6082 RTX_FRAME_RELATED_P (insn) = 1;
6083
6084 x = GEN_INT (4);
6085 x = gen_rtx_UNSPEC (VOIDmode, gen_rtvec (1, x), UNSPEC_DEF_CFA);
6086 x = gen_rtx_SET (VOIDmode, stack_pointer_rtx, x);
6087 x = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR, x, NULL);
6088 REG_NOTES (insn) = x;
6089 }
6090
6091 /* Note: AT&T enter does NOT have reversed args. Enter is probably
6092 slower on all targets. Also sdb doesn't like it. */
6093
6094 if (frame_pointer_needed)
6095 {
6096 insn = emit_insn (gen_push (hard_frame_pointer_rtx));
6097 RTX_FRAME_RELATED_P (insn) = 1;
6098
6099 insn = emit_move_insn (hard_frame_pointer_rtx, stack_pointer_rtx);
6100 RTX_FRAME_RELATED_P (insn) = 1;
6101 }
6102
6103 allocate = frame.to_allocate;
6104
6105 if (!frame.save_regs_using_mov)
6106 ix86_emit_save_regs ();
6107 else
6108 allocate += frame.nregs * UNITS_PER_WORD;
6109
6110   /* When using the red zone we may start register saving before allocating
6111      the stack frame, saving one cycle of the prologue.  */
6112 if (TARGET_RED_ZONE && frame.save_regs_using_mov)
6113 ix86_emit_save_regs_using_mov (frame_pointer_needed ? hard_frame_pointer_rtx
6114 : stack_pointer_rtx,
6115 -frame.nregs * UNITS_PER_WORD);
6116
6117 if (allocate == 0)
6118 ;
6119 else if (! TARGET_STACK_PROBE || allocate < CHECK_STACK_LIMIT)
6120 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6121 GEN_INT (-allocate), -1);
6122 else
6123 {
6124 /* Only valid for Win32. */
6125 rtx eax = gen_rtx_REG (Pmode, 0);
6126 bool eax_live;
6127 rtx t;
6128
6129 gcc_assert (!TARGET_64BIT || TARGET_64BIT_MS_ABI);
6130
6131 if (TARGET_64BIT_MS_ABI)
6132 eax_live = false;
6133 else
6134 eax_live = ix86_eax_live_at_start_p ();
6135
6136 if (eax_live)
6137 {
6138 emit_insn (gen_push (eax));
6139 allocate -= UNITS_PER_WORD;
6140 }
6141
6142 emit_move_insn (eax, GEN_INT (allocate));
6143
6144 if (TARGET_64BIT)
6145 insn = gen_allocate_stack_worker_64 (eax);
6146 else
6147 insn = gen_allocate_stack_worker_32 (eax);
6148 insn = emit_insn (insn);
6149 RTX_FRAME_RELATED_P (insn) = 1;
6150 t = gen_rtx_PLUS (Pmode, stack_pointer_rtx, GEN_INT (-allocate));
6151 t = gen_rtx_SET (VOIDmode, stack_pointer_rtx, t);
6152 REG_NOTES (insn) = gen_rtx_EXPR_LIST (REG_FRAME_RELATED_EXPR,
6153 t, REG_NOTES (insn));
6154
6155 if (eax_live)
6156 {
6157 if (frame_pointer_needed)
6158 t = plus_constant (hard_frame_pointer_rtx,
6159 allocate
6160 - frame.to_allocate
6161 - frame.nregs * UNITS_PER_WORD);
6162 else
6163 t = plus_constant (stack_pointer_rtx, allocate);
6164 emit_move_insn (eax, gen_rtx_MEM (Pmode, t));
6165 }
6166 }
6167
6168 if (frame.save_regs_using_mov && !TARGET_RED_ZONE)
6169 {
6170 if (!frame_pointer_needed || !frame.to_allocate)
6171 ix86_emit_save_regs_using_mov (stack_pointer_rtx, frame.to_allocate);
6172 else
6173 ix86_emit_save_regs_using_mov (hard_frame_pointer_rtx,
6174 -frame.nregs * UNITS_PER_WORD);
6175 }
6176
6177 pic_reg_used = false;
6178 if (pic_offset_table_rtx
6179 && (df_regs_ever_live_p (REAL_PIC_OFFSET_TABLE_REGNUM)
6180 || current_function_profile))
6181 {
6182 unsigned int alt_pic_reg_used = ix86_select_alt_pic_regnum ();
6183
6184 if (alt_pic_reg_used != INVALID_REGNUM)
6185 SET_REGNO (pic_offset_table_rtx, alt_pic_reg_used);
6186
6187 pic_reg_used = true;
6188 }
6189
6190 if (pic_reg_used)
6191 {
6192 if (TARGET_64BIT)
6193 {
6194 if (ix86_cmodel == CM_LARGE_PIC)
6195 {
6196 rtx tmp_reg = gen_rtx_REG (DImode,
6197 FIRST_REX_INT_REG + 3 /* R11 */);
6198 rtx label = gen_label_rtx ();
6199 emit_label (label);
6200 LABEL_PRESERVE_P (label) = 1;
6201 gcc_assert (REGNO (pic_offset_table_rtx) != REGNO (tmp_reg));
6202 insn = emit_insn (gen_set_rip_rex64 (pic_offset_table_rtx, label));
6203 insn = emit_insn (gen_set_got_offset_rex64 (tmp_reg, label));
6204 insn = emit_insn (gen_adddi3 (pic_offset_table_rtx,
6205 pic_offset_table_rtx, tmp_reg));
6206 }
6207 else
6208 insn = emit_insn (gen_set_got_rex64 (pic_offset_table_rtx));
6209 }
6210 else
6211 insn = emit_insn (gen_set_got (pic_offset_table_rtx));
6212 }
6213
6214   /* Prevent function calls from being scheduled before the call to mcount.
6215 In the pic_reg_used case, make sure that the got load isn't deleted. */
6216 if (current_function_profile)
6217 {
6218 if (pic_reg_used)
6219 emit_insn (gen_prologue_use (pic_offset_table_rtx));
6220 emit_insn (gen_blockage ());
6221 }
6222 }
6223
6224 /* Emit code to restore saved registers using MOV insns. First register
6225 is restored from POINTER + OFFSET. */
6226 static void
6227 ix86_emit_restore_regs_using_mov (rtx pointer, HOST_WIDE_INT offset,
6228 int maybe_eh_return)
6229 {
6230 int regno;
6231 rtx base_address = gen_rtx_MEM (Pmode, pointer);
6232
6233 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6234 if (ix86_save_reg (regno, maybe_eh_return))
6235 {
6236	  /* Ensure that adjust_address won't be forced to produce a pointer
6237	     out of the range allowed by the x86-64 instruction set.  */
6238 if (TARGET_64BIT && offset != trunc_int_for_mode (offset, SImode))
6239 {
6240 rtx r11;
6241
6242 r11 = gen_rtx_REG (DImode, R11_REG);
6243 emit_move_insn (r11, GEN_INT (offset));
6244 emit_insn (gen_adddi3 (r11, r11, pointer));
6245 base_address = gen_rtx_MEM (Pmode, r11);
6246 offset = 0;
6247 }
6248 emit_move_insn (gen_rtx_REG (Pmode, regno),
6249 adjust_address (base_address, Pmode, offset));
6250 offset += UNITS_PER_WORD;
6251 }
6252 }
6253
6254 /* Restore function stack, frame, and registers. */
6255
6256 void
6257 ix86_expand_epilogue (int style)
6258 {
6259 int regno;
6260 int sp_valid = !frame_pointer_needed || current_function_sp_is_unchanging;
6261 struct ix86_frame frame;
6262 HOST_WIDE_INT offset;
6263
6264 ix86_compute_frame_layout (&frame);
6265
6266 /* Calculate start of saved registers relative to ebp. Special care
6267 must be taken for the normal return case of a function using
6268 eh_return: the eax and edx registers are marked as saved, but not
6269 restored along this path. */
6270 offset = frame.nregs;
6271 if (current_function_calls_eh_return && style != 2)
6272 offset -= 2;
6273 offset *= -UNITS_PER_WORD;
6274
6275   /* If we're only restoring one register and sp is not valid, then
6276      use a move instruction to restore the register, since it's
6277      less work than reloading sp and popping the register.
6278
6279      The default code results in a stack adjustment using an add/lea instruction,
6280      while this code results in a LEAVE instruction (or discrete equivalent),
6281      so it is profitable in some other cases as well, especially when there
6282      are no registers to restore.  We also use this code when TARGET_USE_LEAVE
6283      is set and there is exactly one register to pop.  This heuristic may need
6284      some tuning in the future.  */
6285 if ((!sp_valid && frame.nregs <= 1)
6286 || (TARGET_EPILOGUE_USING_MOVE
6287 && cfun->machine->use_fast_prologue_epilogue
6288 && (frame.nregs > 1 || frame.to_allocate))
6289 || (frame_pointer_needed && !frame.nregs && frame.to_allocate)
6290 || (frame_pointer_needed && TARGET_USE_LEAVE
6291 && cfun->machine->use_fast_prologue_epilogue
6292 && frame.nregs == 1)
6293 || current_function_calls_eh_return)
6294 {
6295 /* Restore registers. We can use ebp or esp to address the memory
6296 locations. If both are available, default to ebp, since offsets
6297	 are known to be small.  The only exception is esp pointing directly to
6298	 the end of the block of saved registers, where we may simplify the
6299	 addressing mode.  */
6300
6301 if (!frame_pointer_needed || (sp_valid && !frame.to_allocate))
6302 ix86_emit_restore_regs_using_mov (stack_pointer_rtx,
6303 frame.to_allocate, style == 2);
6304 else
6305 ix86_emit_restore_regs_using_mov (hard_frame_pointer_rtx,
6306 offset, style == 2);
6307
6308 /* eh_return epilogues need %ecx added to the stack pointer. */
6309 if (style == 2)
6310 {
6311 rtx tmp, sa = EH_RETURN_STACKADJ_RTX;
6312
6313 if (frame_pointer_needed)
6314 {
6315 tmp = gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, sa);
6316 tmp = plus_constant (tmp, UNITS_PER_WORD);
6317 emit_insn (gen_rtx_SET (VOIDmode, sa, tmp));
6318
6319 tmp = gen_rtx_MEM (Pmode, hard_frame_pointer_rtx);
6320 emit_move_insn (hard_frame_pointer_rtx, tmp);
6321
6322 pro_epilogue_adjust_stack (stack_pointer_rtx, sa,
6323 const0_rtx, style);
6324 }
6325 else
6326 {
6327 tmp = gen_rtx_PLUS (Pmode, stack_pointer_rtx, sa);
6328 tmp = plus_constant (tmp, (frame.to_allocate
6329 + frame.nregs * UNITS_PER_WORD));
6330 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx, tmp));
6331 }
6332 }
6333 else if (!frame_pointer_needed)
6334 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6335 GEN_INT (frame.to_allocate
6336 + frame.nregs * UNITS_PER_WORD),
6337 style);
6338 /* If not an i386, mov & pop is faster than "leave". */
6339 else if (TARGET_USE_LEAVE || optimize_size
6340 || !cfun->machine->use_fast_prologue_epilogue)
6341 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6342 else
6343 {
6344 pro_epilogue_adjust_stack (stack_pointer_rtx,
6345 hard_frame_pointer_rtx,
6346 const0_rtx, style);
6347 if (TARGET_64BIT)
6348 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6349 else
6350 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6351 }
6352 }
6353 else
6354 {
6355 /* First step is to deallocate the stack frame so that we can
6356 pop the registers. */
6357 if (!sp_valid)
6358 {
6359 gcc_assert (frame_pointer_needed);
6360 pro_epilogue_adjust_stack (stack_pointer_rtx,
6361 hard_frame_pointer_rtx,
6362 GEN_INT (offset), style);
6363 }
6364 else if (frame.to_allocate)
6365 pro_epilogue_adjust_stack (stack_pointer_rtx, stack_pointer_rtx,
6366 GEN_INT (frame.to_allocate), style);
6367
6368 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
6369 if (ix86_save_reg (regno, false))
6370 {
6371 if (TARGET_64BIT)
6372 emit_insn (gen_popdi1 (gen_rtx_REG (Pmode, regno)));
6373 else
6374 emit_insn (gen_popsi1 (gen_rtx_REG (Pmode, regno)));
6375 }
6376 if (frame_pointer_needed)
6377 {
6378 /* Leave results in shorter dependency chains on CPUs that are
6379 able to grok it fast. */
6380 if (TARGET_USE_LEAVE)
6381 emit_insn (TARGET_64BIT ? gen_leave_rex64 () : gen_leave ());
6382 else if (TARGET_64BIT)
6383 emit_insn (gen_popdi1 (hard_frame_pointer_rtx));
6384 else
6385 emit_insn (gen_popsi1 (hard_frame_pointer_rtx));
6386 }
6387 }
6388
6389 if (cfun->machine->force_align_arg_pointer)
6390 {
6391 emit_insn (gen_addsi3 (stack_pointer_rtx,
6392 cfun->machine->force_align_arg_pointer,
6393 GEN_INT (-4)));
6394 }
6395
6396 /* Sibcall epilogues don't want a return instruction. */
6397 if (style == 0)
6398 return;
6399
6400 if (current_function_pops_args && current_function_args_size)
6401 {
6402 rtx popc = GEN_INT (current_function_pops_args);
6403
6404 /* i386 can only pop 64K bytes. If asked to pop more, pop
6405 return address, do explicit add, and jump indirectly to the
6406 caller. */
6407
6408 if (current_function_pops_args >= 65536)
6409 {
6410 rtx ecx = gen_rtx_REG (SImode, 2);
6411
6412 /* There is no "pascal" calling convention in any 64bit ABI. */
6413 gcc_assert (!TARGET_64BIT);
6414
6415 emit_insn (gen_popsi1 (ecx));
6416 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, popc));
6417 emit_jump_insn (gen_return_indirect_internal (ecx));
6418 }
6419 else
6420 emit_jump_insn (gen_return_pop_internal (popc));
6421 }
6422 else
6423 emit_jump_insn (gen_return_internal ());
6424 }
6425
6426 /* Reset from the function's potential modifications. */
6427
6428 static void
6429 ix86_output_function_epilogue (FILE *file ATTRIBUTE_UNUSED,
6430 HOST_WIDE_INT size ATTRIBUTE_UNUSED)
6431 {
6432 if (pic_offset_table_rtx)
6433 SET_REGNO (pic_offset_table_rtx, REAL_PIC_OFFSET_TABLE_REGNUM);
6434 #if TARGET_MACHO
6435 /* Mach-O doesn't support labels at the end of objects, so if
6436 it looks like we might want one, insert a NOP. */
6437 {
6438 rtx insn = get_last_insn ();
6439 while (insn
6440 && NOTE_P (insn)
6441 && NOTE_KIND (insn) != NOTE_INSN_DELETED_LABEL)
6442 insn = PREV_INSN (insn);
6443 if (insn
6444 && (LABEL_P (insn)
6445 || (NOTE_P (insn)
6446 && NOTE_KIND (insn) == NOTE_INSN_DELETED_LABEL)))
6447 fputs ("\tnop\n", file);
6448 }
6449 #endif
6450
6451 }
6452 \f
6453 /* Extract the parts of an RTL expression that is a valid memory address
6454 for an instruction. Return 0 if the structure of the address is
6455    grossly off.  Return -1 if the address contains ASHIFT, so it is not
6456    strictly valid but is still used for computing the length of an lea insn.  */
6457
6458 int
6459 ix86_decompose_address (rtx addr, struct ix86_address *out)
6460 {
6461 rtx base = NULL_RTX, index = NULL_RTX, disp = NULL_RTX;
6462 rtx base_reg, index_reg;
6463 HOST_WIDE_INT scale = 1;
6464 rtx scale_rtx = NULL_RTX;
6465 int retval = 1;
6466 enum ix86_address_seg seg = SEG_DEFAULT;
6467
6468 if (REG_P (addr) || GET_CODE (addr) == SUBREG)
6469 base = addr;
6470 else if (GET_CODE (addr) == PLUS)
6471 {
6472 rtx addends[4], op;
6473 int n = 0, i;
6474
6475 op = addr;
6476 do
6477 {
6478 if (n >= 4)
6479 return 0;
6480 addends[n++] = XEXP (op, 1);
6481 op = XEXP (op, 0);
6482 }
6483 while (GET_CODE (op) == PLUS);
6484 if (n >= 4)
6485 return 0;
6486 addends[n] = op;
6487
6488 for (i = n; i >= 0; --i)
6489 {
6490 op = addends[i];
6491 switch (GET_CODE (op))
6492 {
6493 case MULT:
6494 if (index)
6495 return 0;
6496 index = XEXP (op, 0);
6497 scale_rtx = XEXP (op, 1);
6498 break;
6499
6500 case UNSPEC:
6501 if (XINT (op, 1) == UNSPEC_TP
6502 && TARGET_TLS_DIRECT_SEG_REFS
6503 && seg == SEG_DEFAULT)
6504 seg = TARGET_64BIT ? SEG_FS : SEG_GS;
6505 else
6506 return 0;
6507 break;
6508
6509 case REG:
6510 case SUBREG:
6511 if (!base)
6512 base = op;
6513 else if (!index)
6514 index = op;
6515 else
6516 return 0;
6517 break;
6518
6519 case CONST:
6520 case CONST_INT:
6521 case SYMBOL_REF:
6522 case LABEL_REF:
6523 if (disp)
6524 return 0;
6525 disp = op;
6526 break;
6527
6528 default:
6529 return 0;
6530 }
6531 }
6532 }
6533 else if (GET_CODE (addr) == MULT)
6534 {
6535 index = XEXP (addr, 0); /* index*scale */
6536 scale_rtx = XEXP (addr, 1);
6537 }
6538 else if (GET_CODE (addr) == ASHIFT)
6539 {
6540 rtx tmp;
6541
6542 /* We're called for lea too, which implements ashift on occasion. */
6543 index = XEXP (addr, 0);
6544 tmp = XEXP (addr, 1);
6545 if (!CONST_INT_P (tmp))
6546 return 0;
6547 scale = INTVAL (tmp);
6548 if ((unsigned HOST_WIDE_INT) scale > 3)
6549 return 0;
6550 scale = 1 << scale;
6551 retval = -1;
6552 }
6553 else
6554 disp = addr; /* displacement */
6555
6556 /* Extract the integral value of scale. */
6557 if (scale_rtx)
6558 {
6559 if (!CONST_INT_P (scale_rtx))
6560 return 0;
6561 scale = INTVAL (scale_rtx);
6562 }
6563
6564 base_reg = base && GET_CODE (base) == SUBREG ? SUBREG_REG (base) : base;
6565 index_reg = index && GET_CODE (index) == SUBREG ? SUBREG_REG (index) : index;
6566
6567   /* Allow the arg pointer and stack pointer as an index if there is no scaling.  */
6568 if (base_reg && index_reg && scale == 1
6569 && (index_reg == arg_pointer_rtx
6570 || index_reg == frame_pointer_rtx
6571 || (REG_P (index_reg) && REGNO (index_reg) == STACK_POINTER_REGNUM)))
6572 {
6573 rtx tmp;
6574 tmp = base, base = index, index = tmp;
6575 tmp = base_reg, base_reg = index_reg, index_reg = tmp;
6576 }
6577
6578 /* Special case: %ebp cannot be encoded as a base without a displacement. */
6579 if ((base_reg == hard_frame_pointer_rtx
6580 || base_reg == frame_pointer_rtx
6581 || base_reg == arg_pointer_rtx) && !disp)
6582 disp = const0_rtx;
6583
6584 /* Special case: on K6, [%esi] makes the instruction vector decoded.
6585 Avoid this by transforming to [%esi+0]. */
6586 if (ix86_tune == PROCESSOR_K6 && !optimize_size
6587 && base_reg && !index_reg && !disp
6588 && REG_P (base_reg)
6589 && REGNO_REG_CLASS (REGNO (base_reg)) == SIREG)
6590 disp = const0_rtx;
6591
6592 /* Special case: encode reg+reg instead of reg*2. */
6593 if (!base && index && scale && scale == 2)
6594 base = index, base_reg = index_reg, scale = 1;
6595
6596 /* Special case: scaling cannot be encoded without base or displacement. */
6597 if (!base && !disp && index && scale != 1)
6598 disp = const0_rtx;
6599
6600 out->base = base;
6601 out->index = index;
6602 out->disp = disp;
6603 out->scale = scale;
6604 out->seg = seg;
6605
6606 return retval;
6607 }
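/* For illustration (a sketch): the address

	(plus:SI (plus:SI (mult:SI (reg:SI si) (const_int 4))
			  (reg:SI bx))
		 (const_int 12))

   decomposes into base = %ebx, index = %esi, scale = 4, disp = 12 and
   seg = SEG_DEFAULT, i.e. the operand 12(%ebx,%esi,4).  */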
6608 \f
6609 /* Return cost of the memory address x.
6610 For i386, it is better to use a complex address than let gcc copy
6611 the address into a reg and make a new pseudo. But not if the address
6612    requires two regs - that would mean more pseudos with longer
6613 lifetimes. */
6614 static int
6615 ix86_address_cost (rtx x)
6616 {
6617 struct ix86_address parts;
6618 int cost = 1;
6619 int ok = ix86_decompose_address (x, &parts);
6620
6621 gcc_assert (ok);
6622
6623 if (parts.base && GET_CODE (parts.base) == SUBREG)
6624 parts.base = SUBREG_REG (parts.base);
6625 if (parts.index && GET_CODE (parts.index) == SUBREG)
6626 parts.index = SUBREG_REG (parts.index);
6627
6628 /* Attempt to minimize number of registers in the address. */
6629 if ((parts.base
6630 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER))
6631 || (parts.index
6632 && (!REG_P (parts.index)
6633 || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)))
6634 cost++;
6635
6636 if (parts.base
6637 && (!REG_P (parts.base) || REGNO (parts.base) >= FIRST_PSEUDO_REGISTER)
6638 && parts.index
6639 && (!REG_P (parts.index) || REGNO (parts.index) >= FIRST_PSEUDO_REGISTER)
6640 && parts.base != parts.index)
6641 cost++;
6642
6643   /* The AMD-K6 doesn't like addresses with the ModR/M byte set to 00_xxx_100b,
6644      since its predecode logic can't detect the length of such instructions
6645      and decoding degenerates to vector decoded.  Increase the cost of such
6646      addresses here.  The penalty is at least 2 cycles.  It may be worthwhile
6647      to split such addresses or even refuse them altogether.
6648
6649      The following addressing modes are affected:
6650       [base+scale*index]
6651       [scale*index+disp]
6652       [base+index]
6653
6654      The first and last case may be avoidable by explicitly coding a zero in
6655      the memory address, but I don't have an AMD-K6 machine handy to check
6656      this theory.  */
6657
6658 if (TARGET_K6
6659 && ((!parts.disp && parts.base && parts.index && parts.scale != 1)
6660 || (parts.disp && !parts.base && parts.index && parts.scale != 1)
6661 || (!parts.disp && parts.base && parts.index && parts.scale == 1)))
6662 cost += 10;
6663
6664 return cost;
6665 }
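/* For illustration (a sketch): with hard registers an address such as
   4(%ebx) costs 1, while before reload an address built from two
   distinct pseudo registers costs 3 (one extra unit for using any
   pseudo, one more for using two different ones); on the K6 an address
   such as (%ebx,%esi,2) additionally pays the 10-unit penalty described
   above.  */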
6666 \f
6667 /* Allow {LABEL | SYMBOL}_REF - SYMBOL_REF-FOR-PICBASE for Mach-O, as
6668    this is used to form addresses of local data when -fPIC is in
6669    use.  */
6670
6671 static bool
6672 darwin_local_data_pic (rtx disp)
6673 {
6674 if (GET_CODE (disp) == MINUS)
6675 {
6676 if (GET_CODE (XEXP (disp, 0)) == LABEL_REF
6677 || GET_CODE (XEXP (disp, 0)) == SYMBOL_REF)
6678 if (GET_CODE (XEXP (disp, 1)) == SYMBOL_REF)
6679 {
6680 const char *sym_name = XSTR (XEXP (disp, 1), 0);
6681 if (! strcmp (sym_name, "<pic base>"))
6682 return true;
6683 }
6684 }
6685
6686 return false;
6687 }
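/* For illustration (a sketch): a displacement of the form

	(minus:SI (symbol_ref:SI ("_foo")) (symbol_ref:SI ("<pic base>")))

   is accepted here; this is how Mach-O expresses the offset of local
   data from the function's PIC base when -fPIC is in use.  */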
6688
6689 /* Determine if a given RTX is a valid constant. We already know this
6690 satisfies CONSTANT_P. */
6691
6692 bool
6693 legitimate_constant_p (rtx x)
6694 {
6695 switch (GET_CODE (x))
6696 {
6697 case CONST:
6698 x = XEXP (x, 0);
6699
6700 if (GET_CODE (x) == PLUS)
6701 {
6702 if (!CONST_INT_P (XEXP (x, 1)))
6703 return false;
6704 x = XEXP (x, 0);
6705 }
6706
6707 if (TARGET_MACHO && darwin_local_data_pic (x))
6708 return true;
6709
6710 /* Only some unspecs are valid as "constants". */
6711 if (GET_CODE (x) == UNSPEC)
6712 switch (XINT (x, 1))
6713 {
6714 case UNSPEC_GOT:
6715 case UNSPEC_GOTOFF:
6716 case UNSPEC_PLTOFF:
6717 return TARGET_64BIT;
6718 case UNSPEC_TPOFF:
6719 case UNSPEC_NTPOFF:
6720 x = XVECEXP (x, 0, 0);
6721 return (GET_CODE (x) == SYMBOL_REF
6722 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6723 case UNSPEC_DTPOFF:
6724 x = XVECEXP (x, 0, 0);
6725 return (GET_CODE (x) == SYMBOL_REF
6726 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC);
6727 default:
6728 return false;
6729 }
6730
6731 /* We must have drilled down to a symbol. */
6732 if (GET_CODE (x) == LABEL_REF)
6733 return true;
6734 if (GET_CODE (x) != SYMBOL_REF)
6735 return false;
6736 /* FALLTHRU */
6737
6738 case SYMBOL_REF:
6739 /* TLS symbols are never valid. */
6740 if (SYMBOL_REF_TLS_MODEL (x))
6741 return false;
6742
6743 /* DLLIMPORT symbols are never valid. */
6744 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
6745 && SYMBOL_REF_DLLIMPORT_P (x))
6746 return false;
6747 break;
6748
6749 case CONST_DOUBLE:
6750 if (GET_MODE (x) == TImode
6751 && x != CONST0_RTX (TImode)
6752 && !TARGET_64BIT)
6753 return false;
6754 break;
6755
6756 case CONST_VECTOR:
6757 if (x == CONST0_RTX (GET_MODE (x)))
6758 return true;
6759 return false;
6760
6761 default:
6762 break;
6763 }
6764
6765 /* Otherwise we handle everything else in the move patterns. */
6766 return true;
6767 }
6768
6769 /* Determine if it's legal to put X into the constant pool. This
6770 is not possible for the address of thread-local symbols, which
6771 is checked above. */
6772
6773 static bool
6774 ix86_cannot_force_const_mem (rtx x)
6775 {
6776 /* We can always put integral constants and vectors in memory. */
6777 switch (GET_CODE (x))
6778 {
6779 case CONST_INT:
6780 case CONST_DOUBLE:
6781 case CONST_VECTOR:
6782 return false;
6783
6784 default:
6785 break;
6786 }
6787 return !legitimate_constant_p (x);
6788 }
6789
6790 /* Determine if a given RTX is a valid constant address. */
6791
6792 bool
6793 constant_address_p (rtx x)
6794 {
6795 return CONSTANT_P (x) && legitimate_address_p (Pmode, x, 1);
6796 }
6797
6798 /* Nonzero if the constant value X is a legitimate general operand
6799 when generating PIC code. It is given that flag_pic is on and
6800 that X satisfies CONSTANT_P or is a CONST_DOUBLE. */
6801
6802 bool
6803 legitimate_pic_operand_p (rtx x)
6804 {
6805 rtx inner;
6806
6807 switch (GET_CODE (x))
6808 {
6809 case CONST:
6810 inner = XEXP (x, 0);
6811 if (GET_CODE (inner) == PLUS
6812 && CONST_INT_P (XEXP (inner, 1)))
6813 inner = XEXP (inner, 0);
6814
6815 /* Only some unspecs are valid as "constants". */
6816 if (GET_CODE (inner) == UNSPEC)
6817 switch (XINT (inner, 1))
6818 {
6819 case UNSPEC_GOT:
6820 case UNSPEC_GOTOFF:
6821 case UNSPEC_PLTOFF:
6822 return TARGET_64BIT;
6823 case UNSPEC_TPOFF:
6824 x = XVECEXP (inner, 0, 0);
6825 return (GET_CODE (x) == SYMBOL_REF
6826 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_EXEC);
6827 default:
6828 return false;
6829 }
6830 /* FALLTHRU */
6831
6832 case SYMBOL_REF:
6833 case LABEL_REF:
6834 return legitimate_pic_address_disp_p (x);
6835
6836 default:
6837 return true;
6838 }
6839 }
6840
6841 /* Determine if a given CONST RTX is a valid memory displacement
6842 in PIC mode. */
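/* A rough illustration (not exhaustive): in 32-bit PIC code a typical
   valid displacement is (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)),
   assembled as foo@GOTOFF, possibly plus a small CONST_INT; in 64-bit mode
   the accepted forms are the GOTPCREL/GOTOFF/PLTOFF unspecs checked below.  */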
6843
6844 int
6845 legitimate_pic_address_disp_p (rtx disp)
6846 {
6847 bool saw_plus;
6848
6849 /* In 64bit mode we can allow direct addresses of symbols and labels
6850 when they are not dynamic symbols. */
6851 if (TARGET_64BIT)
6852 {
6853 rtx op0 = disp, op1;
6854
6855 switch (GET_CODE (disp))
6856 {
6857 case LABEL_REF:
6858 return true;
6859
6860 case CONST:
6861 if (GET_CODE (XEXP (disp, 0)) != PLUS)
6862 break;
6863 op0 = XEXP (XEXP (disp, 0), 0);
6864 op1 = XEXP (XEXP (disp, 0), 1);
6865 if (!CONST_INT_P (op1)
6866 || INTVAL (op1) >= 16*1024*1024
6867 || INTVAL (op1) < -16*1024*1024)
6868 break;
6869 if (GET_CODE (op0) == LABEL_REF)
6870 return true;
6871 if (GET_CODE (op0) != SYMBOL_REF)
6872 break;
6873 /* FALLTHRU */
6874
6875 case SYMBOL_REF:
6876 /* TLS references should always be enclosed in UNSPEC. */
6877 if (SYMBOL_REF_TLS_MODEL (op0))
6878 return false;
6879 if (!SYMBOL_REF_FAR_ADDR_P (op0) && SYMBOL_REF_LOCAL_P (op0)
6880 && ix86_cmodel != CM_LARGE_PIC)
6881 return true;
6882 break;
6883
6884 default:
6885 break;
6886 }
6887 }
6888 if (GET_CODE (disp) != CONST)
6889 return 0;
6890 disp = XEXP (disp, 0);
6891
6892 if (TARGET_64BIT)
6893 {
6894 /* It is unsafe to allow PLUS expressions here; that would limit the
6895 allowed distance of GOT references. We should not need these anyway. */
6896 if (GET_CODE (disp) != UNSPEC
6897 || (XINT (disp, 1) != UNSPEC_GOTPCREL
6898 && XINT (disp, 1) != UNSPEC_GOTOFF
6899 && XINT (disp, 1) != UNSPEC_PLTOFF))
6900 return 0;
6901
6902 if (GET_CODE (XVECEXP (disp, 0, 0)) != SYMBOL_REF
6903 && GET_CODE (XVECEXP (disp, 0, 0)) != LABEL_REF)
6904 return 0;
6905 return 1;
6906 }
6907
6908 saw_plus = false;
6909 if (GET_CODE (disp) == PLUS)
6910 {
6911 if (!CONST_INT_P (XEXP (disp, 1)))
6912 return 0;
6913 disp = XEXP (disp, 0);
6914 saw_plus = true;
6915 }
6916
6917 if (TARGET_MACHO && darwin_local_data_pic (disp))
6918 return 1;
6919
6920 if (GET_CODE (disp) != UNSPEC)
6921 return 0;
6922
6923 switch (XINT (disp, 1))
6924 {
6925 case UNSPEC_GOT:
6926 if (saw_plus)
6927 return false;
6928 /* We need to check for both symbols and labels because VxWorks loads
6929 text labels with @GOT rather than @GOTOFF. See gotoff_operand for
6930 details. */
6931 return (GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6932 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF);
6933 case UNSPEC_GOTOFF:
6934 /* Refuse GOTOFF in 64bit mode since it is always 64bit when used.
6935 While the ABI also specifies a 32bit relocation, we don't produce it
6936 in the small PIC model at all. */
6937 if ((GET_CODE (XVECEXP (disp, 0, 0)) == SYMBOL_REF
6938 || GET_CODE (XVECEXP (disp, 0, 0)) == LABEL_REF)
6939 && !TARGET_64BIT)
6940 return gotoff_operand (XVECEXP (disp, 0, 0), Pmode);
6941 return false;
6942 case UNSPEC_GOTTPOFF:
6943 case UNSPEC_GOTNTPOFF:
6944 case UNSPEC_INDNTPOFF:
6945 if (saw_plus)
6946 return false;
6947 disp = XVECEXP (disp, 0, 0);
6948 return (GET_CODE (disp) == SYMBOL_REF
6949 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_INITIAL_EXEC);
6950 case UNSPEC_NTPOFF:
6951 disp = XVECEXP (disp, 0, 0);
6952 return (GET_CODE (disp) == SYMBOL_REF
6953 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_EXEC);
6954 case UNSPEC_DTPOFF:
6955 disp = XVECEXP (disp, 0, 0);
6956 return (GET_CODE (disp) == SYMBOL_REF
6957 && SYMBOL_REF_TLS_MODEL (disp) == TLS_MODEL_LOCAL_DYNAMIC);
6958 }
6959
6960 return 0;
6961 }
6962
6963 /* GO_IF_LEGITIMATE_ADDRESS recognizes an RTL expression that is a valid
6964 memory address for an instruction. The MODE argument is the machine mode
6965 for the MEM expression that wants to use this address.
6966
6967 It only recognizes addresses in canonical form. LEGITIMIZE_ADDRESS should
6968 convert common non-canonical forms to canonical form so that they will
6969 be recognized. */
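/* For illustration, the canonical form this routine expects is roughly
   (plus (plus (mult (reg) (const_int 4)) (reg)) (const_int 16)),
   i.e. the AT&T address 16(%base,%index,4); non-canonical variants are
   expected to have been rewritten by LEGITIMIZE_ADDRESS first.  */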
6970
6971 int
6972 legitimate_address_p (enum machine_mode mode ATTRIBUTE_UNUSED,
6973 rtx addr, int strict)
6974 {
6975 struct ix86_address parts;
6976 rtx base, index, disp;
6977 HOST_WIDE_INT scale;
6978 const char *reason = NULL;
6979 rtx reason_rtx = NULL_RTX;
6980
6981 if (ix86_decompose_address (addr, &parts) <= 0)
6982 {
6983 reason = "decomposition failed";
6984 goto report_error;
6985 }
6986
6987 base = parts.base;
6988 index = parts.index;
6989 disp = parts.disp;
6990 scale = parts.scale;
6991
6992 /* Validate base register.
6993
6994 Don't allow SUBREG's that span more than a word here. It can lead to spill
6995 failures when the base is one word out of a two word structure, which is
6996 represented internally as a DImode int. */
6997
6998 if (base)
6999 {
7000 rtx reg;
7001 reason_rtx = base;
7002
7003 if (REG_P (base))
7004 reg = base;
7005 else if (GET_CODE (base) == SUBREG
7006 && REG_P (SUBREG_REG (base))
7007 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (base)))
7008 <= UNITS_PER_WORD)
7009 reg = SUBREG_REG (base);
7010 else
7011 {
7012 reason = "base is not a register";
7013 goto report_error;
7014 }
7015
7016 if (GET_MODE (base) != Pmode)
7017 {
7018 reason = "base is not in Pmode";
7019 goto report_error;
7020 }
7021
7022 if ((strict && ! REG_OK_FOR_BASE_STRICT_P (reg))
7023 || (! strict && ! REG_OK_FOR_BASE_NONSTRICT_P (reg)))
7024 {
7025 reason = "base is not valid";
7026 goto report_error;
7027 }
7028 }
7029
7030 /* Validate index register.
7031
7032 Don't allow SUBREG's that span more than a word here -- same as above. */
7033
7034 if (index)
7035 {
7036 rtx reg;
7037 reason_rtx = index;
7038
7039 if (REG_P (index))
7040 reg = index;
7041 else if (GET_CODE (index) == SUBREG
7042 && REG_P (SUBREG_REG (index))
7043 && GET_MODE_SIZE (GET_MODE (SUBREG_REG (index)))
7044 <= UNITS_PER_WORD)
7045 reg = SUBREG_REG (index);
7046 else
7047 {
7048 reason = "index is not a register";
7049 goto report_error;
7050 }
7051
7052 if (GET_MODE (index) != Pmode)
7053 {
7054 reason = "index is not in Pmode";
7055 goto report_error;
7056 }
7057
7058 if ((strict && ! REG_OK_FOR_INDEX_STRICT_P (reg))
7059 || (! strict && ! REG_OK_FOR_INDEX_NONSTRICT_P (reg)))
7060 {
7061 reason = "index is not valid";
7062 goto report_error;
7063 }
7064 }
7065
7066 /* Validate scale factor. */
7067 if (scale != 1)
7068 {
7069 reason_rtx = GEN_INT (scale);
7070 if (!index)
7071 {
7072 reason = "scale without index";
7073 goto report_error;
7074 }
7075
7076 if (scale != 2 && scale != 4 && scale != 8)
7077 {
7078 reason = "scale is not a valid multiplier";
7079 goto report_error;
7080 }
7081 }
7082
7083 /* Validate displacement. */
7084 if (disp)
7085 {
7086 reason_rtx = disp;
7087
7088 if (GET_CODE (disp) == CONST
7089 && GET_CODE (XEXP (disp, 0)) == UNSPEC)
7090 switch (XINT (XEXP (disp, 0), 1))
7091 {
7092 /* Refuse GOTOFF and GOT in 64bit mode since they are always 64bit when
7093 used. While the ABI also specifies 32bit relocations, we don't produce
7094 them at all and use IP-relative addressing instead. */
7095 case UNSPEC_GOT:
7096 case UNSPEC_GOTOFF:
7097 gcc_assert (flag_pic);
7098 if (!TARGET_64BIT)
7099 goto is_legitimate_pic;
7100 reason = "64bit address unspec";
7101 goto report_error;
7102
7103 case UNSPEC_GOTPCREL:
7104 gcc_assert (flag_pic);
7105 goto is_legitimate_pic;
7106
7107 case UNSPEC_GOTTPOFF:
7108 case UNSPEC_GOTNTPOFF:
7109 case UNSPEC_INDNTPOFF:
7110 case UNSPEC_NTPOFF:
7111 case UNSPEC_DTPOFF:
7112 break;
7113
7114 default:
7115 reason = "invalid address unspec";
7116 goto report_error;
7117 }
7118
7119 else if (SYMBOLIC_CONST (disp)
7120 && (flag_pic
7121 || (TARGET_MACHO
7122 #if TARGET_MACHO
7123 && MACHOPIC_INDIRECT
7124 && !machopic_operand_p (disp)
7125 #endif
7126 )))
7127 {
7128
7129 is_legitimate_pic:
7130 if (TARGET_64BIT && (index || base))
7131 {
7132 /* foo@dtpoff(%rX) is ok. */
7133 if (GET_CODE (disp) != CONST
7134 || GET_CODE (XEXP (disp, 0)) != PLUS
7135 || GET_CODE (XEXP (XEXP (disp, 0), 0)) != UNSPEC
7136 || !CONST_INT_P (XEXP (XEXP (disp, 0), 1))
7137 || (XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_DTPOFF
7138 && XINT (XEXP (XEXP (disp, 0), 0), 1) != UNSPEC_NTPOFF))
7139 {
7140 reason = "non-constant pic memory reference";
7141 goto report_error;
7142 }
7143 }
7144 else if (! legitimate_pic_address_disp_p (disp))
7145 {
7146 reason = "displacement is an invalid pic construct";
7147 goto report_error;
7148 }
7149
7150 /* This code used to verify that a symbolic pic displacement
7151 includes the pic_offset_table_rtx register.
7152
7153 While this is a good idea, unfortunately these constructs may
7154 be created by the "adds using lea" optimization for incorrect
7155 code like:
7156
7157 int a;
7158 int foo(int i)
7159 {
7160 return *(&a+i);
7161 }
7162
7163 This code is nonsensical, but results in addressing the
7164 GOT table with a pic_offset_table_rtx base. We can't
7165 just refuse it easily, since it gets matched by the
7166 "addsi3" pattern, which later gets split to lea when the
7167 output register differs from the input. While this
7168 could be handled by a separate addsi pattern for this case
7169 that never results in lea, disabling this test seems to be
7170 the easier and correct fix for the crash. */
7171 }
7172 else if (GET_CODE (disp) != LABEL_REF
7173 && !CONST_INT_P (disp)
7174 && (GET_CODE (disp) != CONST
7175 || !legitimate_constant_p (disp))
7176 && (GET_CODE (disp) != SYMBOL_REF
7177 || !legitimate_constant_p (disp)))
7178 {
7179 reason = "displacement is not constant";
7180 goto report_error;
7181 }
7182 else if (TARGET_64BIT
7183 && !x86_64_immediate_operand (disp, VOIDmode))
7184 {
7185 reason = "displacement is out of range";
7186 goto report_error;
7187 }
7188 }
7189
7190 /* Everything looks valid. */
7191 return TRUE;
7192
7193 report_error:
7194 return FALSE;
7195 }
7196 \f
7197 /* Return a unique alias set for the GOT. */
7198
7199 static alias_set_type
7200 ix86_GOT_alias_set (void)
7201 {
7202 static alias_set_type set = -1;
7203 if (set == -1)
7204 set = new_alias_set ();
7205 return set;
7206 }
7207
7208 /* Return a legitimate reference for ORIG (an address) using the
7209 register REG. If REG is 0, a new pseudo is generated.
7210
7211 There are two types of references that must be handled:
7212
7213 1. Global data references must load the address from the GOT, via
7214 the PIC reg. An insn is emitted to do this load, and the reg is
7215 returned.
7216
7217 2. Static data references, constant pool addresses, and code labels
7218 compute the address as an offset from the GOT, whose base is in
7219 the PIC reg. Static data objects have SYMBOL_FLAG_LOCAL set to
7220 differentiate them from global data objects. The returned
7221 address is the PIC reg + an unspec constant.
7222
7223 GO_IF_LEGITIMATE_ADDRESS rejects symbolic references unless the PIC
7224 reg also appears in the address. */
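/* Roughly, for a global symbol "foo" this means loading the address from
   foo@GOT(%ebx) (or foo@GOTPCREL(%rip) on x86_64), while for a local or
   static symbol the result is pic_offset_table_rtx plus a foo@GOTOFF
   displacement; the cases below follow this split.  */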
7225
7226 static rtx
7227 legitimize_pic_address (rtx orig, rtx reg)
7228 {
7229 rtx addr = orig;
7230 rtx new_rtx = orig;
7231 rtx base;
7232
7233 #if TARGET_MACHO
7234 if (TARGET_MACHO && !TARGET_64BIT)
7235 {
7236 if (reg == 0)
7237 reg = gen_reg_rtx (Pmode);
7238 /* Use the generic Mach-O PIC machinery. */
7239 return machopic_legitimize_pic_address (orig, GET_MODE (orig), reg);
7240 }
7241 #endif
7242
7243 if (TARGET_64BIT && legitimate_pic_address_disp_p (addr))
7244 new_rtx = addr;
7245 else if (TARGET_64BIT
7246 && ix86_cmodel != CM_SMALL_PIC
7247 && gotoff_operand (addr, Pmode))
7248 {
7249 rtx tmpreg;
7250 /* This symbol may be referenced via a displacement from the PIC
7251 base address (@GOTOFF). */
7252
7253 if (reload_in_progress)
7254 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7255 if (GET_CODE (addr) == CONST)
7256 addr = XEXP (addr, 0);
7257 if (GET_CODE (addr) == PLUS)
7258 {
7259 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7260 UNSPEC_GOTOFF);
7261 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7262 }
7263 else
7264 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7265 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7266 if (!reg)
7267 tmpreg = gen_reg_rtx (Pmode);
7268 else
7269 tmpreg = reg;
7270 emit_move_insn (tmpreg, new_rtx);
7271
7272 if (reg != 0)
7273 {
7274 new_rtx = expand_simple_binop (Pmode, PLUS, reg, pic_offset_table_rtx,
7275 tmpreg, 1, OPTAB_DIRECT);
7276 new_rtx = reg;
7277 }
7278 else new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, tmpreg);
7279 }
7280 else if (!TARGET_64BIT && gotoff_operand (addr, Pmode))
7281 {
7282 /* This symbol may be referenced via a displacement from the PIC
7283 base address (@GOTOFF). */
7284
7285 if (reload_in_progress)
7286 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7287 if (GET_CODE (addr) == CONST)
7288 addr = XEXP (addr, 0);
7289 if (GET_CODE (addr) == PLUS)
7290 {
7291 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, XEXP (addr, 0)),
7292 UNSPEC_GOTOFF);
7293 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, XEXP (addr, 1));
7294 }
7295 else
7296 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTOFF);
7297 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7298 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7299
7300 if (reg != 0)
7301 {
7302 emit_move_insn (reg, new_rtx);
7303 new_rtx = reg;
7304 }
7305 }
7306 else if ((GET_CODE (addr) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (addr) == 0)
7307 /* We can't use @GOTOFF for text labels on VxWorks;
7308 see gotoff_operand. */
7309 || (TARGET_VXWORKS_RTP && GET_CODE (addr) == LABEL_REF))
7310 {
7311 /* Given that we've already handled dllimport variables separately
7312 in legitimize_address, and all other variables should satisfy
7313 legitimate_pic_address_disp_p, we should never arrive here. */
7314 gcc_assert (!TARGET_64BIT_MS_ABI);
7315
7316 if (TARGET_64BIT && ix86_cmodel != CM_LARGE_PIC)
7317 {
7318 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOTPCREL);
7319 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7320 new_rtx = gen_const_mem (Pmode, new_rtx);
7321 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7322
7323 if (reg == 0)
7324 reg = gen_reg_rtx (Pmode);
7325 /* Use gen_movsi directly, otherwise the address is loaded
7326 into a register for CSE. We don't want to CSE these addresses;
7327 instead we CSE addresses from the GOT table, so skip this. */
7328 emit_insn (gen_movsi (reg, new_rtx));
7329 new_rtx = reg;
7330 }
7331 else
7332 {
7333 /* This symbol must be referenced via a load from the
7334 Global Offset Table (@GOT). */
7335
7336 if (reload_in_progress)
7337 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7338 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, addr), UNSPEC_GOT);
7339 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7340 if (TARGET_64BIT)
7341 new_rtx = force_reg (Pmode, new_rtx);
7342 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7343 new_rtx = gen_const_mem (Pmode, new_rtx);
7344 set_mem_alias_set (new_rtx, ix86_GOT_alias_set ());
7345
7346 if (reg == 0)
7347 reg = gen_reg_rtx (Pmode);
7348 emit_move_insn (reg, new_rtx);
7349 new_rtx = reg;
7350 }
7351 }
7352 else
7353 {
7354 if (CONST_INT_P (addr)
7355 && !x86_64_immediate_operand (addr, VOIDmode))
7356 {
7357 if (reg)
7358 {
7359 emit_move_insn (reg, addr);
7360 new_rtx = reg;
7361 }
7362 else
7363 new_rtx = force_reg (Pmode, addr);
7364 }
7365 else if (GET_CODE (addr) == CONST)
7366 {
7367 addr = XEXP (addr, 0);
7368
7369 /* We must match stuff we generated before. Assume the only
7370 unspecs that can get here are ours. Not that we could do
7371 anything with them anyway.... */
7372 if (GET_CODE (addr) == UNSPEC
7373 || (GET_CODE (addr) == PLUS
7374 && GET_CODE (XEXP (addr, 0)) == UNSPEC))
7375 return orig;
7376 gcc_assert (GET_CODE (addr) == PLUS);
7377 }
7378 if (GET_CODE (addr) == PLUS)
7379 {
7380 rtx op0 = XEXP (addr, 0), op1 = XEXP (addr, 1);
7381
7382 /* Check first to see if this is a constant offset from a @GOTOFF
7383 symbol reference. */
7384 if (gotoff_operand (op0, Pmode)
7385 && CONST_INT_P (op1))
7386 {
7387 if (!TARGET_64BIT)
7388 {
7389 if (reload_in_progress)
7390 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7391 new_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, op0),
7392 UNSPEC_GOTOFF);
7393 new_rtx = gen_rtx_PLUS (Pmode, new_rtx, op1);
7394 new_rtx = gen_rtx_CONST (Pmode, new_rtx);
7395 new_rtx = gen_rtx_PLUS (Pmode, pic_offset_table_rtx, new_rtx);
7396
7397 if (reg != 0)
7398 {
7399 emit_move_insn (reg, new_rtx);
7400 new_rtx = reg;
7401 }
7402 }
7403 else
7404 {
7405 if (INTVAL (op1) < -16*1024*1024
7406 || INTVAL (op1) >= 16*1024*1024)
7407 {
7408 if (!x86_64_immediate_operand (op1, Pmode))
7409 op1 = force_reg (Pmode, op1);
7410 new_rtx = gen_rtx_PLUS (Pmode, force_reg (Pmode, op0), op1);
7411 }
7412 }
7413 }
7414 else
7415 {
7416 base = legitimize_pic_address (XEXP (addr, 0), reg);
7417 new_rtx = legitimize_pic_address (XEXP (addr, 1),
7418 base == reg ? NULL_RTX : reg);
7419
7420 if (CONST_INT_P (new_rtx))
7421 new_rtx = plus_constant (base, INTVAL (new_rtx));
7422 else
7423 {
7424 if (GET_CODE (new_rtx) == PLUS && CONSTANT_P (XEXP (new_rtx, 1)))
7425 {
7426 base = gen_rtx_PLUS (Pmode, base, XEXP (new_rtx, 0));
7427 new_rtx = XEXP (new_rtx, 1);
7428 }
7429 new_rtx = gen_rtx_PLUS (Pmode, base, new_rtx);
7430 }
7431 }
7432 }
7433 }
7434 return new_rtx;
7435 }
7436 \f
7437 /* Load the thread pointer. If TO_REG is true, force it into a register. */
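/* The UNSPEC_TP built below ends up being printed as a thread-pointer
   segment access; on GNU/Linux targets that is typically %gs in 32-bit
   mode and %fs in 64-bit mode.  */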
7438
7439 static rtx
7440 get_thread_pointer (int to_reg)
7441 {
7442 rtx tp, reg, insn;
7443
7444 tp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const0_rtx), UNSPEC_TP);
7445 if (!to_reg)
7446 return tp;
7447
7448 reg = gen_reg_rtx (Pmode);
7449 insn = gen_rtx_SET (VOIDmode, reg, tp);
7450 insn = emit_insn (insn);
7451
7452 return reg;
7453 }
7454
7455 /* A subroutine of legitimize_address and ix86_expand_move. FOR_MOV is
7456 false if we expect this to be used for a memory address and true if
7457 we expect to load the address into a register. */
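/* Roughly, the sequences built below are: the dynamic models resolve the
   address through __tls_get_addr (or the TARGET_GNU2_TLS descriptor
   scheme), initial exec loads the offset from the GOT via
   x@GOTTPOFF/x@GOTNTPOFF/x@INDNTPOFF and adds the thread pointer, and
   local exec adds the link-time constant x@NTPOFF/x@TPOFF directly.  */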
7458
7459 static rtx
7460 legitimize_tls_address (rtx x, enum tls_model model, int for_mov)
7461 {
7462 rtx dest, base, off, pic, tp;
7463 int type;
7464
7465 switch (model)
7466 {
7467 case TLS_MODEL_GLOBAL_DYNAMIC:
7468 dest = gen_reg_rtx (Pmode);
7469 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7470
7471 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7472 {
7473 rtx rax = gen_rtx_REG (Pmode, 0), insns;
7474
7475 start_sequence ();
7476 emit_call_insn (gen_tls_global_dynamic_64 (rax, x));
7477 insns = get_insns ();
7478 end_sequence ();
7479
7480 CONST_OR_PURE_CALL_P (insns) = 1;
7481 emit_libcall_block (insns, dest, rax, x);
7482 }
7483 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7484 emit_insn (gen_tls_global_dynamic_64 (dest, x));
7485 else
7486 emit_insn (gen_tls_global_dynamic_32 (dest, x));
7487
7488 if (TARGET_GNU2_TLS)
7489 {
7490 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, tp, dest));
7491
7492 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7493 }
7494 break;
7495
7496 case TLS_MODEL_LOCAL_DYNAMIC:
7497 base = gen_reg_rtx (Pmode);
7498 tp = TARGET_GNU2_TLS ? get_thread_pointer (1) : 0;
7499
7500 if (TARGET_64BIT && ! TARGET_GNU2_TLS)
7501 {
7502 rtx rax = gen_rtx_REG (Pmode, 0), insns, note;
7503
7504 start_sequence ();
7505 emit_call_insn (gen_tls_local_dynamic_base_64 (rax));
7506 insns = get_insns ();
7507 end_sequence ();
7508
7509 note = gen_rtx_EXPR_LIST (VOIDmode, const0_rtx, NULL);
7510 note = gen_rtx_EXPR_LIST (VOIDmode, ix86_tls_get_addr (), note);
7511 CONST_OR_PURE_CALL_P (insns) = 1;
7512 emit_libcall_block (insns, base, rax, note);
7513 }
7514 else if (TARGET_64BIT && TARGET_GNU2_TLS)
7515 emit_insn (gen_tls_local_dynamic_base_64 (base));
7516 else
7517 emit_insn (gen_tls_local_dynamic_base_32 (base));
7518
7519 if (TARGET_GNU2_TLS)
7520 {
7521 rtx x = ix86_tls_module_base ();
7522
7523 set_unique_reg_note (get_last_insn (), REG_EQUIV,
7524 gen_rtx_MINUS (Pmode, x, tp));
7525 }
7526
7527 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), UNSPEC_DTPOFF);
7528 off = gen_rtx_CONST (Pmode, off);
7529
7530 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, base, off));
7531
7532 if (TARGET_GNU2_TLS)
7533 {
7534 dest = force_reg (Pmode, gen_rtx_PLUS (Pmode, dest, tp));
7535
7536 set_unique_reg_note (get_last_insn (), REG_EQUIV, x);
7537 }
7538
7539 break;
7540
7541 case TLS_MODEL_INITIAL_EXEC:
7542 if (TARGET_64BIT)
7543 {
7544 pic = NULL;
7545 type = UNSPEC_GOTNTPOFF;
7546 }
7547 else if (flag_pic)
7548 {
7549 if (reload_in_progress)
7550 df_set_regs_ever_live (PIC_OFFSET_TABLE_REGNUM, true);
7551 pic = pic_offset_table_rtx;
7552 type = TARGET_ANY_GNU_TLS ? UNSPEC_GOTNTPOFF : UNSPEC_GOTTPOFF;
7553 }
7554 else if (!TARGET_ANY_GNU_TLS)
7555 {
7556 pic = gen_reg_rtx (Pmode);
7557 emit_insn (gen_set_got (pic));
7558 type = UNSPEC_GOTTPOFF;
7559 }
7560 else
7561 {
7562 pic = NULL;
7563 type = UNSPEC_INDNTPOFF;
7564 }
7565
7566 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x), type);
7567 off = gen_rtx_CONST (Pmode, off);
7568 if (pic)
7569 off = gen_rtx_PLUS (Pmode, pic, off);
7570 off = gen_const_mem (Pmode, off);
7571 set_mem_alias_set (off, ix86_GOT_alias_set ());
7572
7573 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7574 {
7575 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7576 off = force_reg (Pmode, off);
7577 return gen_rtx_PLUS (Pmode, base, off);
7578 }
7579 else
7580 {
7581 base = get_thread_pointer (true);
7582 dest = gen_reg_rtx (Pmode);
7583 emit_insn (gen_subsi3 (dest, base, off));
7584 }
7585 break;
7586
7587 case TLS_MODEL_LOCAL_EXEC:
7588 off = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, x),
7589 (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7590 ? UNSPEC_NTPOFF : UNSPEC_TPOFF);
7591 off = gen_rtx_CONST (Pmode, off);
7592
7593 if (TARGET_64BIT || TARGET_ANY_GNU_TLS)
7594 {
7595 base = get_thread_pointer (for_mov || !TARGET_TLS_DIRECT_SEG_REFS);
7596 return gen_rtx_PLUS (Pmode, base, off);
7597 }
7598 else
7599 {
7600 base = get_thread_pointer (true);
7601 dest = gen_reg_rtx (Pmode);
7602 emit_insn (gen_subsi3 (dest, base, off));
7603 }
7604 break;
7605
7606 default:
7607 gcc_unreachable ();
7608 }
7609
7610 return dest;
7611 }
7612
7613 /* Create or return the unique __imp_DECL dllimport symbol corresponding
7614 to symbol DECL. */
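/* For example, a reference to a dllimport'd variable "x" is redirected
   through the import pointer, i.e. it becomes a load from *__imp__x
   (the prefix is adjusted for fastcall-encoded names, see below).  */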
7615
7616 static GTY((if_marked ("tree_map_marked_p"), param_is (struct tree_map)))
7617 htab_t dllimport_map;
7618
7619 static tree
7620 get_dllimport_decl (tree decl)
7621 {
7622 struct tree_map *h, in;
7623 void **loc;
7624 const char *name;
7625 const char *prefix;
7626 size_t namelen, prefixlen;
7627 char *imp_name;
7628 tree to;
7629 rtx rtl;
7630
7631 if (!dllimport_map)
7632 dllimport_map = htab_create_ggc (512, tree_map_hash, tree_map_eq, 0);
7633
7634 in.hash = htab_hash_pointer (decl);
7635 in.base.from = decl;
7636 loc = htab_find_slot_with_hash (dllimport_map, &in, in.hash, INSERT);
7637 h = (struct tree_map *) *loc;
7638 if (h)
7639 return h->to;
7640
7641 *loc = h = GGC_NEW (struct tree_map);
7642 h->hash = in.hash;
7643 h->base.from = decl;
7644 h->to = to = build_decl (VAR_DECL, NULL, ptr_type_node);
7645 DECL_ARTIFICIAL (to) = 1;
7646 DECL_IGNORED_P (to) = 1;
7647 DECL_EXTERNAL (to) = 1;
7648 TREE_READONLY (to) = 1;
7649
7650 name = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
7651 name = targetm.strip_name_encoding (name);
7652 if (name[0] == FASTCALL_PREFIX)
7653 {
7654 name++;
7655 prefix = "*__imp_";
7656 }
7657 else
7658 prefix = "*__imp__";
7659
7660 namelen = strlen (name);
7661 prefixlen = strlen (prefix);
7662 imp_name = (char *) alloca (namelen + prefixlen + 1);
7663 memcpy (imp_name, prefix, prefixlen);
7664 memcpy (imp_name + prefixlen, name, namelen + 1);
7665
7666 name = ggc_alloc_string (imp_name, namelen + prefixlen);
7667 rtl = gen_rtx_SYMBOL_REF (Pmode, name);
7668 SET_SYMBOL_REF_DECL (rtl, to);
7669 SYMBOL_REF_FLAGS (rtl) = SYMBOL_FLAG_LOCAL;
7670
7671 rtl = gen_const_mem (Pmode, rtl);
7672 set_mem_alias_set (rtl, ix86_GOT_alias_set ());
7673
7674 SET_DECL_RTL (to, rtl);
7675
7676 return to;
7677 }
7678
7679 /* Expand SYMBOL into its corresponding dllimport symbol. WANT_REG is
7680 true if we require the result be a register. */
7681
7682 static rtx
7683 legitimize_dllimport_symbol (rtx symbol, bool want_reg)
7684 {
7685 tree imp_decl;
7686 rtx x;
7687
7688 gcc_assert (SYMBOL_REF_DECL (symbol));
7689 imp_decl = get_dllimport_decl (SYMBOL_REF_DECL (symbol));
7690
7691 x = DECL_RTL (imp_decl);
7692 if (want_reg)
7693 x = force_reg (Pmode, x);
7694 return x;
7695 }
7696
7697 /* Try machine-dependent ways of modifying an illegitimate address
7698 to be legitimate. If we find one, return the new, valid address.
7699 This macro is used in only one place: `memory_address' in explow.c.
7700
7701 OLDX is the address as it was before break_out_memory_refs was called.
7702 In some cases it is useful to look at this to decide what needs to be done.
7703
7704 MODE and WIN are passed so that this macro can use
7705 GO_IF_LEGITIMATE_ADDRESS.
7706
7707 It is always safe for this macro to do nothing. It exists to recognize
7708 opportunities to optimize the output.
7709
7710 For the 80386, we handle X+REG by loading X into a register R and
7711 using R+REG. R will go in a general reg and indexing will be used.
7712 However, if REG is a broken-out memory address or multiplication,
7713 nothing needs to be done because REG can certainly go in a general reg.
7714
7715 When -fpic is used, special handling is needed for symbolic references.
7716 See comments by legitimize_pic_address in i386.c for details. */
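/* For instance, (plus (ashift (reg) (const_int 2)) (reg)) is rewritten
   below into (plus (mult (reg) (const_int 4)) (reg)) so that it can use
   the scaled-index addressing mode.  */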
7717
7718 rtx
7719 legitimize_address (rtx x, rtx oldx ATTRIBUTE_UNUSED, enum machine_mode mode)
7720 {
7721 int changed = 0;
7722 unsigned log;
7723
7724 log = GET_CODE (x) == SYMBOL_REF ? SYMBOL_REF_TLS_MODEL (x) : 0;
7725 if (log)
7726 return legitimize_tls_address (x, (enum tls_model) log, false);
7727 if (GET_CODE (x) == CONST
7728 && GET_CODE (XEXP (x, 0)) == PLUS
7729 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7730 && (log = SYMBOL_REF_TLS_MODEL (XEXP (XEXP (x, 0), 0))))
7731 {
7732 rtx t = legitimize_tls_address (XEXP (XEXP (x, 0), 0),
7733 (enum tls_model) log, false);
7734 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7735 }
7736
7737 if (TARGET_DLLIMPORT_DECL_ATTRIBUTES)
7738 {
7739 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_DLLIMPORT_P (x))
7740 return legitimize_dllimport_symbol (x, true);
7741 if (GET_CODE (x) == CONST
7742 && GET_CODE (XEXP (x, 0)) == PLUS
7743 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF
7744 && SYMBOL_REF_DLLIMPORT_P (XEXP (XEXP (x, 0), 0)))
7745 {
7746 rtx t = legitimize_dllimport_symbol (XEXP (XEXP (x, 0), 0), true);
7747 return gen_rtx_PLUS (Pmode, t, XEXP (XEXP (x, 0), 1));
7748 }
7749 }
7750
7751 if (flag_pic && SYMBOLIC_CONST (x))
7752 return legitimize_pic_address (x, 0);
7753
7754 /* Canonicalize shifts by 0, 1, 2, 3 into multiply */
7755 if (GET_CODE (x) == ASHIFT
7756 && CONST_INT_P (XEXP (x, 1))
7757 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (x, 1)) < 4)
7758 {
7759 changed = 1;
7760 log = INTVAL (XEXP (x, 1));
7761 x = gen_rtx_MULT (Pmode, force_reg (Pmode, XEXP (x, 0)),
7762 GEN_INT (1 << log));
7763 }
7764
7765 if (GET_CODE (x) == PLUS)
7766 {
7767 /* Canonicalize shifts by 0, 1, 2, 3 into multiply. */
7768
7769 if (GET_CODE (XEXP (x, 0)) == ASHIFT
7770 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
7771 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 0), 1)) < 4)
7772 {
7773 changed = 1;
7774 log = INTVAL (XEXP (XEXP (x, 0), 1));
7775 XEXP (x, 0) = gen_rtx_MULT (Pmode,
7776 force_reg (Pmode, XEXP (XEXP (x, 0), 0)),
7777 GEN_INT (1 << log));
7778 }
7779
7780 if (GET_CODE (XEXP (x, 1)) == ASHIFT
7781 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
7782 && (unsigned HOST_WIDE_INT) INTVAL (XEXP (XEXP (x, 1), 1)) < 4)
7783 {
7784 changed = 1;
7785 log = INTVAL (XEXP (XEXP (x, 1), 1));
7786 XEXP (x, 1) = gen_rtx_MULT (Pmode,
7787 force_reg (Pmode, XEXP (XEXP (x, 1), 0)),
7788 GEN_INT (1 << log));
7789 }
7790
7791 /* Put multiply first if it isn't already. */
7792 if (GET_CODE (XEXP (x, 1)) == MULT)
7793 {
7794 rtx tmp = XEXP (x, 0);
7795 XEXP (x, 0) = XEXP (x, 1);
7796 XEXP (x, 1) = tmp;
7797 changed = 1;
7798 }
7799
7800 /* Canonicalize (plus (mult (reg) (const)) (plus (reg) (const)))
7801 into (plus (plus (mult (reg) (const)) (reg)) (const)). This can be
7802 created by virtual register instantiation, register elimination, and
7803 similar optimizations. */
7804 if (GET_CODE (XEXP (x, 0)) == MULT && GET_CODE (XEXP (x, 1)) == PLUS)
7805 {
7806 changed = 1;
7807 x = gen_rtx_PLUS (Pmode,
7808 gen_rtx_PLUS (Pmode, XEXP (x, 0),
7809 XEXP (XEXP (x, 1), 0)),
7810 XEXP (XEXP (x, 1), 1));
7811 }
7812
7813 /* Canonicalize
7814 (plus (plus (mult (reg) (const)) (plus (reg) (const))) const)
7815 into (plus (plus (mult (reg) (const)) (reg)) (const)). */
7816 else if (GET_CODE (x) == PLUS && GET_CODE (XEXP (x, 0)) == PLUS
7817 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
7818 && GET_CODE (XEXP (XEXP (x, 0), 1)) == PLUS
7819 && CONSTANT_P (XEXP (x, 1)))
7820 {
7821 rtx constant;
7822 rtx other = NULL_RTX;
7823
7824 if (CONST_INT_P (XEXP (x, 1)))
7825 {
7826 constant = XEXP (x, 1);
7827 other = XEXP (XEXP (XEXP (x, 0), 1), 1);
7828 }
7829 else if (CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 1), 1)))
7830 {
7831 constant = XEXP (XEXP (XEXP (x, 0), 1), 1);
7832 other = XEXP (x, 1);
7833 }
7834 else
7835 constant = 0;
7836
7837 if (constant)
7838 {
7839 changed = 1;
7840 x = gen_rtx_PLUS (Pmode,
7841 gen_rtx_PLUS (Pmode, XEXP (XEXP (x, 0), 0),
7842 XEXP (XEXP (XEXP (x, 0), 1), 0)),
7843 plus_constant (other, INTVAL (constant)));
7844 }
7845 }
7846
7847 if (changed && legitimate_address_p (mode, x, FALSE))
7848 return x;
7849
7850 if (GET_CODE (XEXP (x, 0)) == MULT)
7851 {
7852 changed = 1;
7853 XEXP (x, 0) = force_operand (XEXP (x, 0), 0);
7854 }
7855
7856 if (GET_CODE (XEXP (x, 1)) == MULT)
7857 {
7858 changed = 1;
7859 XEXP (x, 1) = force_operand (XEXP (x, 1), 0);
7860 }
7861
7862 if (changed
7863 && REG_P (XEXP (x, 1))
7864 && REG_P (XEXP (x, 0)))
7865 return x;
7866
7867 if (flag_pic && SYMBOLIC_CONST (XEXP (x, 1)))
7868 {
7869 changed = 1;
7870 x = legitimize_pic_address (x, 0);
7871 }
7872
7873 if (changed && legitimate_address_p (mode, x, FALSE))
7874 return x;
7875
7876 if (REG_P (XEXP (x, 0)))
7877 {
7878 rtx temp = gen_reg_rtx (Pmode);
7879 rtx val = force_operand (XEXP (x, 1), temp);
7880 if (val != temp)
7881 emit_move_insn (temp, val);
7882
7883 XEXP (x, 1) = temp;
7884 return x;
7885 }
7886
7887 else if (REG_P (XEXP (x, 1)))
7888 {
7889 rtx temp = gen_reg_rtx (Pmode);
7890 rtx val = force_operand (XEXP (x, 0), temp);
7891 if (val != temp)
7892 emit_move_insn (temp, val);
7893
7894 XEXP (x, 0) = temp;
7895 return x;
7896 }
7897 }
7898
7899 return x;
7900 }
7901 \f
7902 /* Print an integer constant expression in assembler syntax. Addition
7903 and subtraction are the only arithmetic that may appear in these
7904 expressions. FILE is the stdio stream to write to, X is the rtx, and
7905 CODE is the operand print code from the output string. */
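/* For example, the UNSPEC wrappers handled below are printed as
   relocation suffixes: UNSPEC_GOTOFF as "foo@GOTOFF", UNSPEC_GOT as
   "foo@GOT", and UNSPEC_GOTPCREL as "foo@GOTPCREL(%rip)".  */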
7906
7907 static void
7908 output_pic_addr_const (FILE *file, rtx x, int code)
7909 {
7910 char buf[256];
7911
7912 switch (GET_CODE (x))
7913 {
7914 case PC:
7915 gcc_assert (flag_pic);
7916 putc ('.', file);
7917 break;
7918
7919 case SYMBOL_REF:
7920 if (! TARGET_MACHO || TARGET_64BIT)
7921 output_addr_const (file, x);
7922 else
7923 {
7924 const char *name = XSTR (x, 0);
7925
7926 /* Mark the decl as referenced so that cgraph will
7927 output the function. */
7928 if (SYMBOL_REF_DECL (x))
7929 mark_decl_referenced (SYMBOL_REF_DECL (x));
7930
7931 #if TARGET_MACHO
7932 if (MACHOPIC_INDIRECT
7933 && machopic_classify_symbol (x) == MACHOPIC_UNDEFINED_FUNCTION)
7934 name = machopic_indirection_name (x, /*stub_p=*/true);
7935 #endif
7936 assemble_name (file, name);
7937 }
7938 if (!TARGET_MACHO && !TARGET_64BIT_MS_ABI
7939 && code == 'P' && ! SYMBOL_REF_LOCAL_P (x))
7940 fputs ("@PLT", file);
7941 break;
7942
7943 case LABEL_REF:
7944 x = XEXP (x, 0);
7945 /* FALLTHRU */
7946 case CODE_LABEL:
7947 ASM_GENERATE_INTERNAL_LABEL (buf, "L", CODE_LABEL_NUMBER (x));
7948 assemble_name (asm_out_file, buf);
7949 break;
7950
7951 case CONST_INT:
7952 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
7953 break;
7954
7955 case CONST:
7956 /* This used to output parentheses around the expression,
7957 but that does not work on the 386 (either ATT or BSD assembler). */
7958 output_pic_addr_const (file, XEXP (x, 0), code);
7959 break;
7960
7961 case CONST_DOUBLE:
7962 if (GET_MODE (x) == VOIDmode)
7963 {
7964 /* We can use %d if the number is <32 bits and positive. */
7965 if (CONST_DOUBLE_HIGH (x) || CONST_DOUBLE_LOW (x) < 0)
7966 fprintf (file, "0x%lx%08lx",
7967 (unsigned long) CONST_DOUBLE_HIGH (x),
7968 (unsigned long) CONST_DOUBLE_LOW (x));
7969 else
7970 fprintf (file, HOST_WIDE_INT_PRINT_DEC, CONST_DOUBLE_LOW (x));
7971 }
7972 else
7973 /* We can't handle floating point constants;
7974 PRINT_OPERAND must handle them. */
7975 output_operand_lossage ("floating constant misused");
7976 break;
7977
7978 case PLUS:
7979 /* Some assemblers need integer constants to appear first. */
7980 if (CONST_INT_P (XEXP (x, 0)))
7981 {
7982 output_pic_addr_const (file, XEXP (x, 0), code);
7983 putc ('+', file);
7984 output_pic_addr_const (file, XEXP (x, 1), code);
7985 }
7986 else
7987 {
7988 gcc_assert (CONST_INT_P (XEXP (x, 1)));
7989 output_pic_addr_const (file, XEXP (x, 1), code);
7990 putc ('+', file);
7991 output_pic_addr_const (file, XEXP (x, 0), code);
7992 }
7993 break;
7994
7995 case MINUS:
7996 if (!TARGET_MACHO)
7997 putc (ASSEMBLER_DIALECT == ASM_INTEL ? '(' : '[', file);
7998 output_pic_addr_const (file, XEXP (x, 0), code);
7999 putc ('-', file);
8000 output_pic_addr_const (file, XEXP (x, 1), code);
8001 if (!TARGET_MACHO)
8002 putc (ASSEMBLER_DIALECT == ASM_INTEL ? ')' : ']', file);
8003 break;
8004
8005 case UNSPEC:
8006 gcc_assert (XVECLEN (x, 0) == 1);
8007 output_pic_addr_const (file, XVECEXP (x, 0, 0), code);
8008 switch (XINT (x, 1))
8009 {
8010 case UNSPEC_GOT:
8011 fputs ("@GOT", file);
8012 break;
8013 case UNSPEC_GOTOFF:
8014 fputs ("@GOTOFF", file);
8015 break;
8016 case UNSPEC_PLTOFF:
8017 fputs ("@PLTOFF", file);
8018 break;
8019 case UNSPEC_GOTPCREL:
8020 fputs ("@GOTPCREL(%rip)", file);
8021 break;
8022 case UNSPEC_GOTTPOFF:
8023 /* FIXME: This might be @TPOFF in Sun ld too. */
8024 fputs ("@GOTTPOFF", file);
8025 break;
8026 case UNSPEC_TPOFF:
8027 fputs ("@TPOFF", file);
8028 break;
8029 case UNSPEC_NTPOFF:
8030 if (TARGET_64BIT)
8031 fputs ("@TPOFF", file);
8032 else
8033 fputs ("@NTPOFF", file);
8034 break;
8035 case UNSPEC_DTPOFF:
8036 fputs ("@DTPOFF", file);
8037 break;
8038 case UNSPEC_GOTNTPOFF:
8039 if (TARGET_64BIT)
8040 fputs ("@GOTTPOFF(%rip)", file);
8041 else
8042 fputs ("@GOTNTPOFF", file);
8043 break;
8044 case UNSPEC_INDNTPOFF:
8045 fputs ("@INDNTPOFF", file);
8046 break;
8047 default:
8048 output_operand_lossage ("invalid UNSPEC as operand");
8049 break;
8050 }
8051 break;
8052
8053 default:
8054 output_operand_lossage ("invalid expression as operand");
8055 }
8056 }
8057
8058 /* This is called from dwarf2out.c via TARGET_ASM_OUTPUT_DWARF_DTPREL.
8059 We need to emit DTP-relative relocations. */
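/* E.g. for size 4 this emits, assuming ASM_LONG expands to a .long
   directive, ".long foo@DTPOFF"; for size 8 a trailing ", 0" pads the
   value to eight bytes.  */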
8060
8061 static void ATTRIBUTE_UNUSED
8062 i386_output_dwarf_dtprel (FILE *file, int size, rtx x)
8063 {
8064 fputs (ASM_LONG, file);
8065 output_addr_const (file, x);
8066 fputs ("@DTPOFF", file);
8067 switch (size)
8068 {
8069 case 4:
8070 break;
8071 case 8:
8072 fputs (", 0", file);
8073 break;
8074 default:
8075 gcc_unreachable ();
8076 }
8077 }
8078
8079 /* In the name of slightly smaller debug output, and to cater to
8080 general assembler lossage, recognize PIC+GOTOFF and turn it back
8081 into a direct symbol reference.
8082
8083 On Darwin, this is necessary to avoid a crash, because Darwin
8084 has a different PIC label for each routine but the DWARF debugging
8085 information is not associated with any particular routine, so it's
8086 necessary to remove references to the PIC label from RTL stored by
8087 the DWARF output code. */
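/* For example, an address of the form
   (plus (reg ebx) (const (unspec [(symbol_ref "foo")] UNSPEC_GOTOFF)))
   is turned back into (symbol_ref "foo"), with any register or constant
   addend re-attached to the result.  */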
8088
8089 static rtx
8090 ix86_delegitimize_address (rtx orig_x)
8091 {
8092 rtx x = orig_x;
8093 /* reg_addend is NULL or a multiple of some register. */
8094 rtx reg_addend = NULL_RTX;
8095 /* const_addend is NULL or a const_int. */
8096 rtx const_addend = NULL_RTX;
8097 /* This is the result, or NULL. */
8098 rtx result = NULL_RTX;
8099
8100 if (MEM_P (x))
8101 x = XEXP (x, 0);
8102
8103 if (TARGET_64BIT)
8104 {
8105 if (GET_CODE (x) != CONST
8106 || GET_CODE (XEXP (x, 0)) != UNSPEC
8107 || XINT (XEXP (x, 0), 1) != UNSPEC_GOTPCREL
8108 || !MEM_P (orig_x))
8109 return orig_x;
8110 return XVECEXP (XEXP (x, 0), 0, 0);
8111 }
8112
8113 if (GET_CODE (x) != PLUS
8114 || GET_CODE (XEXP (x, 1)) != CONST)
8115 return orig_x;
8116
8117 if (REG_P (XEXP (x, 0))
8118 && REGNO (XEXP (x, 0)) == PIC_OFFSET_TABLE_REGNUM)
8119 /* %ebx + GOT/GOTOFF */
8120 ;
8121 else if (GET_CODE (XEXP (x, 0)) == PLUS)
8122 {
8123 /* %ebx + %reg * scale + GOT/GOTOFF */
8124 reg_addend = XEXP (x, 0);
8125 if (REG_P (XEXP (reg_addend, 0))
8126 && REGNO (XEXP (reg_addend, 0)) == PIC_OFFSET_TABLE_REGNUM)
8127 reg_addend = XEXP (reg_addend, 1);
8128 else if (REG_P (XEXP (reg_addend, 1))
8129 && REGNO (XEXP (reg_addend, 1)) == PIC_OFFSET_TABLE_REGNUM)
8130 reg_addend = XEXP (reg_addend, 0);
8131 else
8132 return orig_x;
8133 if (!REG_P (reg_addend)
8134 && GET_CODE (reg_addend) != MULT
8135 && GET_CODE (reg_addend) != ASHIFT)
8136 return orig_x;
8137 }
8138 else
8139 return orig_x;
8140
8141 x = XEXP (XEXP (x, 1), 0);
8142 if (GET_CODE (x) == PLUS
8143 && CONST_INT_P (XEXP (x, 1)))
8144 {
8145 const_addend = XEXP (x, 1);
8146 x = XEXP (x, 0);
8147 }
8148
8149 if (GET_CODE (x) == UNSPEC
8150 && ((XINT (x, 1) == UNSPEC_GOT && MEM_P (orig_x))
8151 || (XINT (x, 1) == UNSPEC_GOTOFF && !MEM_P (orig_x))))
8152 result = XVECEXP (x, 0, 0);
8153
8154 if (TARGET_MACHO && darwin_local_data_pic (x)
8155 && !MEM_P (orig_x))
8156 result = XEXP (x, 0);
8157
8158 if (! result)
8159 return orig_x;
8160
8161 if (const_addend)
8162 result = gen_rtx_PLUS (Pmode, result, const_addend);
8163 if (reg_addend)
8164 result = gen_rtx_PLUS (Pmode, reg_addend, result);
8165 return result;
8166 }
8167
8168 /* If X is a machine specific address (i.e. a symbol or label being
8169 referenced as a displacement from the GOT implemented using an
8170 UNSPEC), then return the base term. Otherwise return X. */
8171
8172 rtx
8173 ix86_find_base_term (rtx x)
8174 {
8175 rtx term;
8176
8177 if (TARGET_64BIT)
8178 {
8179 if (GET_CODE (x) != CONST)
8180 return x;
8181 term = XEXP (x, 0);
8182 if (GET_CODE (term) == PLUS
8183 && (CONST_INT_P (XEXP (term, 1))
8184 || GET_CODE (XEXP (term, 1)) == CONST_DOUBLE))
8185 term = XEXP (term, 0);
8186 if (GET_CODE (term) != UNSPEC
8187 || XINT (term, 1) != UNSPEC_GOTPCREL)
8188 return x;
8189
8190 term = XVECEXP (term, 0, 0);
8191
8192 if (GET_CODE (term) != SYMBOL_REF
8193 && GET_CODE (term) != LABEL_REF)
8194 return x;
8195
8196 return term;
8197 }
8198
8199 term = ix86_delegitimize_address (x);
8200
8201 if (GET_CODE (term) != SYMBOL_REF
8202 && GET_CODE (term) != LABEL_REF)
8203 return x;
8204
8205 return term;
8206 }
8207 \f
8208 static void
8209 put_condition_code (enum rtx_code code, enum machine_mode mode, int reverse,
8210 int fp, FILE *file)
8211 {
8212 const char *suffix;
8213
8214 if (mode == CCFPmode || mode == CCFPUmode)
8215 {
8216 enum rtx_code second_code, bypass_code;
8217 ix86_fp_comparison_codes (code, &bypass_code, &code, &second_code);
8218 gcc_assert (bypass_code == UNKNOWN && second_code == UNKNOWN);
8219 code = ix86_fp_compare_code_to_integer (code);
8220 mode = CCmode;
8221 }
8222 if (reverse)
8223 code = reverse_condition (code);
8224
8225 switch (code)
8226 {
8227 case EQ:
8228 switch (mode)
8229 {
8230 case CCAmode:
8231 suffix = "a";
8232 break;
8233
8234 case CCCmode:
8235 suffix = "c";
8236 break;
8237
8238 case CCOmode:
8239 suffix = "o";
8240 break;
8241
8242 case CCSmode:
8243 suffix = "s";
8244 break;
8245
8246 default:
8247 suffix = "e";
8248 }
8249 break;
8250 case NE:
8251 switch (mode)
8252 {
8253 case CCAmode:
8254 suffix = "na";
8255 break;
8256
8257 case CCCmode:
8258 suffix = "nc";
8259 break;
8260
8261 case CCOmode:
8262 suffix = "no";
8263 break;
8264
8265 case CCSmode:
8266 suffix = "ns";
8267 break;
8268
8269 default:
8270 suffix = "ne";
8271 }
8272 break;
8273 case GT:
8274 gcc_assert (mode == CCmode || mode == CCNOmode || mode == CCGCmode);
8275 suffix = "g";
8276 break;
8277 case GTU:
8278 /* ??? Use "nbe" instead of "a" for fcmov lossage on some assemblers.
8279 Those same assemblers have the same but opposite lossage on cmov. */
8280 if (mode == CCmode)
8281 suffix = fp ? "nbe" : "a";
8282 else if (mode == CCCmode)
8283 suffix = "b";
8284 else
8285 gcc_unreachable ();
8286 break;
8287 case LT:
8288 switch (mode)
8289 {
8290 case CCNOmode:
8291 case CCGOCmode:
8292 suffix = "s";
8293 break;
8294
8295 case CCmode:
8296 case CCGCmode:
8297 suffix = "l";
8298 break;
8299
8300 default:
8301 gcc_unreachable ();
8302 }
8303 break;
8304 case LTU:
8305 gcc_assert (mode == CCmode || mode == CCCmode);
8306 suffix = "b";
8307 break;
8308 case GE:
8309 switch (mode)
8310 {
8311 case CCNOmode:
8312 case CCGOCmode:
8313 suffix = "ns";
8314 break;
8315
8316 case CCmode:
8317 case CCGCmode:
8318 suffix = "ge";
8319 break;
8320
8321 default:
8322 gcc_unreachable ();
8323 }
8324 break;
8325 case GEU:
8326 /* ??? As above. */
8327 gcc_assert (mode == CCmode || mode == CCCmode);
8328 suffix = fp ? "nb" : "ae";
8329 break;
8330 case LE:
8331 gcc_assert (mode == CCmode || mode == CCGCmode || mode == CCNOmode);
8332 suffix = "le";
8333 break;
8334 case LEU:
8335 /* ??? As above. */
8336 if (mode == CCmode)
8337 suffix = "be";
8338 else if (mode == CCCmode)
8339 suffix = fp ? "nb" : "ae";
8340 else
8341 gcc_unreachable ();
8342 break;
8343 case UNORDERED:
8344 suffix = fp ? "u" : "p";
8345 break;
8346 case ORDERED:
8347 suffix = fp ? "nu" : "np";
8348 break;
8349 default:
8350 gcc_unreachable ();
8351 }
8352 fputs (suffix, file);
8353 }
8354
8355 /* Print the name of register X to FILE based on its machine mode and number.
8356 If CODE is 'w', pretend the mode is HImode.
8357 If CODE is 'b', pretend the mode is QImode.
8358 If CODE is 'k', pretend the mode is SImode.
8359 If CODE is 'q', pretend the mode is DImode.
8360 If CODE is 'h', pretend the reg is the 'high' byte register.
8361 If CODE is 'y', print "st(0)" instead of "st", if the reg is stack op. */
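/* E.g. for hard register 0 (the "a" register) in AT&T syntax, CODE 'b'
   prints %al, 'w' prints %ax, 'k' prints %eax, 'q' prints %rax and 'h'
   prints %ah.  */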
8362
8363 void
8364 print_reg (rtx x, int code, FILE *file)
8365 {
8366 gcc_assert (REGNO (x) != ARG_POINTER_REGNUM
8367 && REGNO (x) != FRAME_POINTER_REGNUM
8368 && REGNO (x) != FLAGS_REG
8369 && REGNO (x) != FPSR_REG
8370 && REGNO (x) != FPCR_REG);
8371
8372 if (ASSEMBLER_DIALECT == ASM_ATT || USER_LABEL_PREFIX[0] == 0)
8373 putc ('%', file);
8374
8375 if (code == 'w' || MMX_REG_P (x))
8376 code = 2;
8377 else if (code == 'b')
8378 code = 1;
8379 else if (code == 'k')
8380 code = 4;
8381 else if (code == 'q')
8382 code = 8;
8383 else if (code == 'y')
8384 code = 3;
8385 else if (code == 'h')
8386 code = 0;
8387 else
8388 code = GET_MODE_SIZE (GET_MODE (x));
8389
8390 /* Irritatingly, the AMD extended registers use a different naming
8391 convention from the normal registers. */
8392 if (REX_INT_REG_P (x))
8393 {
8394 gcc_assert (TARGET_64BIT);
8395 switch (code)
8396 {
8397 case 0:
8398 error ("extended registers have no high halves");
8399 break;
8400 case 1:
8401 fprintf (file, "r%ib", REGNO (x) - FIRST_REX_INT_REG + 8);
8402 break;
8403 case 2:
8404 fprintf (file, "r%iw", REGNO (x) - FIRST_REX_INT_REG + 8);
8405 break;
8406 case 4:
8407 fprintf (file, "r%id", REGNO (x) - FIRST_REX_INT_REG + 8);
8408 break;
8409 case 8:
8410 fprintf (file, "r%i", REGNO (x) - FIRST_REX_INT_REG + 8);
8411 break;
8412 default:
8413 error ("unsupported operand size for extended register");
8414 break;
8415 }
8416 return;
8417 }
8418 switch (code)
8419 {
8420 case 3:
8421 if (STACK_TOP_P (x))
8422 {
8423 fputs ("st(0)", file);
8424 break;
8425 }
8426 /* FALLTHRU */
8427 case 8:
8428 case 4:
8429 case 12:
8430 if (! ANY_FP_REG_P (x))
8431 putc (code == 8 && TARGET_64BIT ? 'r' : 'e', file);
8432 /* FALLTHRU */
8433 case 16:
8434 case 2:
8435 normal:
8436 fputs (hi_reg_name[REGNO (x)], file);
8437 break;
8438 case 1:
8439 if (REGNO (x) >= ARRAY_SIZE (qi_reg_name))
8440 goto normal;
8441 fputs (qi_reg_name[REGNO (x)], file);
8442 break;
8443 case 0:
8444 if (REGNO (x) >= ARRAY_SIZE (qi_high_reg_name))
8445 goto normal;
8446 fputs (qi_high_reg_name[REGNO (x)], file);
8447 break;
8448 default:
8449 gcc_unreachable ();
8450 }
8451 }
8452
8453 /* Locate some local-dynamic symbol still in use by this function
8454 so that we can print its name in some tls_local_dynamic_base
8455 pattern. */
8456
8457 static int
8458 get_some_local_dynamic_name_1 (rtx *px, void *data ATTRIBUTE_UNUSED)
8459 {
8460 rtx x = *px;
8461
8462 if (GET_CODE (x) == SYMBOL_REF
8463 && SYMBOL_REF_TLS_MODEL (x) == TLS_MODEL_LOCAL_DYNAMIC)
8464 {
8465 cfun->machine->some_ld_name = XSTR (x, 0);
8466 return 1;
8467 }
8468
8469 return 0;
8470 }
8471
8472 static const char *
8473 get_some_local_dynamic_name (void)
8474 {
8475 rtx insn;
8476
8477 if (cfun->machine->some_ld_name)
8478 return cfun->machine->some_ld_name;
8479
8480 for (insn = get_insns (); insn ; insn = NEXT_INSN (insn))
8481 if (INSN_P (insn)
8482 && for_each_rtx (&PATTERN (insn), get_some_local_dynamic_name_1, 0))
8483 return cfun->machine->some_ld_name;
8484
8485 gcc_unreachable ();
8486 }
8487
8488 /* Meaning of CODE:
8489 L,W,B,Q,S,T -- print the opcode suffix for specified size of operand.
8490 C -- print opcode suffix for set/cmov insn.
8491 c -- like C, but print reversed condition
8492 F,f -- likewise, but for floating-point.
8493 O -- if HAVE_AS_IX86_CMOV_SUN_SYNTAX, expand to "w.", "l." or "q.",
8494 otherwise nothing
8495 R -- print the prefix for register names.
8496 z -- print the opcode suffix for the size of the current operand.
8497 * -- print a star (in certain assembler syntax)
8498 A -- print an absolute memory reference.
8499 w -- print the operand as if it's a "word" (HImode) even if it isn't.
8500 s -- print a shift double count, followed by the assembler's argument
8501 delimiter.
8502 b -- print the QImode name of the register for the indicated operand.
8503 %b0 would print %al if operands[0] is reg 0.
8504 w -- likewise, print the HImode name of the register.
8505 k -- likewise, print the SImode name of the register.
8506 q -- likewise, print the DImode name of the register.
8507 h -- print the QImode name for a "high" register, either ah, bh, ch or dh.
8508 y -- print "st(0)" instead of "st" as a register.
8509 D -- print condition for SSE cmp instruction.
8510 P -- if PIC, print an @PLT suffix.
8511 X -- don't print any sort of PIC '@' suffix for a symbol.
8512 & -- print some in-use local-dynamic symbol name.
8513 H -- print a memory address offset by 8; used for sse high-parts
8514 + -- print a branch hint as 'cs' or 'ds' prefix
8515 ; -- print a semicolon (after prefixes due to bug in older gas).
8516 */
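/* For illustration, in an insn template "%k1" prints the SImode name of
   operand 1 (e.g. %ecx) and "%z0" appends the operand-size suffix, so a
   hypothetical template like "mov%z0\t%1, %0" could expand to
   "movl %ecx, (%eax)" in AT&T syntax.  */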
8517
8518 void
8519 print_operand (FILE *file, rtx x, int code)
8520 {
8521 if (code)
8522 {
8523 switch (code)
8524 {
8525 case '*':
8526 if (ASSEMBLER_DIALECT == ASM_ATT)
8527 putc ('*', file);
8528 return;
8529
8530 case '&':
8531 assemble_name (file, get_some_local_dynamic_name ());
8532 return;
8533
8534 case 'A':
8535 switch (ASSEMBLER_DIALECT)
8536 {
8537 case ASM_ATT:
8538 putc ('*', file);
8539 break;
8540
8541 case ASM_INTEL:
8542 /* Intel syntax. For absolute addresses, registers should not
8543 be surrounded by brackets. */
8544 if (!REG_P (x))
8545 {
8546 putc ('[', file);
8547 PRINT_OPERAND (file, x, 0);
8548 putc (']', file);
8549 return;
8550 }
8551 break;
8552
8553 default:
8554 gcc_unreachable ();
8555 }
8556
8557 PRINT_OPERAND (file, x, 0);
8558 return;
8559
8560
8561 case 'L':
8562 if (ASSEMBLER_DIALECT == ASM_ATT)
8563 putc ('l', file);
8564 return;
8565
8566 case 'W':
8567 if (ASSEMBLER_DIALECT == ASM_ATT)
8568 putc ('w', file);
8569 return;
8570
8571 case 'B':
8572 if (ASSEMBLER_DIALECT == ASM_ATT)
8573 putc ('b', file);
8574 return;
8575
8576 case 'Q':
8577 if (ASSEMBLER_DIALECT == ASM_ATT)
8578 putc ('l', file);
8579 return;
8580
8581 case 'S':
8582 if (ASSEMBLER_DIALECT == ASM_ATT)
8583 putc ('s', file);
8584 return;
8585
8586 case 'T':
8587 if (ASSEMBLER_DIALECT == ASM_ATT)
8588 putc ('t', file);
8589 return;
8590
8591 case 'z':
8592 /* 387 opcodes don't get size suffixes if the operands are
8593 registers. */
8594 if (STACK_REG_P (x))
8595 return;
8596
8597 /* Likewise if using Intel opcodes. */
8598 if (ASSEMBLER_DIALECT == ASM_INTEL)
8599 return;
8600
8601 /* Derive the opcode size suffix from the size of the operand. */
8602 switch (GET_MODE_SIZE (GET_MODE (x)))
8603 {
8604 case 1:
8605 putc ('b', file);
8606 return;
8607
8608 case 2:
8609 if (MEM_P (x))
8610 {
8611 #ifdef HAVE_GAS_FILDS_FISTS
8612 putc ('s', file);
8613 #endif
8614 return;
8615 }
8616 else
8617 putc ('w', file);
8618 return;
8619
8620 case 4:
8621 if (GET_MODE (x) == SFmode)
8622 {
8623 putc ('s', file);
8624 return;
8625 }
8626 else
8627 putc ('l', file);
8628 return;
8629
8630 case 12:
8631 case 16:
8632 putc ('t', file);
8633 return;
8634
8635 case 8:
8636 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_INT)
8637 {
8638 #ifdef GAS_MNEMONICS
8639 putc ('q', file);
8640 #else
8641 putc ('l', file);
8642 putc ('l', file);
8643 #endif
8644 }
8645 else
8646 putc ('l', file);
8647 return;
8648
8649 default:
8650 gcc_unreachable ();
8651 }
8652
8653 case 'b':
8654 case 'w':
8655 case 'k':
8656 case 'q':
8657 case 'h':
8658 case 'y':
8659 case 'X':
8660 case 'P':
8661 break;
8662
8663 case 's':
8664 if (CONST_INT_P (x) || ! SHIFT_DOUBLE_OMITS_COUNT)
8665 {
8666 PRINT_OPERAND (file, x, 0);
8667 putc (',', file);
8668 }
8669 return;
8670
8671 case 'D':
8672 /* Little bit of braindamage here. The SSE compare instructions
8673 use completely different names for the comparisons than the
8674 fp conditional moves do. */
8675 switch (GET_CODE (x))
8676 {
8677 case EQ:
8678 case UNEQ:
8679 fputs ("eq", file);
8680 break;
8681 case LT:
8682 case UNLT:
8683 fputs ("lt", file);
8684 break;
8685 case LE:
8686 case UNLE:
8687 fputs ("le", file);
8688 break;
8689 case UNORDERED:
8690 fputs ("unord", file);
8691 break;
8692 case NE:
8693 case LTGT:
8694 fputs ("neq", file);
8695 break;
8696 case UNGE:
8697 case GE:
8698 fputs ("nlt", file);
8699 break;
8700 case UNGT:
8701 case GT:
8702 fputs ("nle", file);
8703 break;
8704 case ORDERED:
8705 fputs ("ord", file);
8706 break;
8707 default:
8708 gcc_unreachable ();
8709 }
8710 return;
8711 case 'O':
8712 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8713 if (ASSEMBLER_DIALECT == ASM_ATT)
8714 {
8715 switch (GET_MODE (x))
8716 {
8717 case HImode: putc ('w', file); break;
8718 case SImode:
8719 case SFmode: putc ('l', file); break;
8720 case DImode:
8721 case DFmode: putc ('q', file); break;
8722 default: gcc_unreachable ();
8723 }
8724 putc ('.', file);
8725 }
8726 #endif
8727 return;
8728 case 'C':
8729 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 0, file);
8730 return;
8731 case 'F':
8732 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8733 if (ASSEMBLER_DIALECT == ASM_ATT)
8734 putc ('.', file);
8735 #endif
8736 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 0, 1, file);
8737 return;
8738
8739 /* Like above, but reverse condition */
8740 case 'c':
8741 /* Check to see if argument to %c is really a constant
8742 and not a condition code which needs to be reversed. */
8743 if (!COMPARISON_P (x))
8744 {
8745 output_operand_lossage ("operand is neither a constant nor a condition code, invalid operand code 'c'");
8746 return;
8747 }
8748 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 0, file);
8749 return;
8750 case 'f':
8751 #ifdef HAVE_AS_IX86_CMOV_SUN_SYNTAX
8752 if (ASSEMBLER_DIALECT == ASM_ATT)
8753 putc ('.', file);
8754 #endif
8755 put_condition_code (GET_CODE (x), GET_MODE (XEXP (x, 0)), 1, 1, file);
8756 return;
8757
8758 case 'H':
8759 /* It doesn't actually matter what mode we use here, as we're
8760 only going to use this for printing. */
8761 x = adjust_address_nv (x, DImode, 8);
8762 break;
8763
8764 case '+':
8765 {
8766 rtx x;
8767
8768 if (!optimize || optimize_size || !TARGET_BRANCH_PREDICTION_HINTS)
8769 return;
8770
8771 x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
8772 if (x)
8773 {
8774 int pred_val = INTVAL (XEXP (x, 0));
8775
8776 if (pred_val < REG_BR_PROB_BASE * 45 / 100
8777 || pred_val > REG_BR_PROB_BASE * 55 / 100)
8778 {
8779 int taken = pred_val > REG_BR_PROB_BASE / 2;
8780 int cputaken = final_forward_branch_p (current_output_insn) == 0;
8781
8782 /* Emit hints only in the case the default branch prediction
8783 heuristics would fail. */
8784 if (taken != cputaken)
8785 {
8786 /* We use 3e (DS) prefix for taken branches and
8787 2e (CS) prefix for not taken branches. */
8788 if (taken)
8789 fputs ("ds ; ", file);
8790 else
8791 fputs ("cs ; ", file);
8792 }
8793 }
8794 }
8795 return;
8796 }
8797
8798 case ';':
8799 #if TARGET_MACHO
8800 fputs (" ; ", file);
8801 #else
8802 fputc (' ', file);
8803 #endif
8804 return;
8805
8806 default:
8807 output_operand_lossage ("invalid operand code '%c'", code);
8808 }
8809 }
8810
8811 if (REG_P (x))
8812 print_reg (x, code, file);
8813
8814 else if (MEM_P (x))
8815 {
8816 /* No `byte ptr' prefix for call instructions. */
8817 if (ASSEMBLER_DIALECT == ASM_INTEL && code != 'X' && code != 'P')
8818 {
8819 const char * size;
8820 switch (GET_MODE_SIZE (GET_MODE (x)))
8821 {
8822 case 1: size = "BYTE"; break;
8823 case 2: size = "WORD"; break;
8824 case 4: size = "DWORD"; break;
8825 case 8: size = "QWORD"; break;
8826 case 12: size = "XWORD"; break;
8827 case 16: size = "XMMWORD"; break;
8828 default:
8829 gcc_unreachable ();
8830 }
8831
8832 /* Check for explicit size override (codes 'b', 'w' and 'k') */
8833 if (code == 'b')
8834 size = "BYTE";
8835 else if (code == 'w')
8836 size = "WORD";
8837 else if (code == 'k')
8838 size = "DWORD";
8839
8840 fputs (size, file);
8841 fputs (" PTR ", file);
8842 }
8843
8844 x = XEXP (x, 0);
8845 /* Avoid (%rip) for call operands. */
8846 if (CONSTANT_ADDRESS_P (x) && code == 'P'
8847 && !CONST_INT_P (x))
8848 output_addr_const (file, x);
8849 else if (this_is_asm_operands && ! address_operand (x, VOIDmode))
8850 output_operand_lossage ("invalid constraints for operand");
8851 else
8852 output_address (x);
8853 }
8854
8855 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == SFmode)
8856 {
8857 REAL_VALUE_TYPE r;
8858 long l;
8859
8860 REAL_VALUE_FROM_CONST_DOUBLE (r, x);
8861 REAL_VALUE_TO_TARGET_SINGLE (r, l);
8862
8863 if (ASSEMBLER_DIALECT == ASM_ATT)
8864 putc ('$', file);
8865 fprintf (file, "0x%08lx", l);
8866 }
8867
8868 /* These float cases don't actually occur as immediate operands. */
8869 else if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) == DFmode)
8870 {
8871 char dstr[30];
8872
8873 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8874 fprintf (file, "%s", dstr);
8875 }
8876
8877 else if (GET_CODE (x) == CONST_DOUBLE
8878 && GET_MODE (x) == XFmode)
8879 {
8880 char dstr[30];
8881
8882 real_to_decimal (dstr, CONST_DOUBLE_REAL_VALUE (x), sizeof (dstr), 0, 1);
8883 fprintf (file, "%s", dstr);
8884 }
8885
8886 else
8887 {
8888 /* We have patterns that allow zero sets of memory, for instance.
8889 In 64-bit mode, we should probably support all 8-byte vectors,
8890 since we can in fact encode that into an immediate. */
8891 if (GET_CODE (x) == CONST_VECTOR)
8892 {
8893 gcc_assert (x == CONST0_RTX (GET_MODE (x)));
8894 x = const0_rtx;
8895 }
8896
8897 if (code != 'P')
8898 {
8899 if (CONST_INT_P (x) || GET_CODE (x) == CONST_DOUBLE)
8900 {
8901 if (ASSEMBLER_DIALECT == ASM_ATT)
8902 putc ('$', file);
8903 }
8904 else if (GET_CODE (x) == CONST || GET_CODE (x) == SYMBOL_REF
8905 || GET_CODE (x) == LABEL_REF)
8906 {
8907 if (ASSEMBLER_DIALECT == ASM_ATT)
8908 putc ('$', file);
8909 else
8910 fputs ("OFFSET FLAT:", file);
8911 }
8912 }
8913 if (CONST_INT_P (x))
8914 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
8915 else if (flag_pic)
8916 output_pic_addr_const (file, x, code);
8917 else
8918 output_addr_const (file, x);
8919 }
8920 }
8921 \f
8922 /* Print a memory operand whose address is ADDR. */
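/* For example, base %eax, index %ebx, scale 2 and displacement 4 come out
   as "4(%eax,%ebx,2)" in AT&T syntax and roughly "[eax+4+ebx*2]" in the
   Intel dialect handled below.  */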
8923
8924 void
8925 print_operand_address (FILE *file, rtx addr)
8926 {
8927 struct ix86_address parts;
8928 rtx base, index, disp;
8929 int scale;
8930 int ok = ix86_decompose_address (addr, &parts);
8931
8932 gcc_assert (ok);
8933
8934 base = parts.base;
8935 index = parts.index;
8936 disp = parts.disp;
8937 scale = parts.scale;
8938
8939 switch (parts.seg)
8940 {
8941 case SEG_DEFAULT:
8942 break;
8943 case SEG_FS:
8944 case SEG_GS:
8945 if (USER_LABEL_PREFIX[0] == 0)
8946 putc ('%', file);
8947 fputs ((parts.seg == SEG_FS ? "fs:" : "gs:"), file);
8948 break;
8949 default:
8950 gcc_unreachable ();
8951 }
8952
8953 if (!base && !index)
8954 {
8955 /* A displacement-only address requires special attention. */
8956
8957 if (CONST_INT_P (disp))
8958 {
8959 if (ASSEMBLER_DIALECT == ASM_INTEL && parts.seg == SEG_DEFAULT)
8960 {
8961 if (USER_LABEL_PREFIX[0] == 0)
8962 putc ('%', file);
8963 fputs ("ds:", file);
8964 }
8965 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (disp));
8966 }
8967 else if (flag_pic)
8968 output_pic_addr_const (file, disp, 0);
8969 else
8970 output_addr_const (file, disp);
8971
8972 /* Use one byte shorter RIP relative addressing for 64bit mode. */
8973 if (TARGET_64BIT)
8974 {
8975 if (GET_CODE (disp) == CONST
8976 && GET_CODE (XEXP (disp, 0)) == PLUS
8977 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
8978 disp = XEXP (XEXP (disp, 0), 0);
8979 if (GET_CODE (disp) == LABEL_REF
8980 || (GET_CODE (disp) == SYMBOL_REF
8981 && SYMBOL_REF_TLS_MODEL (disp) == 0))
8982 fputs ("(%rip)", file);
8983 }
8984 }
8985 else
8986 {
8987 if (ASSEMBLER_DIALECT == ASM_ATT)
8988 {
8989 if (disp)
8990 {
8991 if (flag_pic)
8992 output_pic_addr_const (file, disp, 0);
8993 else if (GET_CODE (disp) == LABEL_REF)
8994 output_asm_label (disp);
8995 else
8996 output_addr_const (file, disp);
8997 }
8998
8999 putc ('(', file);
9000 if (base)
9001 print_reg (base, 0, file);
9002 if (index)
9003 {
9004 putc (',', file);
9005 print_reg (index, 0, file);
9006 if (scale != 1)
9007 fprintf (file, ",%d", scale);
9008 }
9009 putc (')', file);
9010 }
9011 else
9012 {
9013 rtx offset = NULL_RTX;
9014
9015 if (disp)
9016 {
9017 /* Pull out the offset of a symbol; print any symbol itself. */
9018 if (GET_CODE (disp) == CONST
9019 && GET_CODE (XEXP (disp, 0)) == PLUS
9020 && CONST_INT_P (XEXP (XEXP (disp, 0), 1)))
9021 {
9022 offset = XEXP (XEXP (disp, 0), 1);
9023 disp = gen_rtx_CONST (VOIDmode,
9024 XEXP (XEXP (disp, 0), 0));
9025 }
9026
9027 if (flag_pic)
9028 output_pic_addr_const (file, disp, 0);
9029 else if (GET_CODE (disp) == LABEL_REF)
9030 output_asm_label (disp);
9031 else if (CONST_INT_P (disp))
9032 offset = disp;
9033 else
9034 output_addr_const (file, disp);
9035 }
9036
9037 putc ('[', file);
9038 if (base)
9039 {
9040 print_reg (base, 0, file);
9041 if (offset)
9042 {
9043 if (INTVAL (offset) >= 0)
9044 putc ('+', file);
9045 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9046 }
9047 }
9048 else if (offset)
9049 fprintf (file, HOST_WIDE_INT_PRINT_DEC, INTVAL (offset));
9050 else
9051 putc ('0', file);
9052
9053 if (index)
9054 {
9055 putc ('+', file);
9056 print_reg (index, 0, file);
9057 if (scale != 1)
9058 fprintf (file, "*%d", scale);
9059 }
9060 putc (']', file);
9061 }
9062 }
9063 }
9064
9065 bool
9066 output_addr_const_extra (FILE *file, rtx x)
9067 {
9068 rtx op;
9069
9070 if (GET_CODE (x) != UNSPEC)
9071 return false;
9072
9073 op = XVECEXP (x, 0, 0);
9074 switch (XINT (x, 1))
9075 {
9076 case UNSPEC_GOTTPOFF:
9077 output_addr_const (file, op);
9078 /* FIXME: This might be @TPOFF in Sun ld. */
9079 fputs ("@GOTTPOFF", file);
9080 break;
9081 case UNSPEC_TPOFF:
9082 output_addr_const (file, op);
9083 fputs ("@TPOFF", file);
9084 break;
9085 case UNSPEC_NTPOFF:
9086 output_addr_const (file, op);
9087 if (TARGET_64BIT)
9088 fputs ("@TPOFF", file);
9089 else
9090 fputs ("@NTPOFF", file);
9091 break;
9092 case UNSPEC_DTPOFF:
9093 output_addr_const (file, op);
9094 fputs ("@DTPOFF", file);
9095 break;
9096 case UNSPEC_GOTNTPOFF:
9097 output_addr_const (file, op);
9098 if (TARGET_64BIT)
9099 fputs ("@GOTTPOFF(%rip)", file);
9100 else
9101 fputs ("@GOTNTPOFF", file);
9102 break;
9103 case UNSPEC_INDNTPOFF:
9104 output_addr_const (file, op);
9105 fputs ("@INDNTPOFF", file);
9106 break;
9107
9108 default:
9109 return false;
9110 }
9111
9112 return true;
9113 }
9114 \f
9115 /* Split one or more DImode RTL references into pairs of SImode
9116 references. The RTL can be REG, offsettable MEM, integer constant, or
9117 CONST_DOUBLE. "operands" is a pointer to an array of DImode RTL to
9118 split and "num" is its length. lo_half and hi_half are output arrays
9119 that parallel "operands". */
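/* Illustrative example of the splitting done below: a (reg:DI) operand
 comes back as its two SImode subwords at byte offsets 0 and 4, while
 an (offsettable) MEM is simply re-addressed at those same offsets
 via adjust_address. */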
9120
9121 void
9122 split_di (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9123 {
9124 while (num--)
9125 {
9126 rtx op = operands[num];
9127
9128 /* simplify_subreg refuses to split volatile memory addresses,
9129 but we still have to handle them. */
9130 if (MEM_P (op))
9131 {
9132 lo_half[num] = adjust_address (op, SImode, 0);
9133 hi_half[num] = adjust_address (op, SImode, 4);
9134 }
9135 else
9136 {
9137 lo_half[num] = simplify_gen_subreg (SImode, op,
9138 GET_MODE (op) == VOIDmode
9139 ? DImode : GET_MODE (op), 0);
9140 hi_half[num] = simplify_gen_subreg (SImode, op,
9141 GET_MODE (op) == VOIDmode
9142 ? DImode : GET_MODE (op), 4);
9143 }
9144 }
9145 }
9146 /* Split one or more TImode RTL references into pairs of DImode
9147 references. The RTL can be REG, offsettable MEM, integer constant, or
9148 CONST_DOUBLE. "operands" is a pointer to an array of TImode RTL to
9149 split and "num" is its length. lo_half and hi_half are output arrays
9150 that parallel "operands". */
9151
9152 void
9153 split_ti (rtx operands[], int num, rtx lo_half[], rtx hi_half[])
9154 {
9155 while (num--)
9156 {
9157 rtx op = operands[num];
9158
9159 /* simplify_subreg refuses to split volatile memory addresses, but we
9160 still have to handle them. */
9161 if (MEM_P (op))
9162 {
9163 lo_half[num] = adjust_address (op, DImode, 0);
9164 hi_half[num] = adjust_address (op, DImode, 8);
9165 }
9166 else
9167 {
9168 lo_half[num] = simplify_gen_subreg (DImode, op, TImode, 0);
9169 hi_half[num] = simplify_gen_subreg (DImode, op, TImode, 8);
9170 }
9171 }
9172 }
9173 \f
9174 /* Output code to perform a 387 binary operation in INSN, one of PLUS,
9175 MINUS, MULT or DIV. OPERANDS are the insn operands, where operands[3]
9176 is the expression of the binary operation. The output may either be
9177 emitted here, or returned to the caller, like all output_* functions.
9178
9179 There is no guarantee that the operands are the same mode, as they
9180 might be within FLOAT or FLOAT_EXTEND expressions. */
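/* As an informal example of the templates built below, a plain
 register-register SFmode add typically comes back as
 "fadd\t{%2, %0|%0, %2}" for the x87 case or "addss\t{%2, %0|%0, %2}"
 for SSE; the {att|intel} braces are resolved when the template is
 finally output. */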
9181
9182 #ifndef SYSV386_COMPAT
9183 /* Set to 1 for compatibility with brain-damaged assemblers. No-one
9184 wants to fix the assemblers because that causes incompatibility
9185 with gcc. No-one wants to fix gcc because that causes
9186 incompatibility with assemblers... You can use the option of
9187 -DSYSV386_COMPAT=0 if you recompile both gcc and gas this way. */
9188 #define SYSV386_COMPAT 1
9189 #endif
9190
9191 const char *
9192 output_387_binary_op (rtx insn, rtx *operands)
9193 {
9194 static char buf[30];
9195 const char *p;
9196 const char *ssep;
9197 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]) || SSE_REG_P (operands[2]);
9198
9199 #ifdef ENABLE_CHECKING
9200 /* Even if we do not want to check the inputs, this documents the input
9201 constraints, which helps in understanding the following code. */
9202 if (STACK_REG_P (operands[0])
9203 && ((REG_P (operands[1])
9204 && REGNO (operands[0]) == REGNO (operands[1])
9205 && (STACK_REG_P (operands[2]) || MEM_P (operands[2])))
9206 || (REG_P (operands[2])
9207 && REGNO (operands[0]) == REGNO (operands[2])
9208 && (STACK_REG_P (operands[1]) || MEM_P (operands[1]))))
9209 && (STACK_TOP_P (operands[1]) || STACK_TOP_P (operands[2])))
9210 ; /* ok */
9211 else
9212 gcc_assert (is_sse);
9213 #endif
9214
9215 switch (GET_CODE (operands[3]))
9216 {
9217 case PLUS:
9218 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9219 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9220 p = "fiadd";
9221 else
9222 p = "fadd";
9223 ssep = "add";
9224 break;
9225
9226 case MINUS:
9227 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9228 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9229 p = "fisub";
9230 else
9231 p = "fsub";
9232 ssep = "sub";
9233 break;
9234
9235 case MULT:
9236 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9237 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9238 p = "fimul";
9239 else
9240 p = "fmul";
9241 ssep = "mul";
9242 break;
9243
9244 case DIV:
9245 if (GET_MODE_CLASS (GET_MODE (operands[1])) == MODE_INT
9246 || GET_MODE_CLASS (GET_MODE (operands[2])) == MODE_INT)
9247 p = "fidiv";
9248 else
9249 p = "fdiv";
9250 ssep = "div";
9251 break;
9252
9253 default:
9254 gcc_unreachable ();
9255 }
9256
9257 if (is_sse)
9258 {
9259 strcpy (buf, ssep);
9260 if (GET_MODE (operands[0]) == SFmode)
9261 strcat (buf, "ss\t{%2, %0|%0, %2}");
9262 else
9263 strcat (buf, "sd\t{%2, %0|%0, %2}");
9264 return buf;
9265 }
9266 strcpy (buf, p);
9267
9268 switch (GET_CODE (operands[3]))
9269 {
9270 case MULT:
9271 case PLUS:
9272 if (REG_P (operands[2]) && REGNO (operands[0]) == REGNO (operands[2]))
9273 {
9274 rtx temp = operands[2];
9275 operands[2] = operands[1];
9276 operands[1] = temp;
9277 }
9278
9279 /* We know operands[0] == operands[1]. */
9280
9281 if (MEM_P (operands[2]))
9282 {
9283 p = "%z2\t%2";
9284 break;
9285 }
9286
9287 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9288 {
9289 if (STACK_TOP_P (operands[0]))
9290 /* How is it that we are storing to a dead operand[2]?
9291 Well, presumably operands[1] is dead too. We can't
9292 store the result to st(0) as st(0) gets popped on this
9293 instruction. Instead store to operands[2] (which I
9294 think has to be st(1)). st(1) will be popped later.
9295 gcc <= 2.8.1 didn't have this check and generated
9296 assembly code that the Unixware assembler rejected. */
9297 p = "p\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9298 else
9299 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9300 break;
9301 }
9302
9303 if (STACK_TOP_P (operands[0]))
9304 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9305 else
9306 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9307 break;
9308
9309 case MINUS:
9310 case DIV:
9311 if (MEM_P (operands[1]))
9312 {
9313 p = "r%z1\t%1";
9314 break;
9315 }
9316
9317 if (MEM_P (operands[2]))
9318 {
9319 p = "%z2\t%2";
9320 break;
9321 }
9322
9323 if (find_regno_note (insn, REG_DEAD, REGNO (operands[2])))
9324 {
9325 #if SYSV386_COMPAT
9326 /* The SystemV/386 SVR3.2 assembler, and probably all AT&T
9327 derived assemblers, confusingly reverse the direction of
9328 the operation for fsub{r} and fdiv{r} when the
9329 destination register is not st(0). The Intel assembler
9330 doesn't have this brain damage. Read !SYSV386_COMPAT to
9331 figure out what the hardware really does. */
9332 if (STACK_TOP_P (operands[0]))
9333 p = "{p\t%0, %2|rp\t%2, %0}";
9334 else
9335 p = "{rp\t%2, %0|p\t%0, %2}";
9336 #else
9337 if (STACK_TOP_P (operands[0]))
9338 /* As above for fmul/fadd, we can't store to st(0). */
9339 p = "rp\t{%0, %2|%2, %0}"; /* st(1) = st(0) op st(1); pop */
9340 else
9341 p = "p\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0); pop */
9342 #endif
9343 break;
9344 }
9345
9346 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
9347 {
9348 #if SYSV386_COMPAT
9349 if (STACK_TOP_P (operands[0]))
9350 p = "{rp\t%0, %1|p\t%1, %0}";
9351 else
9352 p = "{p\t%1, %0|rp\t%0, %1}";
9353 #else
9354 if (STACK_TOP_P (operands[0]))
9355 p = "p\t{%0, %1|%1, %0}"; /* st(1) = st(1) op st(0); pop */
9356 else
9357 p = "rp\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2); pop */
9358 #endif
9359 break;
9360 }
9361
9362 if (STACK_TOP_P (operands[0]))
9363 {
9364 if (STACK_TOP_P (operands[1]))
9365 p = "\t{%y2, %0|%0, %y2}"; /* st(0) = st(0) op st(r2) */
9366 else
9367 p = "r\t{%y1, %0|%0, %y1}"; /* st(0) = st(r1) op st(0) */
9368 break;
9369 }
9370 else if (STACK_TOP_P (operands[1]))
9371 {
9372 #if SYSV386_COMPAT
9373 p = "{\t%1, %0|r\t%0, %1}";
9374 #else
9375 p = "r\t{%1, %0|%0, %1}"; /* st(r2) = st(0) op st(r2) */
9376 #endif
9377 }
9378 else
9379 {
9380 #if SYSV386_COMPAT
9381 p = "{r\t%2, %0|\t%0, %2}";
9382 #else
9383 p = "\t{%2, %0|%0, %2}"; /* st(r1) = st(r1) op st(0) */
9384 #endif
9385 }
9386 break;
9387
9388 default:
9389 gcc_unreachable ();
9390 }
9391
9392 strcat (buf, p);
9393 return buf;
9394 }
9395
9396 /* Return needed mode for entity in optimize_mode_switching pass. */
9397
9398 int
9399 ix86_mode_needed (int entity, rtx insn)
9400 {
9401 enum attr_i387_cw mode;
9402
9403 /* The mode UNINITIALIZED is used to store the control word after a
9404 function call or ASM pattern. The mode ANY specifies that the function
9405 has no requirements on the control word and makes no changes to the
9406 bits we are interested in. */
9407
9408 if (CALL_P (insn)
9409 || (NONJUMP_INSN_P (insn)
9410 && (asm_noperands (PATTERN (insn)) >= 0
9411 || GET_CODE (PATTERN (insn)) == ASM_INPUT)))
9412 return I387_CW_UNINITIALIZED;
9413
9414 if (recog_memoized (insn) < 0)
9415 return I387_CW_ANY;
9416
9417 mode = get_attr_i387_cw (insn);
9418
9419 switch (entity)
9420 {
9421 case I387_TRUNC:
9422 if (mode == I387_CW_TRUNC)
9423 return mode;
9424 break;
9425
9426 case I387_FLOOR:
9427 if (mode == I387_CW_FLOOR)
9428 return mode;
9429 break;
9430
9431 case I387_CEIL:
9432 if (mode == I387_CW_CEIL)
9433 return mode;
9434 break;
9435
9436 case I387_MASK_PM:
9437 if (mode == I387_CW_MASK_PM)
9438 return mode;
9439 break;
9440
9441 default:
9442 gcc_unreachable ();
9443 }
9444
9445 return I387_CW_ANY;
9446 }
9447
9448 /* Output code to initialize control word copies used by trunc?f?i and
9449 rounding patterns. CURRENT_MODE is set to current control word,
9450 while NEW_MODE is set to new control word. */
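/* For reference, in the i387 control word the rounding-control field
 is bits 10-11 (mask 0x0c00): 00 = to nearest, 01 = down, 10 = up,
 11 = toward zero; the precision-exception mask is bit 5 (0x0020).
 The constants used below just set those fields in a copy of the
 saved control word. */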
9451
9452 void
9453 emit_i387_cw_initialization (int mode)
9454 {
9455 rtx stored_mode = assign_386_stack_local (HImode, SLOT_CW_STORED);
9456 rtx new_mode;
9457
9458 enum ix86_stack_slot slot;
9459
9460 rtx reg = gen_reg_rtx (HImode);
9461
9462 emit_insn (gen_x86_fnstcw_1 (stored_mode));
9463 emit_move_insn (reg, copy_rtx (stored_mode));
9464
9465 if (TARGET_64BIT || TARGET_PARTIAL_REG_STALL || optimize_size)
9466 {
9467 switch (mode)
9468 {
9469 case I387_CW_TRUNC:
9470 /* round toward zero (truncate) */
9471 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0c00)));
9472 slot = SLOT_CW_TRUNC;
9473 break;
9474
9475 case I387_CW_FLOOR:
9476 /* round down toward -oo */
9477 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9478 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0400)));
9479 slot = SLOT_CW_FLOOR;
9480 break;
9481
9482 case I387_CW_CEIL:
9483 /* round up toward +oo */
9484 emit_insn (gen_andhi3 (reg, reg, GEN_INT (~0x0c00)));
9485 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0800)));
9486 slot = SLOT_CW_CEIL;
9487 break;
9488
9489 case I387_CW_MASK_PM:
9490 /* mask precision exception for nearbyint() */
9491 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9492 slot = SLOT_CW_MASK_PM;
9493 break;
9494
9495 default:
9496 gcc_unreachable ();
9497 }
9498 }
9499 else
9500 {
9501 switch (mode)
9502 {
9503 case I387_CW_TRUNC:
9504 /* round toward zero (truncate) */
9505 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0xc)));
9506 slot = SLOT_CW_TRUNC;
9507 break;
9508
9509 case I387_CW_FLOOR:
9510 /* round down toward -oo */
9511 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x4)));
9512 slot = SLOT_CW_FLOOR;
9513 break;
9514
9515 case I387_CW_CEIL:
9516 /* round up toward +oo */
9517 emit_insn (gen_movsi_insv_1 (reg, GEN_INT (0x8)));
9518 slot = SLOT_CW_CEIL;
9519 break;
9520
9521 case I387_CW_MASK_PM:
9522 /* mask precision exception for nearbyint() */
9523 emit_insn (gen_iorhi3 (reg, reg, GEN_INT (0x0020)));
9524 slot = SLOT_CW_MASK_PM;
9525 break;
9526
9527 default:
9528 gcc_unreachable ();
9529 }
9530 }
9531
9532 gcc_assert (slot < MAX_386_STACK_LOCALS);
9533
9534 new_mode = assign_386_stack_local (HImode, slot);
9535 emit_move_insn (new_mode, reg);
9536 }
9537
9538 /* Output code for INSN to convert a float to a signed int. OPERANDS
9539 are the insn operands. The output may be [HSD]Imode and the input
9540 operand may be [SDX]Fmode. */
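/* In the non-SSE3 case below, the store is bracketed by two fldcw
 insns: operand 3 holds a control word with the required rounding
 mode and operand 2 the original one, so the emitted sequence is
 roughly "fldcw %3; fistp %0; fldcw %2". The SSE3 fisttp insn
 truncates regardless of the control word, so no switching is
 needed. */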
9541
9542 const char *
9543 output_fix_trunc (rtx insn, rtx *operands, int fisttp)
9544 {
9545 int stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9546 int dimode_p = GET_MODE (operands[0]) == DImode;
9547 int round_mode = get_attr_i387_cw (insn);
9548
9549 /* Jump through a hoop or two for DImode, since the hardware has no
9550 non-popping instruction. We used to do this a different way, but
9551 that was somewhat fragile and broke with post-reload splitters. */
9552 if ((dimode_p || fisttp) && !stack_top_dies)
9553 output_asm_insn ("fld\t%y1", operands);
9554
9555 gcc_assert (STACK_TOP_P (operands[1]));
9556 gcc_assert (MEM_P (operands[0]));
9557 gcc_assert (GET_MODE (operands[1]) != TFmode);
9558
9559 if (fisttp)
9560 output_asm_insn ("fisttp%z0\t%0", operands);
9561 else
9562 {
9563 if (round_mode != I387_CW_ANY)
9564 output_asm_insn ("fldcw\t%3", operands);
9565 if (stack_top_dies || dimode_p)
9566 output_asm_insn ("fistp%z0\t%0", operands);
9567 else
9568 output_asm_insn ("fist%z0\t%0", operands);
9569 if (round_mode != I387_CW_ANY)
9570 output_asm_insn ("fldcw\t%2", operands);
9571 }
9572
9573 return "";
9574 }
9575
9576 /* Output code for x87 ffreep insn. The OPNO argument, which may only
9577 have the values zero or one, indicates the ffreep insn's operand
9578 from the OPERANDS array. */
9579
9580 static const char *
9581 output_387_ffreep (rtx *operands ATTRIBUTE_UNUSED, int opno)
9582 {
9583 if (TARGET_USE_FFREEP)
9584 #if HAVE_AS_IX86_FFREEP
9585 return opno ? "ffreep\t%y1" : "ffreep\t%y0";
9586 #else
9587 {
9588 static char retval[] = ".word\t0xc_df";
9589 int regno = REGNO (operands[opno]);
9590
9591 gcc_assert (FP_REGNO_P (regno));
9592
9593 retval[9] = '0' + (regno - FIRST_STACK_REG);
9594 return retval;
9595 }
9596 #endif
9597
9598 return opno ? "fstp\t%y1" : "fstp\t%y0";
9599 }
9600
9601
9602 /* Output code for INSN to compare OPERANDS. EFLAGS_P is 1 when fcomi
9603 should be used. UNORDERED_P is true when fucom should be used. */
9604
9605 const char *
9606 output_fp_compare (rtx insn, rtx *operands, int eflags_p, int unordered_p)
9607 {
9608 int stack_top_dies;
9609 rtx cmp_op0, cmp_op1;
9610 int is_sse = SSE_REG_P (operands[0]) || SSE_REG_P (operands[1]);
9611
9612 if (eflags_p)
9613 {
9614 cmp_op0 = operands[0];
9615 cmp_op1 = operands[1];
9616 }
9617 else
9618 {
9619 cmp_op0 = operands[1];
9620 cmp_op1 = operands[2];
9621 }
9622
9623 if (is_sse)
9624 {
9625 if (GET_MODE (operands[0]) == SFmode)
9626 if (unordered_p)
9627 return "ucomiss\t{%1, %0|%0, %1}";
9628 else
9629 return "comiss\t{%1, %0|%0, %1}";
9630 else
9631 if (unordered_p)
9632 return "ucomisd\t{%1, %0|%0, %1}";
9633 else
9634 return "comisd\t{%1, %0|%0, %1}";
9635 }
9636
9637 gcc_assert (STACK_TOP_P (cmp_op0));
9638
9639 stack_top_dies = find_regno_note (insn, REG_DEAD, FIRST_STACK_REG) != 0;
9640
9641 if (cmp_op1 == CONST0_RTX (GET_MODE (cmp_op1)))
9642 {
9643 if (stack_top_dies)
9644 {
9645 output_asm_insn ("ftst\n\tfnstsw\t%0", operands);
9646 return output_387_ffreep (operands, 1);
9647 }
9648 else
9649 return "ftst\n\tfnstsw\t%0";
9650 }
9651
9652 if (STACK_REG_P (cmp_op1)
9653 && stack_top_dies
9654 && find_regno_note (insn, REG_DEAD, REGNO (cmp_op1))
9655 && REGNO (cmp_op1) != FIRST_STACK_REG)
9656 {
9657 /* If both the top of the 387 stack and the other operand (also a
9658 stack register) die, then this must be a `fcompp' float
9659 compare. */
9660
9661 if (eflags_p)
9662 {
9663 /* There is no double popping fcomi variant. Fortunately,
9664 eflags is immune from the fstp's cc clobbering. */
9665 if (unordered_p)
9666 output_asm_insn ("fucomip\t{%y1, %0|%0, %y1}", operands);
9667 else
9668 output_asm_insn ("fcomip\t{%y1, %0|%0, %y1}", operands);
9669 return output_387_ffreep (operands, 0);
9670 }
9671 else
9672 {
9673 if (unordered_p)
9674 return "fucompp\n\tfnstsw\t%0";
9675 else
9676 return "fcompp\n\tfnstsw\t%0";
9677 }
9678 }
9679 else
9680 {
9681 /* Encoded here as eflags_p | intmode | unordered_p | stack_top_dies. */
9682
9683 static const char * const alt[16] =
9684 {
9685 "fcom%z2\t%y2\n\tfnstsw\t%0",
9686 "fcomp%z2\t%y2\n\tfnstsw\t%0",
9687 "fucom%z2\t%y2\n\tfnstsw\t%0",
9688 "fucomp%z2\t%y2\n\tfnstsw\t%0",
9689
9690 "ficom%z2\t%y2\n\tfnstsw\t%0",
9691 "ficomp%z2\t%y2\n\tfnstsw\t%0",
9692 NULL,
9693 NULL,
9694
9695 "fcomi\t{%y1, %0|%0, %y1}",
9696 "fcomip\t{%y1, %0|%0, %y1}",
9697 "fucomi\t{%y1, %0|%0, %y1}",
9698 "fucomip\t{%y1, %0|%0, %y1}",
9699
9700 NULL,
9701 NULL,
9702 NULL,
9703 NULL
9704 };
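/* As a worked example of the encoding: a dying-top fucomip compare has
 eflags_p = 1, a non-integer operand, unordered_p = 1 and
 stack_top_dies = 1, so mask = 8 + 2 + 1 = 11, which selects
 "fucomip" in the table above. */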
9705
9706 int mask;
9707 const char *ret;
9708
9709 mask = eflags_p << 3;
9710 mask |= (GET_MODE_CLASS (GET_MODE (cmp_op1)) == MODE_INT) << 2;
9711 mask |= unordered_p << 1;
9712 mask |= stack_top_dies;
9713
9714 gcc_assert (mask < 16);
9715 ret = alt[mask];
9716 gcc_assert (ret);
9717
9718 return ret;
9719 }
9720 }
9721
9722 void
9723 ix86_output_addr_vec_elt (FILE *file, int value)
9724 {
9725 const char *directive = ASM_LONG;
9726
9727 #ifdef ASM_QUAD
9728 if (TARGET_64BIT)
9729 directive = ASM_QUAD;
9730 #else
9731 gcc_assert (!TARGET_64BIT);
9732 #endif
9733
9734 fprintf (file, "%s%s%d\n", directive, LPREFIX, value);
9735 }
9736
9737 void
9738 ix86_output_addr_diff_elt (FILE *file, int value, int rel)
9739 {
9740 const char *directive = ASM_LONG;
9741
9742 #ifdef ASM_QUAD
9743 if (TARGET_64BIT && CASE_VECTOR_MODE == DImode)
9744 directive = ASM_QUAD;
9745 #else
9746 gcc_assert (!TARGET_64BIT);
9747 #endif
9748 /* We can't use @GOTOFF for text labels on VxWorks; see gotoff_operand. */
9749 if (TARGET_64BIT || TARGET_VXWORKS_RTP)
9750 fprintf (file, "%s%s%d-%s%d\n",
9751 directive, LPREFIX, value, LPREFIX, rel);
9752 else if (HAVE_AS_GOTOFF_IN_DATA)
9753 fprintf (file, "%s%s%d@GOTOFF\n", ASM_LONG, LPREFIX, value);
9754 #if TARGET_MACHO
9755 else if (TARGET_MACHO)
9756 {
9757 fprintf (file, "%s%s%d-", ASM_LONG, LPREFIX, value);
9758 machopic_output_function_base_name (file);
9759 fprintf(file, "\n");
9760 }
9761 #endif
9762 else
9763 asm_fprintf (file, "%s%U%s+[.-%s%d]\n",
9764 ASM_LONG, GOT_SYMBOL_NAME, LPREFIX, value);
9765 }
9766 \f
9767 /* Generate either "mov $0, reg" or "xor reg, reg", as appropriate
9768 for the target. */
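/* For example, the xor form ("xorl %eax, %eax") is shorter and breaks
 any dependency on the previous register value, but it clobbers the
 flags, hence the explicit CLOBBER added below; the "mov $0" form is
 kept for targets that set TARGET_USE_MOV0 when not optimizing for
 size. */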
9769
9770 void
9771 ix86_expand_clear (rtx dest)
9772 {
9773 rtx tmp;
9774
9775 /* We play register width games, which are only valid after reload. */
9776 gcc_assert (reload_completed);
9777
9778 /* Avoid HImode and its attendant prefix byte. */
9779 if (GET_MODE_SIZE (GET_MODE (dest)) < 4)
9780 dest = gen_rtx_REG (SImode, REGNO (dest));
9781 tmp = gen_rtx_SET (VOIDmode, dest, const0_rtx);
9782
9783 /* This predicate should match that for movsi_xor and movdi_xor_rex64. */
9784 if (reload_completed && (!TARGET_USE_MOV0 || optimize_size))
9785 {
9786 rtx clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
9787 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, clob));
9788 }
9789
9790 emit_insn (tmp);
9791 }
9792
9793 /* X is an unchanging MEM. If it is a constant pool reference, return
9794 the constant pool rtx, else NULL. */
9795
9796 rtx
9797 maybe_get_pool_constant (rtx x)
9798 {
9799 x = ix86_delegitimize_address (XEXP (x, 0));
9800
9801 if (GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9802 return get_pool_constant (x);
9803
9804 return NULL_RTX;
9805 }
9806
9807 void
9808 ix86_expand_move (enum machine_mode mode, rtx operands[])
9809 {
9810 int strict = (reload_in_progress || reload_completed);
9811 rtx op0, op1;
9812 enum tls_model model;
9813
9814 op0 = operands[0];
9815 op1 = operands[1];
9816
9817 if (GET_CODE (op1) == SYMBOL_REF)
9818 {
9819 model = SYMBOL_REF_TLS_MODEL (op1);
9820 if (model)
9821 {
9822 op1 = legitimize_tls_address (op1, model, true);
9823 op1 = force_operand (op1, op0);
9824 if (op1 == op0)
9825 return;
9826 }
9827 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9828 && SYMBOL_REF_DLLIMPORT_P (op1))
9829 op1 = legitimize_dllimport_symbol (op1, false);
9830 }
9831 else if (GET_CODE (op1) == CONST
9832 && GET_CODE (XEXP (op1, 0)) == PLUS
9833 && GET_CODE (XEXP (XEXP (op1, 0), 0)) == SYMBOL_REF)
9834 {
9835 rtx addend = XEXP (XEXP (op1, 0), 1);
9836 rtx symbol = XEXP (XEXP (op1, 0), 0);
9837 rtx tmp = NULL;
9838
9839 model = SYMBOL_REF_TLS_MODEL (symbol);
9840 if (model)
9841 tmp = legitimize_tls_address (symbol, model, true);
9842 else if (TARGET_DLLIMPORT_DECL_ATTRIBUTES
9843 && SYMBOL_REF_DLLIMPORT_P (symbol))
9844 tmp = legitimize_dllimport_symbol (symbol, true);
9845
9846 if (tmp)
9847 {
9848 tmp = force_operand (tmp, NULL);
9849 tmp = expand_simple_binop (Pmode, PLUS, tmp, addend,
9850 op0, 1, OPTAB_DIRECT);
9851 if (tmp == op0)
9852 return;
9853 }
9854 }
9855
9856 if (flag_pic && mode == Pmode && symbolic_operand (op1, Pmode))
9857 {
9858 if (TARGET_MACHO && !TARGET_64BIT)
9859 {
9860 #if TARGET_MACHO
9861 if (MACHOPIC_PURE)
9862 {
9863 rtx temp = ((reload_in_progress
9864 || ((op0 && REG_P (op0))
9865 && mode == Pmode))
9866 ? op0 : gen_reg_rtx (Pmode));
9867 op1 = machopic_indirect_data_reference (op1, temp);
9868 op1 = machopic_legitimize_pic_address (op1, mode,
9869 temp == op1 ? 0 : temp);
9870 }
9871 else if (MACHOPIC_INDIRECT)
9872 op1 = machopic_indirect_data_reference (op1, 0);
9873 if (op0 == op1)
9874 return;
9875 #endif
9876 }
9877 else
9878 {
9879 if (MEM_P (op0))
9880 op1 = force_reg (Pmode, op1);
9881 else if (!TARGET_64BIT || !x86_64_movabs_operand (op1, Pmode))
9882 {
9883 rtx reg = !can_create_pseudo_p () ? op0 : NULL_RTX;
9884 op1 = legitimize_pic_address (op1, reg);
9885 if (op0 == op1)
9886 return;
9887 }
9888 }
9889 }
9890 else
9891 {
9892 if (MEM_P (op0)
9893 && (PUSH_ROUNDING (GET_MODE_SIZE (mode)) != GET_MODE_SIZE (mode)
9894 || !push_operand (op0, mode))
9895 && MEM_P (op1))
9896 op1 = force_reg (mode, op1);
9897
9898 if (push_operand (op0, mode)
9899 && ! general_no_elim_operand (op1, mode))
9900 op1 = copy_to_mode_reg (mode, op1);
9901
9902 /* Force large constants in 64-bit compilation into a register
9903 to get them CSEed. */
9904 if (TARGET_64BIT && mode == DImode
9905 && immediate_operand (op1, mode)
9906 && !x86_64_zext_immediate_operand (op1, VOIDmode)
9907 && !register_operand (op0, mode)
9908 && optimize && !reload_completed && !reload_in_progress)
9909 op1 = copy_to_mode_reg (mode, op1);
9910
9911 if (FLOAT_MODE_P (mode))
9912 {
9913 /* If we are loading a floating point constant into a register,
9914 force the value to memory now, since we'll get better code
9915 out of the back end. */
9916
9917 if (strict)
9918 ;
9919 else if (GET_CODE (op1) == CONST_DOUBLE)
9920 {
9921 op1 = validize_mem (force_const_mem (mode, op1));
9922 if (!register_operand (op0, mode))
9923 {
9924 rtx temp = gen_reg_rtx (mode);
9925 emit_insn (gen_rtx_SET (VOIDmode, temp, op1));
9926 emit_move_insn (op0, temp);
9927 return;
9928 }
9929 }
9930 }
9931 }
9932
9933 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9934 }
9935
9936 void
9937 ix86_expand_vector_move (enum machine_mode mode, rtx operands[])
9938 {
9939 rtx op0 = operands[0], op1 = operands[1];
9940 unsigned int align = GET_MODE_ALIGNMENT (mode);
9941
9942 /* Force constants other than zero into memory. We do not know how
9943 the instructions used to build constants modify the upper 64 bits
9944 of the register; once we have that information we may be able
9945 to handle some of them more efficiently. */
9946 if ((reload_in_progress | reload_completed) == 0
9947 && register_operand (op0, mode)
9948 && (CONSTANT_P (op1)
9949 || (GET_CODE (op1) == SUBREG
9950 && CONSTANT_P (SUBREG_REG (op1))))
9951 && standard_sse_constant_p (op1) <= 0)
9952 op1 = validize_mem (force_const_mem (mode, op1));
9953
9954 /* TDmode values are passed as TImode on the stack. TImode values
9955 are moved via xmm registers, and moving them to the stack can result in
9956 unaligned memory accesses. Use ix86_expand_vector_move_misalign()
9957 if a memory operand is not aligned correctly. */
9958 if (can_create_pseudo_p ()
9959 && (mode == TImode) && !TARGET_64BIT
9960 && ((MEM_P (op0) && (MEM_ALIGN (op0) < align))
9961 || (MEM_P (op1) && (MEM_ALIGN (op1) < align))))
9962 {
9963 rtx tmp[2];
9964
9965 /* ix86_expand_vector_move_misalign() does not like constants ... */
9966 if (CONSTANT_P (op1)
9967 || (GET_CODE (op1) == SUBREG
9968 && CONSTANT_P (SUBREG_REG (op1))))
9969 op1 = validize_mem (force_const_mem (mode, op1));
9970
9971 /* ... nor both arguments in memory. */
9972 if (!register_operand (op0, mode)
9973 && !register_operand (op1, mode))
9974 op1 = force_reg (mode, op1);
9975
9976 tmp[0] = op0; tmp[1] = op1;
9977 ix86_expand_vector_move_misalign (mode, tmp);
9978 return;
9979 }
9980
9981 /* Make operand1 a register if it isn't already. */
9982 if (can_create_pseudo_p ()
9983 && !register_operand (op0, mode)
9984 && !register_operand (op1, mode))
9985 {
9986 emit_move_insn (op0, force_reg (GET_MODE (op0), op1));
9987 return;
9988 }
9989
9990 emit_insn (gen_rtx_SET (VOIDmode, op0, op1));
9991 }
9992
9993 /* Implement the movmisalign patterns for SSE. Non-SSE modes go
9994 straight to ix86_expand_vector_move. */
9995 /* Code generation for scalar reg-reg moves of single and double precision data:
9996 if (x86_sse_partial_reg_dependency == true || x86_sse_split_regs == true)
9997 movaps reg, reg
9998 else
9999 movss reg, reg
10000 if (x86_sse_partial_reg_dependency == true)
10001 movapd reg, reg
10002 else
10003 movsd reg, reg
10004
10005 Code generation for scalar loads of double precision data:
10006 if (x86_sse_split_regs == true)
10007 movlpd mem, reg (gas syntax)
10008 else
10009 movsd mem, reg
10010
10011 Code generation for unaligned packed loads of single precision data
10012 (x86_sse_unaligned_move_optimal overrides x86_sse_partial_reg_dependency):
10013 if (x86_sse_unaligned_move_optimal)
10014 movups mem, reg
10015
10016 if (x86_sse_partial_reg_dependency == true)
10017 {
10018 xorps reg, reg
10019 movlps mem, reg
10020 movhps mem+8, reg
10021 }
10022 else
10023 {
10024 movlps mem, reg
10025 movhps mem+8, reg
10026 }
10027
10028 Code generation for unaligned packed loads of double precision data
10029 (x86_sse_unaligned_move_optimal overrides x86_sse_split_regs):
10030 if (x86_sse_unaligned_move_optimal)
10031 movupd mem, reg
10032
10033 if (x86_sse_split_regs == true)
10034 {
10035 movlpd mem, reg
10036 movhpd mem+8, reg
10037 }
10038 else
10039 {
10040 movsd mem, reg
10041 movhpd mem+8, reg
10042 }
10043 */
10044
10045 void
10046 ix86_expand_vector_move_misalign (enum machine_mode mode, rtx operands[])
10047 {
10048 rtx op0, op1, m;
10049
10050 op0 = operands[0];
10051 op1 = operands[1];
10052
10053 if (MEM_P (op1))
10054 {
10055 /* If we're optimizing for size, movups is the smallest. */
10056 if (optimize_size)
10057 {
10058 op0 = gen_lowpart (V4SFmode, op0);
10059 op1 = gen_lowpart (V4SFmode, op1);
10060 emit_insn (gen_sse_movups (op0, op1));
10061 return;
10062 }
10063
10064 /* ??? If we have typed data, then it would appear that using
10065 movdqu is the only way to get unaligned data loaded with
10066 integer type. */
10067 if (TARGET_SSE2 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10068 {
10069 op0 = gen_lowpart (V16QImode, op0);
10070 op1 = gen_lowpart (V16QImode, op1);
10071 emit_insn (gen_sse2_movdqu (op0, op1));
10072 return;
10073 }
10074
10075 if (TARGET_SSE2 && mode == V2DFmode)
10076 {
10077 rtx zero;
10078
10079 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10080 {
10081 op0 = gen_lowpart (V2DFmode, op0);
10082 op1 = gen_lowpart (V2DFmode, op1);
10083 emit_insn (gen_sse2_movupd (op0, op1));
10084 return;
10085 }
10086
10087 /* When SSE registers are split into halves, we can avoid
10088 writing to the top half twice. */
10089 if (TARGET_SSE_SPLIT_REGS)
10090 {
10091 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10092 zero = op0;
10093 }
10094 else
10095 {
10096 /* ??? Not sure about the best option for the Intel chips.
10097 The following would seem to satisfy; the register is
10098 entirely cleared, breaking the dependency chain. We
10099 then store to the upper half, with a dependency depth
10100 of one. A rumor has it that Intel recommends two movsd
10101 followed by an unpacklpd, but this is unconfirmed. And
10102 given that the dependency depth of the unpacklpd would
10103 still be one, I'm not sure why this would be better. */
10104 zero = CONST0_RTX (V2DFmode);
10105 }
10106
10107 m = adjust_address (op1, DFmode, 0);
10108 emit_insn (gen_sse2_loadlpd (op0, zero, m));
10109 m = adjust_address (op1, DFmode, 8);
10110 emit_insn (gen_sse2_loadhpd (op0, op0, m));
10111 }
10112 else
10113 {
10114 if (TARGET_SSE_UNALIGNED_MOVE_OPTIMAL)
10115 {
10116 op0 = gen_lowpart (V4SFmode, op0);
10117 op1 = gen_lowpart (V4SFmode, op1);
10118 emit_insn (gen_sse_movups (op0, op1));
10119 return;
10120 }
10121
10122 if (TARGET_SSE_PARTIAL_REG_DEPENDENCY)
10123 emit_move_insn (op0, CONST0_RTX (mode));
10124 else
10125 emit_insn (gen_rtx_CLOBBER (VOIDmode, op0));
10126
10127 if (mode != V4SFmode)
10128 op0 = gen_lowpart (V4SFmode, op0);
10129 m = adjust_address (op1, V2SFmode, 0);
10130 emit_insn (gen_sse_loadlps (op0, op0, m));
10131 m = adjust_address (op1, V2SFmode, 8);
10132 emit_insn (gen_sse_loadhps (op0, op0, m));
10133 }
10134 }
10135 else if (MEM_P (op0))
10136 {
10137 /* If we're optimizing for size, movups is the smallest. */
10138 if (optimize_size)
10139 {
10140 op0 = gen_lowpart (V4SFmode, op0);
10141 op1 = gen_lowpart (V4SFmode, op1);
10142 emit_insn (gen_sse_movups (op0, op1));
10143 return;
10144 }
10145
10146 /* ??? Similar to above, only less clear because of quote
10147 typeless stores unquote. */
10148 if (TARGET_SSE2 && !TARGET_SSE_TYPELESS_STORES
10149 && GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
10150 {
10151 op0 = gen_lowpart (V16QImode, op0);
10152 op1 = gen_lowpart (V16QImode, op1);
10153 emit_insn (gen_sse2_movdqu (op0, op1));
10154 return;
10155 }
10156
10157 if (TARGET_SSE2 && mode == V2DFmode)
10158 {
10159 m = adjust_address (op0, DFmode, 0);
10160 emit_insn (gen_sse2_storelpd (m, op1));
10161 m = adjust_address (op0, DFmode, 8);
10162 emit_insn (gen_sse2_storehpd (m, op1));
10163 }
10164 else
10165 {
10166 if (mode != V4SFmode)
10167 op1 = gen_lowpart (V4SFmode, op1);
10168 m = adjust_address (op0, V2SFmode, 0);
10169 emit_insn (gen_sse_storelps (m, op1));
10170 m = adjust_address (op0, V2SFmode, 8);
10171 emit_insn (gen_sse_storehps (m, op1));
10172 }
10173 }
10174 else
10175 gcc_unreachable ();
10176 }
10177
10178 /* Expand a push in MODE. This is some mode for which we do not support
10179 proper push instructions, at least from the registers that we expect
10180 the value to live in. */
10181
10182 void
10183 ix86_expand_push (enum machine_mode mode, rtx x)
10184 {
10185 rtx tmp;
10186
10187 tmp = expand_simple_binop (Pmode, PLUS, stack_pointer_rtx,
10188 GEN_INT (-GET_MODE_SIZE (mode)),
10189 stack_pointer_rtx, 1, OPTAB_DIRECT);
10190 if (tmp != stack_pointer_rtx)
10191 emit_move_insn (stack_pointer_rtx, tmp);
10192
10193 tmp = gen_rtx_MEM (mode, stack_pointer_rtx);
10194 emit_move_insn (tmp, x);
10195 }
10196
10197 /* Helper function of ix86_fixup_binary_operands to canonicalize
10198 operand order. Returns true if the operands should be swapped. */
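/* For instance, for a commutative (plus (mem) (reg)) whose destination
 is that same register, swapping makes src1 match the destination and
 moves the memory reference into the second slot, which is the
 operand order the expanders below arrange for. */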
10199
10200 static bool
10201 ix86_swap_binary_operands_p (enum rtx_code code, enum machine_mode mode,
10202 rtx operands[])
10203 {
10204 rtx dst = operands[0];
10205 rtx src1 = operands[1];
10206 rtx src2 = operands[2];
10207
10208 /* If the operation is not commutative, we can't do anything. */
10209 if (GET_RTX_CLASS (code) != RTX_COMM_ARITH)
10210 return false;
10211
10212 /* Highest priority is that src1 should match dst. */
10213 if (rtx_equal_p (dst, src1))
10214 return false;
10215 if (rtx_equal_p (dst, src2))
10216 return true;
10217
10218 /* Next highest priority is that immediate constants come second. */
10219 if (immediate_operand (src2, mode))
10220 return false;
10221 if (immediate_operand (src1, mode))
10222 return true;
10223
10224 /* Lowest priority is that memory references should come second. */
10225 if (MEM_P (src2))
10226 return false;
10227 if (MEM_P (src1))
10228 return true;
10229
10230 return false;
10231 }
10232
10233
10234 /* Fix up OPERANDS to satisfy ix86_binary_operator_ok. Return the
10235 destination to use for the operation. If different from the true
10236 destination in operands[0], a copy operation will be required. */
10237
10238 rtx
10239 ix86_fixup_binary_operands (enum rtx_code code, enum machine_mode mode,
10240 rtx operands[])
10241 {
10242 rtx dst = operands[0];
10243 rtx src1 = operands[1];
10244 rtx src2 = operands[2];
10245
10246 /* Canonicalize operand order. */
10247 if (ix86_swap_binary_operands_p (code, mode, operands))
10248 {
10249 rtx temp = src1;
10250 src1 = src2;
10251 src2 = temp;
10252 }
10253
10254 /* Both source operands cannot be in memory. */
10255 if (MEM_P (src1) && MEM_P (src2))
10256 {
10257 /* Optimization: Only read from memory once. */
10258 if (rtx_equal_p (src1, src2))
10259 {
10260 src2 = force_reg (mode, src2);
10261 src1 = src2;
10262 }
10263 else
10264 src2 = force_reg (mode, src2);
10265 }
10266
10267 /* If the destination is memory, and we do not have matching source
10268 operands, do things in registers. */
10269 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10270 dst = gen_reg_rtx (mode);
10271
10272 /* Source 1 cannot be a constant. */
10273 if (CONSTANT_P (src1))
10274 src1 = force_reg (mode, src1);
10275
10276 /* Source 1 cannot be a non-matching memory. */
10277 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10278 src1 = force_reg (mode, src1);
10279
10280 operands[1] = src1;
10281 operands[2] = src2;
10282 return dst;
10283 }
10284
10285 /* Similarly, but assume that the destination has already been
10286 set up properly. */
10287
10288 void
10289 ix86_fixup_binary_operands_no_copy (enum rtx_code code,
10290 enum machine_mode mode, rtx operands[])
10291 {
10292 rtx dst = ix86_fixup_binary_operands (code, mode, operands);
10293 gcc_assert (dst == operands[0]);
10294 }
10295
10296 /* Attempt to expand a binary operator. Make the expansion closer to the
10297 actual machine than just general_operand, which would allow 3 separate
10298 memory references (one output, two input) in a single insn. */
10299
10300 void
10301 ix86_expand_binary_operator (enum rtx_code code, enum machine_mode mode,
10302 rtx operands[])
10303 {
10304 rtx src1, src2, dst, op, clob;
10305
10306 dst = ix86_fixup_binary_operands (code, mode, operands);
10307 src1 = operands[1];
10308 src2 = operands[2];
10309
10310 /* Emit the instruction. */
10311
10312 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_ee (code, mode, src1, src2));
10313 if (reload_in_progress)
10314 {
10315 /* Reload doesn't know about the flags register, and doesn't know that
10316 it doesn't want to clobber it. We can only do this with PLUS. */
10317 gcc_assert (code == PLUS);
10318 emit_insn (op);
10319 }
10320 else
10321 {
10322 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10323 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10324 }
10325
10326 /* Fix up the destination if needed. */
10327 if (dst != operands[0])
10328 emit_move_insn (operands[0], dst);
10329 }
10330
10331 /* Return TRUE or FALSE depending on whether the binary operator meets the
10332 appropriate constraints. */
10333
10334 int
10335 ix86_binary_operator_ok (enum rtx_code code, enum machine_mode mode,
10336 rtx operands[3])
10337 {
10338 rtx dst = operands[0];
10339 rtx src1 = operands[1];
10340 rtx src2 = operands[2];
10341
10342 /* Both source operands cannot be in memory. */
10343 if (MEM_P (src1) && MEM_P (src2))
10344 return 0;
10345
10346 /* Canonicalize operand order for commutative operators. */
10347 if (ix86_swap_binary_operands_p (code, mode, operands))
10348 {
10349 rtx temp = src1;
10350 src1 = src2;
10351 src2 = temp;
10352 }
10353
10354 /* If the destination is memory, we must have a matching source operand. */
10355 if (MEM_P (dst) && !rtx_equal_p (dst, src1))
10356 return 0;
10357
10358 /* Source 1 cannot be a constant. */
10359 if (CONSTANT_P (src1))
10360 return 0;
10361
10362 /* Source 1 cannot be a non-matching memory. */
10363 if (MEM_P (src1) && !rtx_equal_p (dst, src1))
10364 return 0;
10365
10366 return 1;
10367 }
10368
10369 /* Attempt to expand a unary operator. Make the expansion closer to the
10370 actual machine than just general_operand, which would allow 2 separate
10371 memory references (one output, one input) in a single insn. */
10372
10373 void
10374 ix86_expand_unary_operator (enum rtx_code code, enum machine_mode mode,
10375 rtx operands[])
10376 {
10377 int matching_memory;
10378 rtx src, dst, op, clob;
10379
10380 dst = operands[0];
10381 src = operands[1];
10382
10383 /* If the destination is memory, and we do not have matching source
10384 operands, do things in registers. */
10385 matching_memory = 0;
10386 if (MEM_P (dst))
10387 {
10388 if (rtx_equal_p (dst, src))
10389 matching_memory = 1;
10390 else
10391 dst = gen_reg_rtx (mode);
10392 }
10393
10394 /* When source operand is memory, destination must match. */
10395 if (MEM_P (src) && !matching_memory)
10396 src = force_reg (mode, src);
10397
10398 /* Emit the instruction. */
10399
10400 op = gen_rtx_SET (VOIDmode, dst, gen_rtx_fmt_e (code, mode, src));
10401 if (reload_in_progress || code == NOT)
10402 {
10403 /* Reload doesn't know about the flags register, and doesn't know that
10404 it doesn't want to clobber it. */
10405 gcc_assert (code == NOT);
10406 emit_insn (op);
10407 }
10408 else
10409 {
10410 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10411 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, op, clob)));
10412 }
10413
10414 /* Fix up the destination if needed. */
10415 if (dst != operands[0])
10416 emit_move_insn (operands[0], dst);
10417 }
10418
10419 /* Return TRUE or FALSE depending on whether the unary operator meets the
10420 appropriate constraints. */
10421
10422 int
10423 ix86_unary_operator_ok (enum rtx_code code ATTRIBUTE_UNUSED,
10424 enum machine_mode mode ATTRIBUTE_UNUSED,
10425 rtx operands[2] ATTRIBUTE_UNUSED)
10426 {
10427 /* If one of operands is memory, source and destination must match. */
10428 if ((MEM_P (operands[0])
10429 || MEM_P (operands[1]))
10430 && ! rtx_equal_p (operands[0], operands[1]))
10431 return FALSE;
10432 return TRUE;
10433 }
10434
10435 /* Post-reload splitter for converting an SF or DFmode value in an
10436 SSE register into an unsigned SImode. */
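/* Informally: LARGE becomes a mask of the lanes where the input is
 >= 2**31, 2**31 is subtracted from exactly those lanes, the result
 is converted with the signed cvttps2dq/cvttpd2dq, and the mask,
 shifted up into bit 31, is xor'd back in to restore the high bit. */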
10437
10438 void
10439 ix86_split_convert_uns_si_sse (rtx operands[])
10440 {
10441 enum machine_mode vecmode;
10442 rtx value, large, zero_or_two31, input, two31, x;
10443
10444 large = operands[1];
10445 zero_or_two31 = operands[2];
10446 input = operands[3];
10447 two31 = operands[4];
10448 vecmode = GET_MODE (large);
10449 value = gen_rtx_REG (vecmode, REGNO (operands[0]));
10450
10451 /* Load up the value into the low element. We must ensure that the other
10452 elements are valid floats -- zero is the easiest such value. */
10453 if (MEM_P (input))
10454 {
10455 if (vecmode == V4SFmode)
10456 emit_insn (gen_vec_setv4sf_0 (value, CONST0_RTX (V4SFmode), input));
10457 else
10458 emit_insn (gen_sse2_loadlpd (value, CONST0_RTX (V2DFmode), input));
10459 }
10460 else
10461 {
10462 input = gen_rtx_REG (vecmode, REGNO (input));
10463 emit_move_insn (value, CONST0_RTX (vecmode));
10464 if (vecmode == V4SFmode)
10465 emit_insn (gen_sse_movss (value, value, input));
10466 else
10467 emit_insn (gen_sse2_movsd (value, value, input));
10468 }
10469
10470 emit_move_insn (large, two31);
10471 emit_move_insn (zero_or_two31, MEM_P (two31) ? large : two31);
10472
10473 x = gen_rtx_fmt_ee (LE, vecmode, large, value);
10474 emit_insn (gen_rtx_SET (VOIDmode, large, x));
10475
10476 x = gen_rtx_AND (vecmode, zero_or_two31, large);
10477 emit_insn (gen_rtx_SET (VOIDmode, zero_or_two31, x));
10478
10479 x = gen_rtx_MINUS (vecmode, value, zero_or_two31);
10480 emit_insn (gen_rtx_SET (VOIDmode, value, x));
10481
10482 large = gen_rtx_REG (V4SImode, REGNO (large));
10483 emit_insn (gen_ashlv4si3 (large, large, GEN_INT (31)));
10484
10485 x = gen_rtx_REG (V4SImode, REGNO (value));
10486 if (vecmode == V4SFmode)
10487 emit_insn (gen_sse2_cvttps2dq (x, value));
10488 else
10489 emit_insn (gen_sse2_cvttpd2dq (x, value));
10490 value = x;
10491
10492 emit_insn (gen_xorv4si3 (value, value, large));
10493 }
10494
10495 /* Convert an unsigned DImode value into a DFmode, using only SSE.
10496 Expects the 64-bit DImode to be supplied in a pair of integral
10497 registers. Requires SSE2; will use SSE3 if available. For x86_32,
10498 -mfpmath=sse, !optimize_size only. */
10499
10500 void
10501 ix86_expand_convert_uns_didf_sse (rtx target, rtx input)
10502 {
10503 REAL_VALUE_TYPE bias_lo_rvt, bias_hi_rvt;
10504 rtx int_xmm, fp_xmm;
10505 rtx biases, exponents;
10506 rtx x;
10507
10508 int_xmm = gen_reg_rtx (V4SImode);
10509 if (TARGET_INTER_UNIT_MOVES)
10510 emit_insn (gen_movdi_to_sse (int_xmm, input));
10511 else if (TARGET_SSE_SPLIT_REGS)
10512 {
10513 emit_insn (gen_rtx_CLOBBER (VOIDmode, int_xmm));
10514 emit_move_insn (gen_lowpart (DImode, int_xmm), input);
10515 }
10516 else
10517 {
10518 x = gen_reg_rtx (V2DImode);
10519 ix86_expand_vector_init_one_nonzero (false, V2DImode, x, input, 0);
10520 emit_move_insn (int_xmm, gen_lowpart (V4SImode, x));
10521 }
10522
10523 x = gen_rtx_CONST_VECTOR (V4SImode,
10524 gen_rtvec (4, GEN_INT (0x43300000UL),
10525 GEN_INT (0x45300000UL),
10526 const0_rtx, const0_rtx));
10527 exponents = validize_mem (force_const_mem (V4SImode, x));
10528
10529 /* int_xmm = {0x45300000UL, fp_xmm/hi, 0x43300000, fp_xmm/lo } */
10530 emit_insn (gen_sse2_punpckldq (int_xmm, int_xmm, exponents));
10531
10532 /* Concatenating (juxtaposing) (0x43300000UL ## fp_value_lo_xmm)
10533 yields a valid DF value equal to (0x1.0p52 + double(fp_value_lo_xmm)).
10534 Similarly (0x45300000UL ## fp_value_hi_xmm) yields
10535 (0x1.0p84 + double(fp_value_hi_xmm)).
10536 Note these exponents differ by 32. */
10537
10538 fp_xmm = copy_to_mode_reg (V2DFmode, gen_lowpart (V2DFmode, int_xmm));
10539
10540 /* Subtract off those 0x1.0p52 and 0x1.0p84 biases, to produce values
10541 in [0,2**32-1] and [0]+[2**32,2**64-1] respectively. */
10542 real_ldexp (&bias_lo_rvt, &dconst1, 52);
10543 real_ldexp (&bias_hi_rvt, &dconst1, 84);
10544 biases = const_double_from_real_value (bias_lo_rvt, DFmode);
10545 x = const_double_from_real_value (bias_hi_rvt, DFmode);
10546 biases = gen_rtx_CONST_VECTOR (V2DFmode, gen_rtvec (2, biases, x));
10547 biases = validize_mem (force_const_mem (V2DFmode, biases));
10548 emit_insn (gen_subv2df3 (fp_xmm, fp_xmm, biases));
10549
10550 /* Add the upper and lower DFmode values together. */
10551 if (TARGET_SSE3)
10552 emit_insn (gen_sse3_haddv2df3 (fp_xmm, fp_xmm, fp_xmm));
10553 else
10554 {
10555 x = copy_to_mode_reg (V2DFmode, fp_xmm);
10556 emit_insn (gen_sse2_unpckhpd (fp_xmm, fp_xmm, fp_xmm));
10557 emit_insn (gen_addv2df3 (fp_xmm, fp_xmm, x));
10558 }
10559
10560 ix86_expand_vector_extract (false, target, fp_xmm, 0);
10561 }
10562
10563 /* Convert an unsigned SImode value into a DFmode. Only currently used
10564 for SSE, but applicable anywhere. */
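/* A sketch of the trick used here: compute input - 2**31 as a signed
 SImode value (the PLUS of -2147483648 below only changes the
 interpretation of the bits), convert that with the ordinary signed
 floatsidf, then add 2**31.0 back in DFmode, which is exact because
 DFmode carries more than 32 bits of precision. */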
10565
10566 void
10567 ix86_expand_convert_uns_sidf_sse (rtx target, rtx input)
10568 {
10569 REAL_VALUE_TYPE TWO31r;
10570 rtx x, fp;
10571
10572 x = expand_simple_binop (SImode, PLUS, input, GEN_INT (-2147483647 - 1),
10573 NULL, 1, OPTAB_DIRECT);
10574
10575 fp = gen_reg_rtx (DFmode);
10576 emit_insn (gen_floatsidf2 (fp, x));
10577
10578 real_ldexp (&TWO31r, &dconst1, 31);
10579 x = const_double_from_real_value (TWO31r, DFmode);
10580
10581 x = expand_simple_binop (DFmode, PLUS, fp, x, target, 0, OPTAB_DIRECT);
10582 if (x != target)
10583 emit_move_insn (target, x);
10584 }
10585
10586 /* Convert a signed DImode value into a DFmode. Only used for SSE in
10587 32-bit mode; otherwise we have a direct convert instruction. */
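/* Roughly: result = (double) highpart * 2**32 + (double) (unsigned) lowpart,
 with the unsigned low half handled by the SImode helper above. */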
10588
10589 void
10590 ix86_expand_convert_sign_didf_sse (rtx target, rtx input)
10591 {
10592 REAL_VALUE_TYPE TWO32r;
10593 rtx fp_lo, fp_hi, x;
10594
10595 fp_lo = gen_reg_rtx (DFmode);
10596 fp_hi = gen_reg_rtx (DFmode);
10597
10598 emit_insn (gen_floatsidf2 (fp_hi, gen_highpart (SImode, input)));
10599
10600 real_ldexp (&TWO32r, &dconst1, 32);
10601 x = const_double_from_real_value (TWO32r, DFmode);
10602 fp_hi = expand_simple_binop (DFmode, MULT, fp_hi, x, fp_hi, 0, OPTAB_DIRECT);
10603
10604 ix86_expand_convert_uns_sidf_sse (fp_lo, gen_lowpart (SImode, input));
10605
10606 x = expand_simple_binop (DFmode, PLUS, fp_hi, fp_lo, target,
10607 0, OPTAB_DIRECT);
10608 if (x != target)
10609 emit_move_insn (target, x);
10610 }
10611
10612 /* Convert an unsigned SImode value into a SFmode, using only SSE.
10613 For x86_32, -mfpmath=sse, !optimize_size only. */
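/* A rough sketch: the input is split into its low and high 16-bit
 halves, each half converts exactly through the signed floatsisf,
 and the result is recombined as hi * 2**16 + lo, so only the final
 addition can round. */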
10614 void
10615 ix86_expand_convert_uns_sisf_sse (rtx target, rtx input)
10616 {
10617 REAL_VALUE_TYPE ONE16r;
10618 rtx fp_hi, fp_lo, int_hi, int_lo, x;
10619
10620 real_ldexp (&ONE16r, &dconst1, 16);
10621 x = const_double_from_real_value (ONE16r, SFmode);
10622 int_lo = expand_simple_binop (SImode, AND, input, GEN_INT(0xffff),
10623 NULL, 0, OPTAB_DIRECT);
10624 int_hi = expand_simple_binop (SImode, LSHIFTRT, input, GEN_INT(16),
10625 NULL, 0, OPTAB_DIRECT);
10626 fp_hi = gen_reg_rtx (SFmode);
10627 fp_lo = gen_reg_rtx (SFmode);
10628 emit_insn (gen_floatsisf2 (fp_hi, int_hi));
10629 emit_insn (gen_floatsisf2 (fp_lo, int_lo));
10630 fp_hi = expand_simple_binop (SFmode, MULT, fp_hi, x, fp_hi,
10631 0, OPTAB_DIRECT);
10632 fp_hi = expand_simple_binop (SFmode, PLUS, fp_hi, fp_lo, target,
10633 0, OPTAB_DIRECT);
10634 if (!rtx_equal_p (target, fp_hi))
10635 emit_move_insn (target, fp_hi);
10636 }
10637
10638 /* A subroutine of ix86_build_signbit_mask. If VECT is true,
10639 then replicate the value for all elements of the vector
10640 register. */
10641
10642 rtx
10643 ix86_build_const_vector (enum machine_mode mode, bool vect, rtx value)
10644 {
10645 rtvec v;
10646 switch (mode)
10647 {
10648 case SImode:
10649 gcc_assert (vect);
10650 v = gen_rtvec (4, value, value, value, value);
10651 return gen_rtx_CONST_VECTOR (V4SImode, v);
10652
10653 case DImode:
10654 gcc_assert (vect);
10655 v = gen_rtvec (2, value, value);
10656 return gen_rtx_CONST_VECTOR (V2DImode, v);
10657
10658 case SFmode:
10659 if (vect)
10660 v = gen_rtvec (4, value, value, value, value);
10661 else
10662 v = gen_rtvec (4, value, CONST0_RTX (SFmode),
10663 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10664 return gen_rtx_CONST_VECTOR (V4SFmode, v);
10665
10666 case DFmode:
10667 if (vect)
10668 v = gen_rtvec (2, value, value);
10669 else
10670 v = gen_rtvec (2, value, CONST0_RTX (DFmode));
10671 return gen_rtx_CONST_VECTOR (V2DFmode, v);
10672
10673 default:
10674 gcc_unreachable ();
10675 }
10676 }
10677
10678 /* A subroutine of ix86_expand_fp_absneg_operator, copysign expanders
10679 and ix86_expand_int_vcond. Create a mask for the sign bit in MODE
10680 for an SSE register. If VECT is true, then replicate the mask for
10681 all elements of the vector register. If INVERT is true, then create
10682 a mask excluding the sign bit. */
10683
10684 rtx
10685 ix86_build_signbit_mask (enum machine_mode mode, bool vect, bool invert)
10686 {
10687 enum machine_mode vec_mode, imode;
10688 HOST_WIDE_INT hi, lo;
10689 int shift = 63;
10690 rtx v;
10691 rtx mask;
10692
10693 /* Find the sign bit, sign extended to 2*HWI. */
10694 switch (mode)
10695 {
10696 case SImode:
10697 case SFmode:
10698 imode = SImode;
10699 vec_mode = (mode == SImode) ? V4SImode : V4SFmode;
10700 lo = 0x80000000, hi = lo < 0;
10701 break;
10702
10703 case DImode:
10704 case DFmode:
10705 imode = DImode;
10706 vec_mode = (mode == DImode) ? V2DImode : V2DFmode;
10707 if (HOST_BITS_PER_WIDE_INT >= 64)
10708 lo = (HOST_WIDE_INT)1 << shift, hi = -1;
10709 else
10710 lo = 0, hi = (HOST_WIDE_INT)1 << (shift - HOST_BITS_PER_WIDE_INT);
10711 break;
10712
10713 case TImode:
10714 case TFmode:
10715 imode = TImode;
10716 vec_mode = VOIDmode;
10717 gcc_assert (HOST_BITS_PER_WIDE_INT >= 64);
10718 lo = 0, hi = (HOST_WIDE_INT)1 << shift;
10719 break;
10720
10721 default:
10722 gcc_unreachable ();
10723 }
10724
10725 if (invert)
10726 lo = ~lo, hi = ~hi;
10727
10728 /* Force this value into the low part of a fp vector constant. */
10729 mask = immed_double_const (lo, hi, imode);
10730 mask = gen_lowpart (mode, mask);
10731
10732 if (vec_mode == VOIDmode)
10733 return force_reg (mode, mask);
10734
10735 v = ix86_build_const_vector (mode, vect, mask);
10736 return force_reg (vec_mode, v);
10737 }
10738
10739 /* Generate code for floating point ABS or NEG. */
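/* With SSE, NEG is implemented as an XOR with the sign-bit mask and
 ABS as an AND with its complement (note that code == ABS is passed
 as the `invert' argument). The vector path emits the bitwise
 operation directly; the scalar path keeps the NEG/ABS rtx and
 attaches the mask via a USE so that later splitting can apply the
 same bitwise form. */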
10740
10741 void
10742 ix86_expand_fp_absneg_operator (enum rtx_code code, enum machine_mode mode,
10743 rtx operands[])
10744 {
10745 rtx mask, set, use, clob, dst, src;
10746 bool matching_memory;
10747 bool use_sse = false;
10748 bool vector_mode = VECTOR_MODE_P (mode);
10749 enum machine_mode elt_mode = mode;
10750
10751 if (vector_mode)
10752 {
10753 elt_mode = GET_MODE_INNER (mode);
10754 use_sse = true;
10755 }
10756 else if (mode == TFmode)
10757 use_sse = true;
10758 else if (TARGET_SSE_MATH)
10759 use_sse = SSE_FLOAT_MODE_P (mode);
10760
10761 /* NEG and ABS performed with SSE use bitwise mask operations.
10762 Create the appropriate mask now. */
10763 if (use_sse)
10764 mask = ix86_build_signbit_mask (elt_mode, vector_mode, code == ABS);
10765 else
10766 mask = NULL_RTX;
10767
10768 dst = operands[0];
10769 src = operands[1];
10770
10771 /* If the destination is memory, and we don't have matching source
10772 operands or we're using the x87, do things in registers. */
10773 matching_memory = false;
10774 if (MEM_P (dst))
10775 {
10776 if (use_sse && rtx_equal_p (dst, src))
10777 matching_memory = true;
10778 else
10779 dst = gen_reg_rtx (mode);
10780 }
10781 if (MEM_P (src) && !matching_memory)
10782 src = force_reg (mode, src);
10783
10784 if (vector_mode)
10785 {
10786 set = gen_rtx_fmt_ee (code == NEG ? XOR : AND, mode, src, mask);
10787 set = gen_rtx_SET (VOIDmode, dst, set);
10788 emit_insn (set);
10789 }
10790 else
10791 {
10792 set = gen_rtx_fmt_e (code, mode, src);
10793 set = gen_rtx_SET (VOIDmode, dst, set);
10794 if (mask)
10795 {
10796 use = gen_rtx_USE (VOIDmode, mask);
10797 clob = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCmode, FLAGS_REG));
10798 emit_insn (gen_rtx_PARALLEL (VOIDmode,
10799 gen_rtvec (3, set, use, clob)));
10800 }
10801 else
10802 emit_insn (set);
10803 }
10804
10805 if (dst != operands[0])
10806 emit_move_insn (operands[0], dst);
10807 }
10808
10809 /* Expand a copysign operation. Special case operand 0 being a constant. */
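/* copysign (x, y) is computed as (x & ~sign-mask) | (y & sign-mask).
 This function only builds the mask constants and chooses a pattern;
 the actual bit twiddling is done later by ix86_split_copysign_const
 and ix86_split_copysign_var below. */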
10810
10811 void
10812 ix86_expand_copysign (rtx operands[])
10813 {
10814 enum machine_mode mode, vmode;
10815 rtx dest, op0, op1, mask, nmask;
10816
10817 dest = operands[0];
10818 op0 = operands[1];
10819 op1 = operands[2];
10820
10821 mode = GET_MODE (dest);
10822 vmode = mode == SFmode ? V4SFmode : V2DFmode;
10823
10824 if (GET_CODE (op0) == CONST_DOUBLE)
10825 {
10826 rtx (*copysign_insn)(rtx, rtx, rtx, rtx);
10827
10828 if (real_isneg (CONST_DOUBLE_REAL_VALUE (op0)))
10829 op0 = simplify_unary_operation (ABS, mode, op0, mode);
10830
10831 if (mode == SFmode || mode == DFmode)
10832 {
10833 if (op0 == CONST0_RTX (mode))
10834 op0 = CONST0_RTX (vmode);
10835 else
10836 {
10837 rtvec v;
10838
10839 if (mode == SFmode)
10840 v = gen_rtvec (4, op0, CONST0_RTX (SFmode),
10841 CONST0_RTX (SFmode), CONST0_RTX (SFmode));
10842 else
10843 v = gen_rtvec (2, op0, CONST0_RTX (DFmode));
10844 op0 = force_reg (vmode, gen_rtx_CONST_VECTOR (vmode, v));
10845 }
10846 }
10847
10848 mask = ix86_build_signbit_mask (mode, 0, 0);
10849
10850 if (mode == SFmode)
10851 copysign_insn = gen_copysignsf3_const;
10852 else if (mode == DFmode)
10853 copysign_insn = gen_copysigndf3_const;
10854 else
10855 copysign_insn = gen_copysigntf3_const;
10856
10857 emit_insn (copysign_insn (dest, op0, op1, mask));
10858 }
10859 else
10860 {
10861 rtx (*copysign_insn)(rtx, rtx, rtx, rtx, rtx, rtx);
10862
10863 nmask = ix86_build_signbit_mask (mode, 0, 1);
10864 mask = ix86_build_signbit_mask (mode, 0, 0);
10865
10866 if (mode == SFmode)
10867 copysign_insn = gen_copysignsf3_var;
10868 else if (mode == DFmode)
10869 copysign_insn = gen_copysigndf3_var;
10870 else
10871 copysign_insn = gen_copysigntf3_var;
10872
10873 emit_insn (copysign_insn (dest, NULL_RTX, op0, op1, nmask, mask));
10874 }
10875 }
10876
10877 /* Deconstruct a copysign operation into bit masks. Operand 0 is known to
10878 be a constant, and so has already been expanded into a vector constant. */
10879
10880 void
10881 ix86_split_copysign_const (rtx operands[])
10882 {
10883 enum machine_mode mode, vmode;
10884 rtx dest, op0, op1, mask, x;
10885
10886 dest = operands[0];
10887 op0 = operands[1];
10888 op1 = operands[2];
10889 mask = operands[3];
10890
10891 mode = GET_MODE (dest);
10892 vmode = GET_MODE (mask);
10893
10894 dest = simplify_gen_subreg (vmode, dest, mode, 0);
10895 x = gen_rtx_AND (vmode, dest, mask);
10896 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10897
10898 if (op0 != CONST0_RTX (vmode))
10899 {
10900 x = gen_rtx_IOR (vmode, dest, op0);
10901 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10902 }
10903 }
10904
10905 /* Deconstruct a copysign operation into bit masks. Operand 0 is variable,
10906 so we have to do two masks. */
10907
10908 void
10909 ix86_split_copysign_var (rtx operands[])
10910 {
10911 enum machine_mode mode, vmode;
10912 rtx dest, scratch, op0, op1, mask, nmask, x;
10913
10914 dest = operands[0];
10915 scratch = operands[1];
10916 op0 = operands[2];
10917 op1 = operands[3];
10918 nmask = operands[4];
10919 mask = operands[5];
10920
10921 mode = GET_MODE (dest);
10922 vmode = GET_MODE (mask);
10923
10924 if (rtx_equal_p (op0, op1))
10925 {
10926 /* Shouldn't happen often (it's useless, obviously), but when it does
10927 we'd generate incorrect code if we continue below. */
10928 emit_move_insn (dest, op0);
10929 return;
10930 }
10931
10932 if (REG_P (mask) && REGNO (dest) == REGNO (mask)) /* alternative 0 */
10933 {
10934 gcc_assert (REGNO (op1) == REGNO (scratch));
10935
10936 x = gen_rtx_AND (vmode, scratch, mask);
10937 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10938
10939 dest = mask;
10940 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10941 x = gen_rtx_NOT (vmode, dest);
10942 x = gen_rtx_AND (vmode, x, op0);
10943 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10944 }
10945 else
10946 {
10947 if (REGNO (op1) == REGNO (scratch)) /* alternative 1,3 */
10948 {
10949 x = gen_rtx_AND (vmode, scratch, mask);
10950 }
10951 else /* alternative 2,4 */
10952 {
10953 gcc_assert (REGNO (mask) == REGNO (scratch));
10954 op1 = simplify_gen_subreg (vmode, op1, mode, 0);
10955 x = gen_rtx_AND (vmode, scratch, op1);
10956 }
10957 emit_insn (gen_rtx_SET (VOIDmode, scratch, x));
10958
10959 if (REGNO (op0) == REGNO (dest)) /* alternative 1,2 */
10960 {
10961 dest = simplify_gen_subreg (vmode, op0, mode, 0);
10962 x = gen_rtx_AND (vmode, dest, nmask);
10963 }
10964 else /* alternative 3,4 */
10965 {
10966 gcc_assert (REGNO (nmask) == REGNO (dest));
10967 dest = nmask;
10968 op0 = simplify_gen_subreg (vmode, op0, mode, 0);
10969 x = gen_rtx_AND (vmode, dest, op0);
10970 }
10971 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10972 }
10973
10974 x = gen_rtx_IOR (vmode, dest, scratch);
10975 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
10976 }
10977
10978 /* Return TRUE or FALSE depending on whether the first SET in INSN
10979 has source and destination with matching CC modes, and that the
10980 CC mode is at least as constrained as REQ_MODE. */
10981
10982 int
10983 ix86_match_ccmode (rtx insn, enum machine_mode req_mode)
10984 {
10985 rtx set;
10986 enum machine_mode set_mode;
10987
10988 set = PATTERN (insn);
10989 if (GET_CODE (set) == PARALLEL)
10990 set = XVECEXP (set, 0, 0);
10991 gcc_assert (GET_CODE (set) == SET);
10992 gcc_assert (GET_CODE (SET_SRC (set)) == COMPARE);
10993
10994 set_mode = GET_MODE (SET_DEST (set));
10995 switch (set_mode)
10996 {
10997 case CCNOmode:
10998 if (req_mode != CCNOmode
10999 && (req_mode != CCmode
11000 || XEXP (SET_SRC (set), 1) != const0_rtx))
11001 return 0;
11002 break;
11003 case CCmode:
11004 if (req_mode == CCGCmode)
11005 return 0;
11006 /* FALLTHRU */
11007 case CCGCmode:
11008 if (req_mode == CCGOCmode || req_mode == CCNOmode)
11009 return 0;
11010 /* FALLTHRU */
11011 case CCGOCmode:
11012 if (req_mode == CCZmode)
11013 return 0;
11014 /* FALLTHRU */
11015 case CCZmode:
11016 break;
11017
11018 default:
11019 gcc_unreachable ();
11020 }
11021
11022 return (GET_MODE (SET_SRC (set)) == set_mode);
11023 }
11024
11025 /* Generate insn patterns to do an integer compare of OPERANDS. */
11026
11027 static rtx
11028 ix86_expand_int_compare (enum rtx_code code, rtx op0, rtx op1)
11029 {
11030 enum machine_mode cmpmode;
11031 rtx tmp, flags;
11032
11033 cmpmode = SELECT_CC_MODE (code, op0, op1);
11034 flags = gen_rtx_REG (cmpmode, FLAGS_REG);
11035
11036 /* This is very simple, but making the interface the same as in the
11037 FP case makes the rest of the code easier. */
11038 tmp = gen_rtx_COMPARE (cmpmode, op0, op1);
11039 emit_insn (gen_rtx_SET (VOIDmode, flags, tmp));
11040
11041 /* Return the test that should be put into the flags user, i.e.
11042 the bcc, scc, or cmov instruction. */
11043 return gen_rtx_fmt_ee (code, VOIDmode, flags, const0_rtx);
11044 }
11045
11046 /* Figure out whether to use ordered or unordered fp comparisons.
11047 Return the appropriate mode to use. */
11048
11049 enum machine_mode
11050 ix86_fp_compare_mode (enum rtx_code code ATTRIBUTE_UNUSED)
11051 {
11052 /* ??? In order to make all comparisons reversible, we do all comparisons
11053 non-trapping when compiling for IEEE. Once gcc is able to distinguish
11054 all forms of trapping and nontrapping comparisons, we can make inequality
11055 comparisons trapping again, since it results in better code when using
11056 FCOM based compares. */
11057 return TARGET_IEEE_FP ? CCFPUmode : CCFPmode;
11058 }
11059
11060 enum machine_mode
11061 ix86_cc_mode (enum rtx_code code, rtx op0, rtx op1)
11062 {
11063 enum machine_mode mode = GET_MODE (op0);
11064
11065 if (SCALAR_FLOAT_MODE_P (mode))
11066 {
11067 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
11068 return ix86_fp_compare_mode (code);
11069 }
11070
11071 switch (code)
11072 {
11073 /* Only zero flag is needed. */
11074 case EQ: /* ZF=0 */
11075 case NE: /* ZF!=0 */
11076 return CCZmode;
11077 /* Codes needing carry flag. */
11078 case GEU: /* CF=0 */
11079 case LTU: /* CF=1 */
11080 /* Detect overflow checks. They need just the carry flag. */
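/* For example, "sum = a + b; if (sum < a) ..." compares the result of a
   PLUS against one of its own addends, so only CF is meaningful here. */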
11081 if (GET_CODE (op0) == PLUS
11082 && rtx_equal_p (op1, XEXP (op0, 0)))
11083 return CCCmode;
11084 else
11085 return CCmode;
11086 case GTU: /* CF=0 & ZF=0 */
11087 case LEU: /* CF=1 | ZF=1 */
11088 /* Detect overflow checks. They need just the carry flag. */
11089 if (GET_CODE (op0) == MINUS
11090 && rtx_equal_p (op1, XEXP (op0, 0)))
11091 return CCCmode;
11092 else
11093 return CCmode;
11094 /* Codes possibly doable only with the sign flag when
11095 comparing against zero. */
11096 case GE: /* SF=OF or SF=0 */
11097 case LT: /* SF<>OF or SF=1 */
11098 if (op1 == const0_rtx)
11099 return CCGOCmode;
11100 else
11101 /* For the other cases the carry flag is not required. */
11102 return CCGCmode;
11103 /* Codes doable only with the sign flag when comparing
11104 against zero, but we lack a jump instruction for them,
11105 so we need to use relational tests against the overflow
11106 flag, which thus needs to be zero. */
11107 case GT: /* ZF=0 & SF=OF */
11108 case LE: /* ZF=1 | SF<>OF */
11109 if (op1 == const0_rtx)
11110 return CCNOmode;
11111 else
11112 return CCGCmode;
11113 /* The strcmp pattern does (use flags) and combine may ask us for the
11114 proper mode. */
11115 case USE:
11116 return CCmode;
11117 default:
11118 gcc_unreachable ();
11119 }
11120 }
11121
11122 /* Return the fixed registers used for condition codes. */
11123
11124 static bool
11125 ix86_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
11126 {
11127 *p1 = FLAGS_REG;
11128 *p2 = FPSR_REG;
11129 return true;
11130 }
11131
11132 /* If two condition code modes are compatible, return a condition code
11133 mode which is compatible with both. Otherwise, return
11134 VOIDmode. */
11135
11136 static enum machine_mode
11137 ix86_cc_modes_compatible (enum machine_mode m1, enum machine_mode m2)
11138 {
11139 if (m1 == m2)
11140 return m1;
11141
11142 if (GET_MODE_CLASS (m1) != MODE_CC || GET_MODE_CLASS (m2) != MODE_CC)
11143 return VOIDmode;
11144
11145 if ((m1 == CCGCmode && m2 == CCGOCmode)
11146 || (m1 == CCGOCmode && m2 == CCGCmode))
11147 return CCGCmode;
11148
11149 switch (m1)
11150 {
11151 default:
11152 gcc_unreachable ();
11153
11154 case CCmode:
11155 case CCGCmode:
11156 case CCGOCmode:
11157 case CCNOmode:
11158 case CCAmode:
11159 case CCCmode:
11160 case CCOmode:
11161 case CCSmode:
11162 case CCZmode:
11163 switch (m2)
11164 {
11165 default:
11166 return VOIDmode;
11167
11168 case CCmode:
11169 case CCGCmode:
11170 case CCGOCmode:
11171 case CCNOmode:
11172 case CCAmode:
11173 case CCCmode:
11174 case CCOmode:
11175 case CCSmode:
11176 case CCZmode:
11177 return CCmode;
11178 }
11179
11180 case CCFPmode:
11181 case CCFPUmode:
11182 /* These are only compatible with themselves, which we already
11183 checked above. */
11184 return VOIDmode;
11185 }
11186 }
11187
11188 /* Split comparison code CODE into comparisons we can do using branch
11189 instructions. BYPASS_CODE is comparison code for branch that will
11190 branch around FIRST_CODE and SECOND_CODE. If some of branches
11191 is not required, set value to UNKNOWN.
11192 We never require more than two branches. */
11193
11194 void
11195 ix86_fp_comparison_codes (enum rtx_code code, enum rtx_code *bypass_code,
11196 enum rtx_code *first_code,
11197 enum rtx_code *second_code)
11198 {
11199 *first_code = code;
11200 *bypass_code = UNKNOWN;
11201 *second_code = UNKNOWN;
11202
11203 /* The fcomi comparison sets flags as follows:
11204
11205 cmp ZF PF CF
11206 > 0 0 0
11207 < 0 0 1
11208 = 1 0 0
11209 un 1 1 1 */
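/* For the codes marked "fails on unordered" below, the single flag test
   gives the wrong answer when a NaN is involved (e.g. LT is implemented as
   UNLT, i.e. CF=1, but CF is also set for unordered operands), so under
   TARGET_IEEE_FP an extra UNORDERED branch is emitted, either branching
   around the first test (*bypass_code) or taken after it (*second_code). */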
11210
11211 switch (code)
11212 {
11213 case GT: /* GTU - CF=0 & ZF=0 */
11214 case GE: /* GEU - CF=0 */
11215 case ORDERED: /* PF=0 */
11216 case UNORDERED: /* PF=1 */
11217 case UNEQ: /* EQ - ZF=1 */
11218 case UNLT: /* LTU - CF=1 */
11219 case UNLE: /* LEU - CF=1 | ZF=1 */
11220 case LTGT: /* EQ - ZF=0 */
11221 break;
11222 case LT: /* LTU - CF=1 - fails on unordered */
11223 *first_code = UNLT;
11224 *bypass_code = UNORDERED;
11225 break;
11226 case LE: /* LEU - CF=1 | ZF=1 - fails on unordered */
11227 *first_code = UNLE;
11228 *bypass_code = UNORDERED;
11229 break;
11230 case EQ: /* EQ - ZF=1 - fails on unordered */
11231 *first_code = UNEQ;
11232 *bypass_code = UNORDERED;
11233 break;
11234 case NE: /* NE - ZF=0 - fails on unordered */
11235 *first_code = LTGT;
11236 *second_code = UNORDERED;
11237 break;
11238 case UNGE: /* GEU - CF=0 - fails on unordered */
11239 *first_code = GE;
11240 *second_code = UNORDERED;
11241 break;
11242 case UNGT: /* GTU - CF=0 & ZF=0 - fails on unordered */
11243 *first_code = GT;
11244 *second_code = UNORDERED;
11245 break;
11246 default:
11247 gcc_unreachable ();
11248 }
11249 if (!TARGET_IEEE_FP)
11250 {
11251 *second_code = UNKNOWN;
11252 *bypass_code = UNKNOWN;
11253 }
11254 }
11255
11256 /* Return the cost of a comparison done with fcom + arithmetic operations on AX.
11257 All following functions use the number of instructions as their cost metric.
11258 In the future this should be tweaked to compute bytes for optimize_size and
11259 to take into account the performance of various instructions on various CPUs. */
11260 static int
11261 ix86_fp_comparison_arithmetics_cost (enum rtx_code code)
11262 {
11263 if (!TARGET_IEEE_FP)
11264 return 4;
11265 /* The cost of code output by ix86_expand_fp_compare. */
11266 switch (code)
11267 {
11268 case UNLE:
11269 case UNLT:
11270 case LTGT:
11271 case GT:
11272 case GE:
11273 case UNORDERED:
11274 case ORDERED:
11275 case UNEQ:
11276 return 4;
11277 break;
11278 case LT:
11279 case NE:
11280 case EQ:
11281 case UNGE:
11282 return 5;
11283 break;
11284 case LE:
11285 case UNGT:
11286 return 6;
11287 break;
11288 default:
11289 gcc_unreachable ();
11290 }
11291 }
11292
11293 /* Return cost of comparison done using fcomi operation.
11294 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11295 static int
11296 ix86_fp_comparison_fcomi_cost (enum rtx_code code)
11297 {
11298 enum rtx_code bypass_code, first_code, second_code;
11299 /* Return arbitrarily high cost when the instruction is not supported - this
11300 prevents gcc from using it. */
11301 if (!TARGET_CMOVE)
11302 return 1024;
11303 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11304 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 2;
11305 }
11306
11307 /* Return cost of comparison done using sahf operation.
11308 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11309 static int
11310 ix86_fp_comparison_sahf_cost (enum rtx_code code)
11311 {
11312 enum rtx_code bypass_code, first_code, second_code;
11313 /* Return arbitrarily high cost when the instruction is not preferred - this
11314 prevents gcc from using it. */
11315 if (!(TARGET_SAHF && (TARGET_USE_SAHF || optimize_size)))
11316 return 1024;
11317 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11318 return (bypass_code != UNKNOWN || second_code != UNKNOWN) + 3;
11319 }
11320
11321 /* Compute cost of the comparison done using any method.
11322 See ix86_fp_comparison_arithmetics_cost for the metrics. */
11323 static int
11324 ix86_fp_comparison_cost (enum rtx_code code)
11325 {
11326 int fcomi_cost, sahf_cost, arithmetics_cost = 1024;
11327 int min;
11328
11329 fcomi_cost = ix86_fp_comparison_fcomi_cost (code);
11330 sahf_cost = ix86_fp_comparison_sahf_cost (code);
11331
11332 min = arithmetics_cost = ix86_fp_comparison_arithmetics_cost (code);
11333 if (min > sahf_cost)
11334 min = sahf_cost;
11335 if (min > fcomi_cost)
11336 min = fcomi_cost;
11337 return min;
11338 }
11339
11340 /* Return true if we should use an FCOMI instruction for this
11341 fp comparison. */
11342
11343 int
11344 ix86_use_fcomi_compare (enum rtx_code code ATTRIBUTE_UNUSED)
11345 {
11346 enum rtx_code swapped_code = swap_condition (code);
11347
11348 return ((ix86_fp_comparison_cost (code)
11349 == ix86_fp_comparison_fcomi_cost (code))
11350 || (ix86_fp_comparison_cost (swapped_code)
11351 == ix86_fp_comparison_fcomi_cost (swapped_code)));
11352 }
11353
11354 /* Swap, force into registers, or otherwise massage the two operands
11355 to a fp comparison. The operands are updated in place; the new
11356 comparison code is returned. */
11357
11358 static enum rtx_code
11359 ix86_prepare_fp_compare_args (enum rtx_code code, rtx *pop0, rtx *pop1)
11360 {
11361 enum machine_mode fpcmp_mode = ix86_fp_compare_mode (code);
11362 rtx op0 = *pop0, op1 = *pop1;
11363 enum machine_mode op_mode = GET_MODE (op0);
11364 int is_sse = TARGET_SSE_MATH && SSE_FLOAT_MODE_P (op_mode);
11365
11366 /* All of the unordered compare instructions only work on registers.
11367 The same is true of the fcomi compare instructions. The XFmode
11368 compare instructions require registers except when comparing
11369 against zero or when converting operand 1 from fixed point to
11370 floating point. */
11371
11372 if (!is_sse
11373 && (fpcmp_mode == CCFPUmode
11374 || (op_mode == XFmode
11375 && ! (standard_80387_constant_p (op0) == 1
11376 || standard_80387_constant_p (op1) == 1)
11377 && GET_CODE (op1) != FLOAT)
11378 || ix86_use_fcomi_compare (code)))
11379 {
11380 op0 = force_reg (op_mode, op0);
11381 op1 = force_reg (op_mode, op1);
11382 }
11383 else
11384 {
11385 /* %%% We only allow op1 in memory; op0 must be st(0). So swap
11386 things around if they appear profitable, otherwise force op0
11387 into a register. */
11388
11389 if (standard_80387_constant_p (op0) == 0
11390 || (MEM_P (op0)
11391 && ! (standard_80387_constant_p (op1) == 0
11392 || MEM_P (op1))))
11393 {
11394 rtx tmp;
11395 tmp = op0, op0 = op1, op1 = tmp;
11396 code = swap_condition (code);
11397 }
11398
11399 if (!REG_P (op0))
11400 op0 = force_reg (op_mode, op0);
11401
11402 if (CONSTANT_P (op1))
11403 {
11404 int tmp = standard_80387_constant_p (op1);
11405 if (tmp == 0)
11406 op1 = validize_mem (force_const_mem (op_mode, op1));
11407 else if (tmp == 1)
11408 {
11409 if (TARGET_CMOVE)
11410 op1 = force_reg (op_mode, op1);
11411 }
11412 else
11413 op1 = force_reg (op_mode, op1);
11414 }
11415 }
11416
11417 /* Try to rearrange the comparison to make it cheaper. */
11418 if (ix86_fp_comparison_cost (code)
11419 > ix86_fp_comparison_cost (swap_condition (code))
11420 && (REG_P (op1) || can_create_pseudo_p ()))
11421 {
11422 rtx tmp;
11423 tmp = op0, op0 = op1, op1 = tmp;
11424 code = swap_condition (code);
11425 if (!REG_P (op0))
11426 op0 = force_reg (op_mode, op0);
11427 }
11428
11429 *pop0 = op0;
11430 *pop1 = op1;
11431 return code;
11432 }
11433
11434 /* Convert comparison codes we use to represent FP comparison to integer
11435 code that will result in proper branch. Return UNKNOWN if no such code
11436 is available. */
11437
11438 enum rtx_code
11439 ix86_fp_compare_code_to_integer (enum rtx_code code)
11440 {
11441 switch (code)
11442 {
11443 case GT:
11444 return GTU;
11445 case GE:
11446 return GEU;
11447 case ORDERED:
11448 case UNORDERED:
11449 return code;
11450 break;
11451 case UNEQ:
11452 return EQ;
11453 break;
11454 case UNLT:
11455 return LTU;
11456 break;
11457 case UNLE:
11458 return LEU;
11459 break;
11460 case LTGT:
11461 return NE;
11462 break;
11463 default:
11464 return UNKNOWN;
11465 }
11466 }
11467
11468 /* Generate insn patterns to do a floating point compare of OPERANDS. */
11469
11470 static rtx
11471 ix86_expand_fp_compare (enum rtx_code code, rtx op0, rtx op1, rtx scratch,
11472 rtx *second_test, rtx *bypass_test)
11473 {
11474 enum machine_mode fpcmp_mode, intcmp_mode;
11475 rtx tmp, tmp2;
11476 int cost = ix86_fp_comparison_cost (code);
11477 enum rtx_code bypass_code, first_code, second_code;
11478
11479 fpcmp_mode = ix86_fp_compare_mode (code);
11480 code = ix86_prepare_fp_compare_args (code, &op0, &op1);
11481
11482 if (second_test)
11483 *second_test = NULL_RTX;
11484 if (bypass_test)
11485 *bypass_test = NULL_RTX;
11486
11487 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11488
11489 /* Do fcomi/sahf based test when profitable. */
11490 if (ix86_fp_comparison_arithmetics_cost (code) > cost
11491 && (bypass_code == UNKNOWN || bypass_test)
11492 && (second_code == UNKNOWN || second_test))
11493 {
11494 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11495 tmp = gen_rtx_SET (VOIDmode, gen_rtx_REG (fpcmp_mode, FLAGS_REG),
11496 tmp);
11497 if (TARGET_CMOVE)
11498 emit_insn (tmp);
11499 else
11500 {
11501 gcc_assert (TARGET_SAHF);
11502
11503 if (!scratch)
11504 scratch = gen_reg_rtx (HImode);
11505 tmp2 = gen_rtx_CLOBBER (VOIDmode, scratch);
11506
11507 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, tmp, tmp2)));
11508 }
11509
11510 /* The FP codes work out to act like unsigned. */
11511 intcmp_mode = fpcmp_mode;
11512 code = first_code;
11513 if (bypass_code != UNKNOWN)
11514 *bypass_test = gen_rtx_fmt_ee (bypass_code, VOIDmode,
11515 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11516 const0_rtx);
11517 if (second_code != UNKNOWN)
11518 *second_test = gen_rtx_fmt_ee (second_code, VOIDmode,
11519 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11520 const0_rtx);
11521 }
11522 else
11523 {
11524 /* Sadness wrt reg-stack pops killing fpsr -- gotta get fnstsw first. */
11525 tmp = gen_rtx_COMPARE (fpcmp_mode, op0, op1);
11526 tmp2 = gen_rtx_UNSPEC (HImode, gen_rtvec (1, tmp), UNSPEC_FNSTSW);
11527 if (!scratch)
11528 scratch = gen_reg_rtx (HImode);
11529 emit_insn (gen_rtx_SET (VOIDmode, scratch, tmp2));
11530
11531 /* In the unordered case, we have to check C2 for NaN's, which
11532 doesn't happen to work out to anything nice combination-wise.
11533 So do some bit twiddling on the value we've got in AH to come
11534 up with an appropriate set of condition codes. */
11535
11536 intcmp_mode = CCNOmode;
11537 switch (code)
11538 {
11539 case GT:
11540 case UNGT:
11541 if (code == GT || !TARGET_IEEE_FP)
11542 {
11543 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11544 code = EQ;
11545 }
11546 else
11547 {
11548 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11549 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11550 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x44)));
11551 intcmp_mode = CCmode;
11552 code = GEU;
11553 }
11554 break;
11555 case LT:
11556 case UNLT:
11557 if (code == LT && TARGET_IEEE_FP)
11558 {
11559 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11560 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x01)));
11561 intcmp_mode = CCmode;
11562 code = EQ;
11563 }
11564 else
11565 {
11566 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x01)));
11567 code = NE;
11568 }
11569 break;
11570 case GE:
11571 case UNGE:
11572 if (code == GE || !TARGET_IEEE_FP)
11573 {
11574 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x05)));
11575 code = EQ;
11576 }
11577 else
11578 {
11579 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11580 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11581 GEN_INT (0x01)));
11582 code = NE;
11583 }
11584 break;
11585 case LE:
11586 case UNLE:
11587 if (code == LE && TARGET_IEEE_FP)
11588 {
11589 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11590 emit_insn (gen_addqi_ext_1 (scratch, scratch, constm1_rtx));
11591 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11592 intcmp_mode = CCmode;
11593 code = LTU;
11594 }
11595 else
11596 {
11597 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x45)));
11598 code = NE;
11599 }
11600 break;
11601 case EQ:
11602 case UNEQ:
11603 if (code == EQ && TARGET_IEEE_FP)
11604 {
11605 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11606 emit_insn (gen_cmpqi_ext_3 (scratch, GEN_INT (0x40)));
11607 intcmp_mode = CCmode;
11608 code = EQ;
11609 }
11610 else
11611 {
11612 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11613 code = NE;
11614 break;
11615 }
11616 break;
11617 case NE:
11618 case LTGT:
11619 if (code == NE && TARGET_IEEE_FP)
11620 {
11621 emit_insn (gen_andqi_ext_0 (scratch, scratch, GEN_INT (0x45)));
11622 emit_insn (gen_xorqi_cc_ext_1 (scratch, scratch,
11623 GEN_INT (0x40)));
11624 code = NE;
11625 }
11626 else
11627 {
11628 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x40)));
11629 code = EQ;
11630 }
11631 break;
11632
11633 case UNORDERED:
11634 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11635 code = NE;
11636 break;
11637 case ORDERED:
11638 emit_insn (gen_testqi_ext_ccno_0 (scratch, GEN_INT (0x04)));
11639 code = EQ;
11640 break;
11641
11642 default:
11643 gcc_unreachable ();
11644 }
11645 }
11646
11647 /* Return the test that should be put into the flags user, i.e.
11648 the bcc, scc, or cmov instruction. */
11649 return gen_rtx_fmt_ee (code, VOIDmode,
11650 gen_rtx_REG (intcmp_mode, FLAGS_REG),
11651 const0_rtx);
11652 }
11653
11654 rtx
11655 ix86_expand_compare (enum rtx_code code, rtx *second_test, rtx *bypass_test)
11656 {
11657 rtx op0, op1, ret;
11658 op0 = ix86_compare_op0;
11659 op1 = ix86_compare_op1;
11660
11661 if (second_test)
11662 *second_test = NULL_RTX;
11663 if (bypass_test)
11664 *bypass_test = NULL_RTX;
11665
11666 if (ix86_compare_emitted)
11667 {
11668 ret = gen_rtx_fmt_ee (code, VOIDmode, ix86_compare_emitted, const0_rtx);
11669 ix86_compare_emitted = NULL_RTX;
11670 }
11671 else if (SCALAR_FLOAT_MODE_P (GET_MODE (op0)))
11672 {
11673 gcc_assert (!DECIMAL_FLOAT_MODE_P (GET_MODE (op0)));
11674 ret = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
11675 second_test, bypass_test);
11676 }
11677 else
11678 ret = ix86_expand_int_compare (code, op0, op1);
11679
11680 return ret;
11681 }
11682
11683 /* Return true if the CODE will result in nontrivial jump sequence. */
11684 bool
11685 ix86_fp_jump_nontrivial_p (enum rtx_code code)
11686 {
11687 enum rtx_code bypass_code, first_code, second_code;
11688 if (!TARGET_CMOVE)
11689 return true;
11690 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11691 return bypass_code != UNKNOWN || second_code != UNKNOWN;
11692 }
11693
11694 void
11695 ix86_expand_branch (enum rtx_code code, rtx label)
11696 {
11697 rtx tmp;
11698
11699 /* If we have emitted a compare insn, go straight to simple.
11700 ix86_expand_compare won't emit anything if ix86_compare_emitted
11701 is non NULL. */
11702 if (ix86_compare_emitted)
11703 goto simple;
11704
11705 switch (GET_MODE (ix86_compare_op0))
11706 {
11707 case QImode:
11708 case HImode:
11709 case SImode:
11710 simple:
11711 tmp = ix86_expand_compare (code, NULL, NULL);
11712 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11713 gen_rtx_LABEL_REF (VOIDmode, label),
11714 pc_rtx);
11715 emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
11716 return;
11717
11718 case SFmode:
11719 case DFmode:
11720 case XFmode:
11721 {
11722 rtvec vec;
11723 int use_fcomi;
11724 enum rtx_code bypass_code, first_code, second_code;
11725
11726 code = ix86_prepare_fp_compare_args (code, &ix86_compare_op0,
11727 &ix86_compare_op1);
11728
11729 ix86_fp_comparison_codes (code, &bypass_code, &first_code, &second_code);
11730
11731 /* Check whether we will use the natural sequence with one jump. If
11732 so, we can expand the jump early. Otherwise delay expansion by
11733 creating a compound insn so as not to confuse the optimizers. */
11734 if (bypass_code == UNKNOWN && second_code == UNKNOWN)
11735 {
11736 ix86_split_fp_branch (code, ix86_compare_op0, ix86_compare_op1,
11737 gen_rtx_LABEL_REF (VOIDmode, label),
11738 pc_rtx, NULL_RTX, NULL_RTX);
11739 }
11740 else
11741 {
11742 tmp = gen_rtx_fmt_ee (code, VOIDmode,
11743 ix86_compare_op0, ix86_compare_op1);
11744 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
11745 gen_rtx_LABEL_REF (VOIDmode, label),
11746 pc_rtx);
11747 tmp = gen_rtx_SET (VOIDmode, pc_rtx, tmp);
11748
11749 use_fcomi = ix86_use_fcomi_compare (code);
11750 vec = rtvec_alloc (3 + !use_fcomi);
11751 RTVEC_ELT (vec, 0) = tmp;
11752 RTVEC_ELT (vec, 1)
11753 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FPSR_REG));
11754 RTVEC_ELT (vec, 2)
11755 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (CCFPmode, FLAGS_REG));
11756 if (! use_fcomi)
11757 RTVEC_ELT (vec, 3)
11758 = gen_rtx_CLOBBER (VOIDmode, gen_rtx_SCRATCH (HImode));
11759
11760 emit_jump_insn (gen_rtx_PARALLEL (VOIDmode, vec));
11761 }
11762 return;
11763 }
11764
11765 case DImode:
11766 if (TARGET_64BIT)
11767 goto simple;
11768 case TImode:
11769 /* Expand DImode branch into multiple compare+branch. */
11770 {
11771 rtx lo[2], hi[2], label2;
11772 enum rtx_code code1, code2, code3;
11773 enum machine_mode submode;
11774
11775 if (CONSTANT_P (ix86_compare_op0) && ! CONSTANT_P (ix86_compare_op1))
11776 {
11777 tmp = ix86_compare_op0;
11778 ix86_compare_op0 = ix86_compare_op1;
11779 ix86_compare_op1 = tmp;
11780 code = swap_condition (code);
11781 }
11782 if (GET_MODE (ix86_compare_op0) == DImode)
11783 {
11784 split_di (&ix86_compare_op0, 1, lo+0, hi+0);
11785 split_di (&ix86_compare_op1, 1, lo+1, hi+1);
11786 submode = SImode;
11787 }
11788 else
11789 {
11790 split_ti (&ix86_compare_op0, 1, lo+0, hi+0);
11791 split_ti (&ix86_compare_op1, 1, lo+1, hi+1);
11792 submode = DImode;
11793 }
11794
11795 /* When comparing for equality, we can use (hi0^hi1)|(lo0^lo1) to
11796 avoid two branches. This costs one extra insn, so disable when
11797 optimizing for size. */
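/* E.g. a 64-bit "a == b" on a 32-bit target becomes two xors and an or
   feeding a single compare against zero, instead of two separate
   compare-and-branch sequences. */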
11798
11799 if ((code == EQ || code == NE)
11800 && (!optimize_size
11801 || hi[1] == const0_rtx || lo[1] == const0_rtx))
11802 {
11803 rtx xor0, xor1;
11804
11805 xor1 = hi[0];
11806 if (hi[1] != const0_rtx)
11807 xor1 = expand_binop (submode, xor_optab, xor1, hi[1],
11808 NULL_RTX, 0, OPTAB_WIDEN);
11809
11810 xor0 = lo[0];
11811 if (lo[1] != const0_rtx)
11812 xor0 = expand_binop (submode, xor_optab, xor0, lo[1],
11813 NULL_RTX, 0, OPTAB_WIDEN);
11814
11815 tmp = expand_binop (submode, ior_optab, xor1, xor0,
11816 NULL_RTX, 0, OPTAB_WIDEN);
11817
11818 ix86_compare_op0 = tmp;
11819 ix86_compare_op1 = const0_rtx;
11820 ix86_expand_branch (code, label);
11821 return;
11822 }
11823
11824 /* Otherwise, if we are doing less-than or greater-or-equal-than,
11825 op1 is a constant and the low word is zero, then we can just
11826 examine the high word. */
11827
11828 if (CONST_INT_P (hi[1]) && lo[1] == const0_rtx)
11829 switch (code)
11830 {
11831 case LT: case LTU: case GE: case GEU:
11832 ix86_compare_op0 = hi[0];
11833 ix86_compare_op1 = hi[1];
11834 ix86_expand_branch (code, label);
11835 return;
11836 default:
11837 break;
11838 }
11839
11840 /* Otherwise, we need two or three jumps. */
11841
11842 label2 = gen_label_rtx ();
11843
11844 code1 = code;
11845 code2 = swap_condition (code);
11846 code3 = unsigned_condition (code);
11847
11848 switch (code)
11849 {
11850 case LT: case GT: case LTU: case GTU:
11851 break;
11852
11853 case LE: code1 = LT; code2 = GT; break;
11854 case GE: code1 = GT; code2 = LT; break;
11855 case LEU: code1 = LTU; code2 = GTU; break;
11856 case GEU: code1 = GTU; code2 = LTU; break;
11857
11858 case EQ: code1 = UNKNOWN; code2 = NE; break;
11859 case NE: code2 = UNKNOWN; break;
11860
11861 default:
11862 gcc_unreachable ();
11863 }
11864
11865 /*
11866 * a < b =>
11867 * if (hi(a) < hi(b)) goto true;
11868 * if (hi(a) > hi(b)) goto false;
11869 * if (lo(a) < lo(b)) goto true;
11870 * false:
11871 */
11872
11873 ix86_compare_op0 = hi[0];
11874 ix86_compare_op1 = hi[1];
11875
11876 if (code1 != UNKNOWN)
11877 ix86_expand_branch (code1, label);
11878 if (code2 != UNKNOWN)
11879 ix86_expand_branch (code2, label2);
11880
11881 ix86_compare_op0 = lo[0];
11882 ix86_compare_op1 = lo[1];
11883 ix86_expand_branch (code3, label);
11884
11885 if (code2 != UNKNOWN)
11886 emit_label (label2);
11887 return;
11888 }
11889
11890 default:
11891 gcc_unreachable ();
11892 }
11893 }
11894
11895 /* Split branch based on floating point condition. */
11896 void
11897 ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
11898 rtx target1, rtx target2, rtx tmp, rtx pushed)
11899 {
11900 rtx second, bypass;
11901 rtx label = NULL_RTX;
11902 rtx condition;
11903 int bypass_probability = -1, second_probability = -1, probability = -1;
11904 rtx i;
11905
11906 if (target2 != pc_rtx)
11907 {
11908 rtx tmp = target2;
11909 code = reverse_condition_maybe_unordered (code);
11910 target2 = target1;
11911 target1 = tmp;
11912 }
11913
11914 condition = ix86_expand_fp_compare (code, op1, op2,
11915 tmp, &second, &bypass);
11916
11917 /* Remove pushed operand from stack. */
11918 if (pushed)
11919 ix86_free_from_memory (GET_MODE (pushed));
11920
11921 if (split_branch_probability >= 0)
11922 {
11923 /* Distribute the probabilities across the jumps.
11924 Assume that the BYPASS and SECOND tests are always
11925 for UNORDERED. */
11926 probability = split_branch_probability;
11927
11928 /* A value of 1 is low enough that the probability does not need
11929 to be updated. Later we may run some experiments and see
11930 whether unordered values are more frequent in practice. */
11931 if (bypass)
11932 bypass_probability = 1;
11933 if (second)
11934 second_probability = 1;
11935 }
11936 if (bypass != NULL_RTX)
11937 {
11938 label = gen_label_rtx ();
11939 i = emit_jump_insn (gen_rtx_SET
11940 (VOIDmode, pc_rtx,
11941 gen_rtx_IF_THEN_ELSE (VOIDmode,
11942 bypass,
11943 gen_rtx_LABEL_REF (VOIDmode,
11944 label),
11945 pc_rtx)));
11946 if (bypass_probability >= 0)
11947 REG_NOTES (i)
11948 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11949 GEN_INT (bypass_probability),
11950 REG_NOTES (i));
11951 }
11952 i = emit_jump_insn (gen_rtx_SET
11953 (VOIDmode, pc_rtx,
11954 gen_rtx_IF_THEN_ELSE (VOIDmode,
11955 condition, target1, target2)));
11956 if (probability >= 0)
11957 REG_NOTES (i)
11958 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11959 GEN_INT (probability),
11960 REG_NOTES (i));
11961 if (second != NULL_RTX)
11962 {
11963 i = emit_jump_insn (gen_rtx_SET
11964 (VOIDmode, pc_rtx,
11965 gen_rtx_IF_THEN_ELSE (VOIDmode, second, target1,
11966 target2)));
11967 if (second_probability >= 0)
11968 REG_NOTES (i)
11969 = gen_rtx_EXPR_LIST (REG_BR_PROB,
11970 GEN_INT (second_probability),
11971 REG_NOTES (i));
11972 }
11973 if (label != NULL_RTX)
11974 emit_label (label);
11975 }
11976
11977 int
11978 ix86_expand_setcc (enum rtx_code code, rtx dest)
11979 {
11980 rtx ret, tmp, tmpreg, equiv;
11981 rtx second_test, bypass_test;
11982
11983 if (GET_MODE (ix86_compare_op0) == (TARGET_64BIT ? TImode : DImode))
11984 return 0; /* FAIL */
11985
11986 gcc_assert (GET_MODE (dest) == QImode);
11987
11988 ret = ix86_expand_compare (code, &second_test, &bypass_test);
11989 PUT_MODE (ret, QImode);
11990
11991 tmp = dest;
11992 tmpreg = dest;
11993
11994 emit_insn (gen_rtx_SET (VOIDmode, tmp, ret));
11995 if (bypass_test || second_test)
11996 {
11997 rtx test = second_test;
11998 int bypass = 0;
11999 rtx tmp2 = gen_reg_rtx (QImode);
12000 if (bypass_test)
12001 {
12002 gcc_assert (!second_test);
12003 test = bypass_test;
12004 bypass = 1;
12005 PUT_CODE (test, reverse_condition_maybe_unordered (GET_CODE (test)));
12006 }
12007 PUT_MODE (test, QImode);
12008 emit_insn (gen_rtx_SET (VOIDmode, tmp2, test));
12009
12010 if (bypass)
12011 emit_insn (gen_andqi3 (tmp, tmpreg, tmp2));
12012 else
12013 emit_insn (gen_iorqi3 (tmp, tmpreg, tmp2));
12014 }
12015
12016 /* Attach a REG_EQUAL note describing the comparison result. */
12017 if (ix86_compare_op0 && ix86_compare_op1)
12018 {
12019 equiv = simplify_gen_relational (code, QImode,
12020 GET_MODE (ix86_compare_op0),
12021 ix86_compare_op0, ix86_compare_op1);
12022 set_unique_reg_note (get_last_insn (), REG_EQUAL, equiv);
12023 }
12024
12025 return 1; /* DONE */
12026 }
12027
12028 /* Expand comparison setting or clearing carry flag. Return true when
12029 successful and set pop for the operation. */
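/* A carry flag compare is what allows ix86_expand_int_movcc below to use
   the cmp/sbb idiom, e.g. turning "x = a < b ? ct : cf" into a branchless
   sequence. */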
12030 static bool
12031 ix86_expand_carry_flag_compare (enum rtx_code code, rtx op0, rtx op1, rtx *pop)
12032 {
12033 enum machine_mode mode =
12034 GET_MODE (op0) != VOIDmode ? GET_MODE (op0) : GET_MODE (op1);
12035
12036 /* Do not handle DImode compares that go through special path. */
12037 if (mode == (TARGET_64BIT ? TImode : DImode))
12038 return false;
12039
12040 if (SCALAR_FLOAT_MODE_P (mode))
12041 {
12042 rtx second_test = NULL, bypass_test = NULL;
12043 rtx compare_op, compare_seq;
12044
12045 gcc_assert (!DECIMAL_FLOAT_MODE_P (mode));
12046
12047 /* Shortcut: the following common codes never translate
12048 into carry flag compares. */
12049 if (code == EQ || code == NE || code == UNEQ || code == LTGT
12050 || code == ORDERED || code == UNORDERED)
12051 return false;
12052
12053 /* These comparisons require the zero flag; swap the operands so they don't. */
12054 if ((code == GT || code == UNLE || code == LE || code == UNGT)
12055 && !TARGET_IEEE_FP)
12056 {
12057 rtx tmp = op0;
12058 op0 = op1;
12059 op1 = tmp;
12060 code = swap_condition (code);
12061 }
12062
12063 /* Try to expand the comparison and verify that we end up with
12064 a carry flag based comparison. This fails to be true only when
12065 we decide to expand the comparison using arithmetic, which is not
12066 a very common scenario. */
12067 start_sequence ();
12068 compare_op = ix86_expand_fp_compare (code, op0, op1, NULL_RTX,
12069 &second_test, &bypass_test);
12070 compare_seq = get_insns ();
12071 end_sequence ();
12072
12073 if (second_test || bypass_test)
12074 return false;
12075
12076 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12077 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12078 code = ix86_fp_compare_code_to_integer (GET_CODE (compare_op));
12079 else
12080 code = GET_CODE (compare_op);
12081
12082 if (code != LTU && code != GEU)
12083 return false;
12084
12085 emit_insn (compare_seq);
12086 *pop = compare_op;
12087 return true;
12088 }
12089
12090 if (!INTEGRAL_MODE_P (mode))
12091 return false;
12092
12093 switch (code)
12094 {
12095 case LTU:
12096 case GEU:
12097 break;
12098
12099 /* Convert a==0 into (unsigned)a<1. */
12100 case EQ:
12101 case NE:
12102 if (op1 != const0_rtx)
12103 return false;
12104 op1 = const1_rtx;
12105 code = (code == EQ ? LTU : GEU);
12106 break;
12107
12108 /* Convert a>b into b<a or a>=b-1. */
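/* E.g. the unsigned test "a > 5" becomes "a >= 6" (GEU), and with a
   non-constant operand "a > b" becomes "b < a" (LTU); both are carry
   flag compares. */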
12109 case GTU:
12110 case LEU:
12111 if (CONST_INT_P (op1))
12112 {
12113 op1 = gen_int_mode (INTVAL (op1) + 1, GET_MODE (op0));
12114 /* Bail out on overflow. We could still swap the operands, but that
12115 would force loading of the constant into a register. */
12116 if (op1 == const0_rtx
12117 || !x86_64_immediate_operand (op1, GET_MODE (op1)))
12118 return false;
12119 code = (code == GTU ? GEU : LTU);
12120 }
12121 else
12122 {
12123 rtx tmp = op1;
12124 op1 = op0;
12125 op0 = tmp;
12126 code = (code == GTU ? LTU : GEU);
12127 }
12128 break;
12129
12130 /* Convert a>=0 into (unsigned)a<0x80000000. */
12131 case LT:
12132 case GE:
12133 if (mode == DImode || op1 != const0_rtx)
12134 return false;
12135 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12136 code = (code == LT ? GEU : LTU);
12137 break;
12138 case LE:
12139 case GT:
12140 if (mode == DImode || op1 != constm1_rtx)
12141 return false;
12142 op1 = gen_int_mode (1 << (GET_MODE_BITSIZE (mode) - 1), mode);
12143 code = (code == LE ? GEU : LTU);
12144 break;
12145
12146 default:
12147 return false;
12148 }
12149 /* Swapping operands may cause constant to appear as first operand. */
12150 if (!nonimmediate_operand (op0, VOIDmode))
12151 {
12152 if (!can_create_pseudo_p ())
12153 return false;
12154 op0 = force_reg (mode, op0);
12155 }
12156 ix86_compare_op0 = op0;
12157 ix86_compare_op1 = op1;
12158 *pop = ix86_expand_compare (code, NULL, NULL);
12159 gcc_assert (GET_CODE (*pop) == LTU || GET_CODE (*pop) == GEU);
12160 return true;
12161 }
12162
12163 int
12164 ix86_expand_int_movcc (rtx operands[])
12165 {
12166 enum rtx_code code = GET_CODE (operands[1]), compare_code;
12167 rtx compare_seq, compare_op;
12168 rtx second_test, bypass_test;
12169 enum machine_mode mode = GET_MODE (operands[0]);
12170 bool sign_bit_compare_p = false;
12171
12172 start_sequence ();
12173 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12174 compare_seq = get_insns ();
12175 end_sequence ();
12176
12177 compare_code = GET_CODE (compare_op);
12178
12179 if ((ix86_compare_op1 == const0_rtx && (code == GE || code == LT))
12180 || (ix86_compare_op1 == constm1_rtx && (code == GT || code == LE)))
12181 sign_bit_compare_p = true;
12182
12183 /* Don't attempt mode expansion here -- if we had to expand 5 or 6
12184 HImode insns, we'd be swallowed in word prefix ops. */
12185
12186 if ((mode != HImode || TARGET_FAST_PREFIX)
12187 && (mode != (TARGET_64BIT ? TImode : DImode))
12188 && CONST_INT_P (operands[2])
12189 && CONST_INT_P (operands[3]))
12190 {
12191 rtx out = operands[0];
12192 HOST_WIDE_INT ct = INTVAL (operands[2]);
12193 HOST_WIDE_INT cf = INTVAL (operands[3]);
12194 HOST_WIDE_INT diff;
12195
12196 diff = ct - cf;
12197 /* Sign bit compares are better done using shifts than by using
12198 sbb. */
12199 if (sign_bit_compare_p
12200 || ix86_expand_carry_flag_compare (code, ix86_compare_op0,
12201 ix86_compare_op1, &compare_op))
12202 {
12203 /* Detect overlap between destination and compare sources. */
12204 rtx tmp = out;
12205
12206 if (!sign_bit_compare_p)
12207 {
12208 bool fpcmp = false;
12209
12210 compare_code = GET_CODE (compare_op);
12211
12212 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
12213 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
12214 {
12215 fpcmp = true;
12216 compare_code = ix86_fp_compare_code_to_integer (compare_code);
12217 }
12218
12219 /* To simplify the rest of the code, restrict to the GEU case. */
12220 if (compare_code == LTU)
12221 {
12222 HOST_WIDE_INT tmp = ct;
12223 ct = cf;
12224 cf = tmp;
12225 compare_code = reverse_condition (compare_code);
12226 code = reverse_condition (code);
12227 }
12228 else
12229 {
12230 if (fpcmp)
12231 PUT_CODE (compare_op,
12232 reverse_condition_maybe_unordered
12233 (GET_CODE (compare_op)));
12234 else
12235 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
12236 }
12237 diff = ct - cf;
12238
12239 if (reg_overlap_mentioned_p (out, ix86_compare_op0)
12240 || reg_overlap_mentioned_p (out, ix86_compare_op1))
12241 tmp = gen_reg_rtx (mode);
12242
12243 if (mode == DImode)
12244 emit_insn (gen_x86_movdicc_0_m1_rex64 (tmp, compare_op));
12245 else
12246 emit_insn (gen_x86_movsicc_0_m1 (gen_lowpart (SImode, tmp), compare_op));
12247 }
12248 else
12249 {
12250 if (code == GT || code == GE)
12251 code = reverse_condition (code);
12252 else
12253 {
12254 HOST_WIDE_INT tmp = ct;
12255 ct = cf;
12256 cf = tmp;
12257 diff = ct - cf;
12258 }
12259 tmp = emit_store_flag (tmp, code, ix86_compare_op0,
12260 ix86_compare_op1, VOIDmode, 0, -1);
12261 }
12262
12263 if (diff == 1)
12264 {
12265 /*
12266 * cmpl op0,op1
12267 * sbbl dest,dest
12268 * [addl dest, ct]
12269 *
12270 * Size 5 - 8.
12271 */
12272 if (ct)
12273 tmp = expand_simple_binop (mode, PLUS,
12274 tmp, GEN_INT (ct),
12275 copy_rtx (tmp), 1, OPTAB_DIRECT);
12276 }
12277 else if (cf == -1)
12278 {
12279 /*
12280 * cmpl op0,op1
12281 * sbbl dest,dest
12282 * orl $ct, dest
12283 *
12284 * Size 8.
12285 */
12286 tmp = expand_simple_binop (mode, IOR,
12287 tmp, GEN_INT (ct),
12288 copy_rtx (tmp), 1, OPTAB_DIRECT);
12289 }
12290 else if (diff == -1 && ct)
12291 {
12292 /*
12293 * cmpl op0,op1
12294 * sbbl dest,dest
12295 * notl dest
12296 * [addl dest, cf]
12297 *
12298 * Size 8 - 11.
12299 */
12300 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12301 if (cf)
12302 tmp = expand_simple_binop (mode, PLUS,
12303 copy_rtx (tmp), GEN_INT (cf),
12304 copy_rtx (tmp), 1, OPTAB_DIRECT);
12305 }
12306 else
12307 {
12308 /*
12309 * cmpl op0,op1
12310 * sbbl dest,dest
12311 * [notl dest]
12312 * andl cf - ct, dest
12313 * [addl dest, ct]
12314 *
12315 * Size 8 - 11.
12316 */
12317
12318 if (cf == 0)
12319 {
12320 cf = ct;
12321 ct = 0;
12322 tmp = expand_simple_unop (mode, NOT, tmp, copy_rtx (tmp), 1);
12323 }
12324
12325 tmp = expand_simple_binop (mode, AND,
12326 copy_rtx (tmp),
12327 gen_int_mode (cf - ct, mode),
12328 copy_rtx (tmp), 1, OPTAB_DIRECT);
12329 if (ct)
12330 tmp = expand_simple_binop (mode, PLUS,
12331 copy_rtx (tmp), GEN_INT (ct),
12332 copy_rtx (tmp), 1, OPTAB_DIRECT);
12333 }
12334
12335 if (!rtx_equal_p (tmp, out))
12336 emit_move_insn (copy_rtx (out), copy_rtx (tmp));
12337
12338 return 1; /* DONE */
12339 }
12340
12341 if (diff < 0)
12342 {
12343 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12344
12345 HOST_WIDE_INT tmp;
12346 tmp = ct, ct = cf, cf = tmp;
12347 diff = -diff;
12348
12349 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12350 {
12351 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12352
12353 /* We may be reversing an unordered compare to a normal compare, which
12354 is not valid in general (we may convert a non-trapping condition
12355 to a trapping one); however, on i386 we currently emit all
12356 comparisons unordered. */
12357 compare_code = reverse_condition_maybe_unordered (compare_code);
12358 code = reverse_condition_maybe_unordered (code);
12359 }
12360 else
12361 {
12362 compare_code = reverse_condition (compare_code);
12363 code = reverse_condition (code);
12364 }
12365 }
12366
12367 compare_code = UNKNOWN;
12368 if (GET_MODE_CLASS (GET_MODE (ix86_compare_op0)) == MODE_INT
12369 && CONST_INT_P (ix86_compare_op1))
12370 {
12371 if (ix86_compare_op1 == const0_rtx
12372 && (code == LT || code == GE))
12373 compare_code = code;
12374 else if (ix86_compare_op1 == constm1_rtx)
12375 {
12376 if (code == LE)
12377 compare_code = LT;
12378 else if (code == GT)
12379 compare_code = GE;
12380 }
12381 }
12382
12383 /* Optimize dest = (op0 < 0) ? -1 : cf. */
12384 if (compare_code != UNKNOWN
12385 && GET_MODE (ix86_compare_op0) == GET_MODE (out)
12386 && (cf == -1 || ct == -1))
12387 {
12388 /* If lea code below could be used, only optimize
12389 if it results in a 2 insn sequence. */
12390
12391 if (! (diff == 1 || diff == 2 || diff == 4 || diff == 8
12392 || diff == 3 || diff == 5 || diff == 9)
12393 || (compare_code == LT && ct == -1)
12394 || (compare_code == GE && cf == -1))
12395 {
12396 /*
12397 * notl op1 (if necessary)
12398 * sarl $31, op1
12399 * orl cf, op1
12400 */
12401 if (ct != -1)
12402 {
12403 cf = ct;
12404 ct = -1;
12405 code = reverse_condition (code);
12406 }
12407
12408 out = emit_store_flag (out, code, ix86_compare_op0,
12409 ix86_compare_op1, VOIDmode, 0, -1);
12410
12411 out = expand_simple_binop (mode, IOR,
12412 out, GEN_INT (cf),
12413 out, 1, OPTAB_DIRECT);
12414 if (out != operands[0])
12415 emit_move_insn (operands[0], out);
12416
12417 return 1; /* DONE */
12418 }
12419 }
12420
12421
12422 if ((diff == 1 || diff == 2 || diff == 4 || diff == 8
12423 || diff == 3 || diff == 5 || diff == 9)
12424 && ((mode != QImode && mode != HImode) || !TARGET_PARTIAL_REG_STALL)
12425 && (mode != DImode
12426 || x86_64_immediate_operand (GEN_INT (cf), VOIDmode)))
12427 {
12428 /*
12429 * xorl dest,dest
12430 * cmpl op1,op2
12431 * setcc dest
12432 * lea cf(dest*(ct-cf)),dest
12433 *
12434 * Size 14.
12435 *
12436 * This also catches the degenerate setcc-only case.
12437 */
12438
12439 rtx tmp;
12440 int nops;
12441
12442 out = emit_store_flag (out, code, ix86_compare_op0,
12443 ix86_compare_op1, VOIDmode, 0, 1);
12444
12445 nops = 0;
12446 /* On x86_64 the lea instruction operates on Pmode, so we need
12447 to get the arithmetic done in the proper mode to match. */
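/* The rtx built below is intended to fold into a single lea; e.g. for
   diff == 5 and cf == 7 the result is computed as
   "lea 7(%reg,%reg,4), %dest". */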
12448 if (diff == 1)
12449 tmp = copy_rtx (out);
12450 else
12451 {
12452 rtx out1;
12453 out1 = copy_rtx (out);
12454 tmp = gen_rtx_MULT (mode, out1, GEN_INT (diff & ~1));
12455 nops++;
12456 if (diff & 1)
12457 {
12458 tmp = gen_rtx_PLUS (mode, tmp, out1);
12459 nops++;
12460 }
12461 }
12462 if (cf != 0)
12463 {
12464 tmp = gen_rtx_PLUS (mode, tmp, GEN_INT (cf));
12465 nops++;
12466 }
12467 if (!rtx_equal_p (tmp, out))
12468 {
12469 if (nops == 1)
12470 out = force_operand (tmp, copy_rtx (out));
12471 else
12472 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (out), copy_rtx (tmp)));
12473 }
12474 if (!rtx_equal_p (out, operands[0]))
12475 emit_move_insn (operands[0], copy_rtx (out));
12476
12477 return 1; /* DONE */
12478 }
12479
12480 /*
12481 * General case: Jumpful:
12482 * xorl dest,dest cmpl op1, op2
12483 * cmpl op1, op2 movl ct, dest
12484 * setcc dest jcc 1f
12485 * decl dest movl cf, dest
12486 * andl (cf-ct),dest 1:
12487 * addl ct,dest
12488 *
12489 * Size 20. Size 14.
12490 *
12491 * This is reasonably steep, but branch mispredict costs are
12492 * high on modern cpus, so consider failing only if optimizing
12493 * for space.
12494 */
12495
12496 if ((!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12497 && BRANCH_COST >= 2)
12498 {
12499 if (cf == 0)
12500 {
12501 enum machine_mode cmp_mode = GET_MODE (ix86_compare_op0);
12502
12503 cf = ct;
12504 ct = 0;
12505
12506 if (SCALAR_FLOAT_MODE_P (cmp_mode))
12507 {
12508 gcc_assert (!DECIMAL_FLOAT_MODE_P (cmp_mode));
12509
12510 /* We may be reversing an unordered compare to a normal compare,
12511 which is not valid in general (we may convert a non-trapping
12512 condition to a trapping one); however, on i386 we currently
12513 emit all comparisons unordered. */
12514 code = reverse_condition_maybe_unordered (code);
12515 }
12516 else
12517 {
12518 code = reverse_condition (code);
12519 if (compare_code != UNKNOWN)
12520 compare_code = reverse_condition (compare_code);
12521 }
12522 }
12523
12524 if (compare_code != UNKNOWN)
12525 {
12526 /* notl op1 (if needed)
12527 sarl $31, op1
12528 andl (cf-ct), op1
12529 addl ct, op1
12530
12531 For x < 0 (resp. x <= -1) there will be no notl,
12532 so if possible swap the constants to get rid of the
12533 complement.
12534 True/false will be -1/0 while code below (store flag
12535 followed by decrement) is 0/-1, so the constants need
12536 to be exchanged once more. */
12537
12538 if (compare_code == GE || !cf)
12539 {
12540 code = reverse_condition (code);
12541 compare_code = LT;
12542 }
12543 else
12544 {
12545 HOST_WIDE_INT tmp = cf;
12546 cf = ct;
12547 ct = tmp;
12548 }
12549
12550 out = emit_store_flag (out, code, ix86_compare_op0,
12551 ix86_compare_op1, VOIDmode, 0, -1);
12552 }
12553 else
12554 {
12555 out = emit_store_flag (out, code, ix86_compare_op0,
12556 ix86_compare_op1, VOIDmode, 0, 1);
12557
12558 out = expand_simple_binop (mode, PLUS, copy_rtx (out), constm1_rtx,
12559 copy_rtx (out), 1, OPTAB_DIRECT);
12560 }
12561
12562 out = expand_simple_binop (mode, AND, copy_rtx (out),
12563 gen_int_mode (cf - ct, mode),
12564 copy_rtx (out), 1, OPTAB_DIRECT);
12565 if (ct)
12566 out = expand_simple_binop (mode, PLUS, copy_rtx (out), GEN_INT (ct),
12567 copy_rtx (out), 1, OPTAB_DIRECT);
12568 if (!rtx_equal_p (out, operands[0]))
12569 emit_move_insn (operands[0], copy_rtx (out));
12570
12571 return 1; /* DONE */
12572 }
12573 }
12574
12575 if (!TARGET_CMOVE || (mode == QImode && TARGET_PARTIAL_REG_STALL))
12576 {
12577 /* Try a few things more with specific constants and a variable. */
12578
12579 optab op;
12580 rtx var, orig_out, out, tmp;
12581
12582 if (BRANCH_COST <= 2)
12583 return 0; /* FAIL */
12584
12585 /* If one of the two operands is an interesting constant, load a
12586 constant with the above and mask it in with a logical operation. */
12587
12588 if (CONST_INT_P (operands[2]))
12589 {
12590 var = operands[3];
12591 if (INTVAL (operands[2]) == 0 && operands[3] != constm1_rtx)
12592 operands[3] = constm1_rtx, op = and_optab;
12593 else if (INTVAL (operands[2]) == -1 && operands[3] != const0_rtx)
12594 operands[3] = const0_rtx, op = ior_optab;
12595 else
12596 return 0; /* FAIL */
12597 }
12598 else if (CONST_INT_P (operands[3]))
12599 {
12600 var = operands[2];
12601 if (INTVAL (operands[3]) == 0 && operands[2] != constm1_rtx)
12602 operands[2] = constm1_rtx, op = and_optab;
12603 else if (INTVAL (operands[3]) == -1 && operands[2] != const0_rtx)
12604 operands[2] = const0_rtx, op = ior_optab;
12605 else
12606 return 0; /* FAIL */
12607 }
12608 else
12609 return 0; /* FAIL */
12610
12611 orig_out = operands[0];
12612 tmp = gen_reg_rtx (mode);
12613 operands[0] = tmp;
12614
12615 /* Recurse to get the constant loaded. */
12616 if (ix86_expand_int_movcc (operands) == 0)
12617 return 0; /* FAIL */
12618
12619 /* Mask in the interesting variable. */
12620 out = expand_binop (mode, op, var, tmp, orig_out, 0,
12621 OPTAB_WIDEN);
12622 if (!rtx_equal_p (out, orig_out))
12623 emit_move_insn (copy_rtx (orig_out), copy_rtx (out));
12624
12625 return 1; /* DONE */
12626 }
12627
12628 /*
12629 * For comparison with above,
12630 *
12631 * movl cf,dest
12632 * movl ct,tmp
12633 * cmpl op1,op2
12634 * cmovcc tmp,dest
12635 *
12636 * Size 15.
12637 */
12638
12639 if (! nonimmediate_operand (operands[2], mode))
12640 operands[2] = force_reg (mode, operands[2]);
12641 if (! nonimmediate_operand (operands[3], mode))
12642 operands[3] = force_reg (mode, operands[3]);
12643
12644 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12645 {
12646 rtx tmp = gen_reg_rtx (mode);
12647 emit_move_insn (tmp, operands[3]);
12648 operands[3] = tmp;
12649 }
12650 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12651 {
12652 rtx tmp = gen_reg_rtx (mode);
12653 emit_move_insn (tmp, operands[2]);
12654 operands[2] = tmp;
12655 }
12656
12657 if (! register_operand (operands[2], VOIDmode)
12658 && (mode == QImode
12659 || ! register_operand (operands[3], VOIDmode)))
12660 operands[2] = force_reg (mode, operands[2]);
12661
12662 if (mode == QImode
12663 && ! register_operand (operands[3], VOIDmode))
12664 operands[3] = force_reg (mode, operands[3]);
12665
12666 emit_insn (compare_seq);
12667 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12668 gen_rtx_IF_THEN_ELSE (mode,
12669 compare_op, operands[2],
12670 operands[3])));
12671 if (bypass_test)
12672 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12673 gen_rtx_IF_THEN_ELSE (mode,
12674 bypass_test,
12675 copy_rtx (operands[3]),
12676 copy_rtx (operands[0]))));
12677 if (second_test)
12678 emit_insn (gen_rtx_SET (VOIDmode, copy_rtx (operands[0]),
12679 gen_rtx_IF_THEN_ELSE (mode,
12680 second_test,
12681 copy_rtx (operands[2]),
12682 copy_rtx (operands[0]))));
12683
12684 return 1; /* DONE */
12685 }
12686
12687 /* Swap, force into registers, or otherwise massage the two operands
12688 to an sse comparison with a mask result. Thus we differ a bit from
12689 ix86_prepare_fp_compare_args which expects to produce a flags result.
12690
12691 The DEST operand exists to help determine whether to commute commutative
12692 operators. The POP0/POP1 operands are updated in place. The new
12693 comparison code is returned, or UNKNOWN if not implementable. */
12694
12695 static enum rtx_code
12696 ix86_prepare_sse_fp_compare_args (rtx dest, enum rtx_code code,
12697 rtx *pop0, rtx *pop1)
12698 {
12699 rtx tmp;
12700
12701 switch (code)
12702 {
12703 case LTGT:
12704 case UNEQ:
12705 /* We have no LTGT as an operator. We could implement it with
12706 NE & ORDERED, but this requires an extra temporary. It's
12707 not clear that it's worth it. */
12708 return UNKNOWN;
12709
12710 case LT:
12711 case LE:
12712 case UNGT:
12713 case UNGE:
12714 /* These are supported directly. */
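/* (They map onto the SSE compare predicates LT, LE, NLE and NLT.) */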
12715 break;
12716
12717 case EQ:
12718 case NE:
12719 case UNORDERED:
12720 case ORDERED:
12721 /* For commutative operators, try to canonicalize the destination
12722 operand to be first in the comparison - this helps reload to
12723 avoid extra moves. */
12724 if (!dest || !rtx_equal_p (dest, *pop1))
12725 break;
12726 /* FALLTHRU */
12727
12728 case GE:
12729 case GT:
12730 case UNLE:
12731 case UNLT:
12732 /* These are not supported directly. Swap the comparison operands
12733 to transform into something that is supported. */
12734 tmp = *pop0;
12735 *pop0 = *pop1;
12736 *pop1 = tmp;
12737 code = swap_condition (code);
12738 break;
12739
12740 default:
12741 gcc_unreachable ();
12742 }
12743
12744 return code;
12745 }
12746
12747 /* Detect conditional moves that exactly match min/max operational
12748 semantics. Note that this is IEEE safe, as long as we don't
12749 interchange the operands.
12750
12751 Returns FALSE if this conditional move doesn't match a MIN/MAX,
12752 and TRUE if the operation is successful and instructions are emitted. */
12753
12754 static bool
12755 ix86_expand_sse_fp_minmax (rtx dest, enum rtx_code code, rtx cmp_op0,
12756 rtx cmp_op1, rtx if_true, rtx if_false)
12757 {
12758 enum machine_mode mode;
12759 bool is_min;
12760 rtx tmp;
12761
12762 if (code == LT)
12763 ;
12764 else if (code == UNGE)
12765 {
12766 tmp = if_true;
12767 if_true = if_false;
12768 if_false = tmp;
12769 }
12770 else
12771 return false;
12772
12773 if (rtx_equal_p (cmp_op0, if_true) && rtx_equal_p (cmp_op1, if_false))
12774 is_min = true;
12775 else if (rtx_equal_p (cmp_op1, if_true) && rtx_equal_p (cmp_op0, if_false))
12776 is_min = false;
12777 else
12778 return false;
12779
12780 mode = GET_MODE (dest);
12781
12782 /* We want to check HONOR_NANS and HONOR_SIGNED_ZEROS here,
12783 but MODE may be a vector mode and thus not appropriate. */
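/* The UNSPEC below keeps the operand order fixed: SSE min/max return the
   second operand for unordered inputs (and when both inputs are zero), so
   the operation must not be commuted. */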
12784 if (!flag_finite_math_only || !flag_unsafe_math_optimizations)
12785 {
12786 int u = is_min ? UNSPEC_IEEE_MIN : UNSPEC_IEEE_MAX;
12787 rtvec v;
12788
12789 if_true = force_reg (mode, if_true);
12790 v = gen_rtvec (2, if_true, if_false);
12791 tmp = gen_rtx_UNSPEC (mode, v, u);
12792 }
12793 else
12794 {
12795 code = is_min ? SMIN : SMAX;
12796 tmp = gen_rtx_fmt_ee (code, mode, if_true, if_false);
12797 }
12798
12799 emit_insn (gen_rtx_SET (VOIDmode, dest, tmp));
12800 return true;
12801 }
12802
12803 /* Expand an sse vector comparison. Return the register with the result. */
12804
12805 static rtx
12806 ix86_expand_sse_cmp (rtx dest, enum rtx_code code, rtx cmp_op0, rtx cmp_op1,
12807 rtx op_true, rtx op_false)
12808 {
12809 enum machine_mode mode = GET_MODE (dest);
12810 rtx x;
12811
12812 cmp_op0 = force_reg (mode, cmp_op0);
12813 if (!nonimmediate_operand (cmp_op1, mode))
12814 cmp_op1 = force_reg (mode, cmp_op1);
12815
12816 if (optimize
12817 || reg_overlap_mentioned_p (dest, op_true)
12818 || reg_overlap_mentioned_p (dest, op_false))
12819 dest = gen_reg_rtx (mode);
12820
12821 x = gen_rtx_fmt_ee (code, mode, cmp_op0, cmp_op1);
12822 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12823
12824 return dest;
12825 }
12826
12827 /* Expand DEST = CMP ? OP_TRUE : OP_FALSE into a sequence of logical
12828 operations. This is used for both scalar and vector conditional moves. */
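/* CMP is a mask of all-ones elements where the condition holds, so the
   general case computes dest = (cmp & op_true) | (~cmp & op_false); the
   two special cases below drop the unneeded half. */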
12829
12830 static void
12831 ix86_expand_sse_movcc (rtx dest, rtx cmp, rtx op_true, rtx op_false)
12832 {
12833 enum machine_mode mode = GET_MODE (dest);
12834 rtx t2, t3, x;
12835
12836 if (op_false == CONST0_RTX (mode))
12837 {
12838 op_true = force_reg (mode, op_true);
12839 x = gen_rtx_AND (mode, cmp, op_true);
12840 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12841 }
12842 else if (op_true == CONST0_RTX (mode))
12843 {
12844 op_false = force_reg (mode, op_false);
12845 x = gen_rtx_NOT (mode, cmp);
12846 x = gen_rtx_AND (mode, x, op_false);
12847 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12848 }
12849 else
12850 {
12851 op_true = force_reg (mode, op_true);
12852 op_false = force_reg (mode, op_false);
12853
12854 t2 = gen_reg_rtx (mode);
12855 if (optimize)
12856 t3 = gen_reg_rtx (mode);
12857 else
12858 t3 = dest;
12859
12860 x = gen_rtx_AND (mode, op_true, cmp);
12861 emit_insn (gen_rtx_SET (VOIDmode, t2, x));
12862
12863 x = gen_rtx_NOT (mode, cmp);
12864 x = gen_rtx_AND (mode, x, op_false);
12865 emit_insn (gen_rtx_SET (VOIDmode, t3, x));
12866
12867 x = gen_rtx_IOR (mode, t3, t2);
12868 emit_insn (gen_rtx_SET (VOIDmode, dest, x));
12869 }
12870 }
12871
12872 /* Expand a floating-point conditional move. Return true if successful. */
12873
12874 int
12875 ix86_expand_fp_movcc (rtx operands[])
12876 {
12877 enum machine_mode mode = GET_MODE (operands[0]);
12878 enum rtx_code code = GET_CODE (operands[1]);
12879 rtx tmp, compare_op, second_test, bypass_test;
12880
12881 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
12882 {
12883 enum machine_mode cmode;
12884
12885 /* Since we've no cmove for sse registers, don't force bad register
12886 allocation just to gain access to it. Deny movcc when the
12887 comparison mode doesn't match the move mode. */
12888 cmode = GET_MODE (ix86_compare_op0);
12889 if (cmode == VOIDmode)
12890 cmode = GET_MODE (ix86_compare_op1);
12891 if (cmode != mode)
12892 return 0;
12893
12894 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12895 &ix86_compare_op0,
12896 &ix86_compare_op1);
12897 if (code == UNKNOWN)
12898 return 0;
12899
12900 if (ix86_expand_sse_fp_minmax (operands[0], code, ix86_compare_op0,
12901 ix86_compare_op1, operands[2],
12902 operands[3]))
12903 return 1;
12904
12905 tmp = ix86_expand_sse_cmp (operands[0], code, ix86_compare_op0,
12906 ix86_compare_op1, operands[2], operands[3]);
12907 ix86_expand_sse_movcc (operands[0], tmp, operands[2], operands[3]);
12908 return 1;
12909 }
12910
12911 /* The floating point conditional move instructions don't directly
12912 support conditions resulting from a signed integer comparison. */
12913
12914 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12915
12919 if (!fcmov_comparison_operator (compare_op, VOIDmode))
12920 {
12921 gcc_assert (!second_test && !bypass_test);
12922 tmp = gen_reg_rtx (QImode);
12923 ix86_expand_setcc (code, tmp);
12924 code = NE;
12925 ix86_compare_op0 = tmp;
12926 ix86_compare_op1 = const0_rtx;
12927 compare_op = ix86_expand_compare (code, &second_test, &bypass_test);
12928 }
12929 if (bypass_test && reg_overlap_mentioned_p (operands[0], operands[3]))
12930 {
12931 tmp = gen_reg_rtx (mode);
12932 emit_move_insn (tmp, operands[3]);
12933 operands[3] = tmp;
12934 }
12935 if (second_test && reg_overlap_mentioned_p (operands[0], operands[2]))
12936 {
12937 tmp = gen_reg_rtx (mode);
12938 emit_move_insn (tmp, operands[2]);
12939 operands[2] = tmp;
12940 }
12941
12942 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12943 gen_rtx_IF_THEN_ELSE (mode, compare_op,
12944 operands[2], operands[3])));
12945 if (bypass_test)
12946 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12947 gen_rtx_IF_THEN_ELSE (mode, bypass_test,
12948 operands[3], operands[0])));
12949 if (second_test)
12950 emit_insn (gen_rtx_SET (VOIDmode, operands[0],
12951 gen_rtx_IF_THEN_ELSE (mode, second_test,
12952 operands[2], operands[0])));
12953
12954 return 1;
12955 }
12956
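/* For illustration, the non-SSE path above may emit up to three conditional
moves: the main IF_THEN_ELSE plus one for BYPASS_TEST and one for
SECOND_TEST, which ix86_expand_compare uses for comparisons (e.g. some
unordered ones) that a single fcmov condition cannot express.  */
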
12957 /* Expand a floating-point vector conditional move; a vcond operation
12958 rather than a movcc operation. */
12959
12960 bool
12961 ix86_expand_fp_vcond (rtx operands[])
12962 {
12963 enum rtx_code code = GET_CODE (operands[3]);
12964 rtx cmp;
12965
12966 code = ix86_prepare_sse_fp_compare_args (operands[0], code,
12967 &operands[4], &operands[5]);
12968 if (code == UNKNOWN)
12969 return false;
12970
12971 if (ix86_expand_sse_fp_minmax (operands[0], code, operands[4],
12972 operands[5], operands[1], operands[2]))
12973 return true;
12974
12975 cmp = ix86_expand_sse_cmp (operands[0], code, operands[4], operands[5],
12976 operands[1], operands[2]);
12977 ix86_expand_sse_movcc (operands[0], cmp, operands[1], operands[2]);
12978 return true;
12979 }
12980
12981 /* Expand a signed/unsigned integral vector conditional move. */
12982
12983 bool
12984 ix86_expand_int_vcond (rtx operands[])
12985 {
12986 enum machine_mode mode = GET_MODE (operands[0]);
12987 enum rtx_code code = GET_CODE (operands[3]);
12988 bool negate = false;
12989 rtx x, cop0, cop1;
12990
12991 cop0 = operands[4];
12992 cop1 = operands[5];
12993
12994 /* Canonicalize the comparison to EQ, GT, GTU. */
12995 switch (code)
12996 {
12997 case EQ:
12998 case GT:
12999 case GTU:
13000 break;
13001
13002 case NE:
13003 case LE:
13004 case LEU:
13005 code = reverse_condition (code);
13006 negate = true;
13007 break;
13008
13009 case GE:
13010 case GEU:
13011 code = reverse_condition (code);
13012 negate = true;
13013 /* FALLTHRU */
13014
13015 case LT:
13016 case LTU:
13017 code = swap_condition (code);
13018 x = cop0, cop0 = cop1, cop1 = x;
13019 break;
13020
13021 default:
13022 gcc_unreachable ();
13023 }
13024
13025 /* Only SSE4.1/SSE4.2 supports V2DImode. */
13026 if (mode == V2DImode)
13027 {
13028 switch (code)
13029 {
13030 case EQ:
13031 /* SSE4.1 supports EQ. */
13032 if (!TARGET_SSE4_1)
13033 return false;
13034 break;
13035
13036 case GT:
13037 case GTU:
13038 /* SSE4.2 supports GT/GTU. */
13039 if (!TARGET_SSE4_2)
13040 return false;
13041 break;
13042
13043 default:
13044 gcc_unreachable ();
13045 }
13046 }
13047
13048 /* Unsigned parallel compare is not supported by the hardware. Play some
13049 tricks to turn this into a signed comparison or a test against zero. */
13050 if (code == GTU)
13051 {
13052 cop0 = force_reg (mode, cop0);
13053
13054 switch (mode)
13055 {
13056 case V4SImode:
13057 case V2DImode:
13058 {
13059 rtx t1, t2, mask;
13060 
13061 /* Flip the sign bit of both operands (by subtracting the sign-bit
13062 mask modulo 2^width) so that the unsigned comparison becomes an
13063 equivalent signed one:
13064 a >u b <==> (a - bias) >s (b - bias). */
13065 mask = ix86_build_signbit_mask (GET_MODE_INNER (mode),
13066 true, false);
13067 
13068 cop1 = force_reg (mode, cop1);
13069 
13070 t1 = gen_reg_rtx (mode);
13071 emit_insn ((mode == V4SImode
13072 ? gen_subv4si3
13073 : gen_subv2di3) (t1, cop0, mask));
13074 
13075 t2 = gen_reg_rtx (mode);
13076 emit_insn ((mode == V4SImode
13077 ? gen_subv4si3
13078 : gen_subv2di3) (t2, cop1, mask));
13079 
13080 cop0 = t1;
13081 cop1 = t2;
13082 code = GT;
13083 }
13084 break;
13085
13086 case V16QImode:
13087 case V8HImode:
13088 /* Perform a parallel unsigned saturating subtraction. */
13089 x = gen_reg_rtx (mode);
13090 emit_insn (gen_rtx_SET (VOIDmode, x,
13091 gen_rtx_US_MINUS (mode, cop0, cop1)));
13092 
13093 cop0 = x;
13094 cop1 = CONST0_RTX (mode);
13095 code = EQ;
13096 negate = !negate;
13097 break;
13098 
13099 default:
13100 gcc_unreachable ();
13101 }
13102 
13103 }
13104
13105 x = ix86_expand_sse_cmp (operands[0], code, cop0, cop1,
13106 operands[1+negate], operands[2-negate]);
13107
13108 ix86_expand_sse_movcc (operands[0], x, operands[1+negate],
13109 operands[2-negate]);
13110 return true;
13111 }
13112
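/* Worked example (illustrative): for V16QImode a >u b, the code above
computes x = a minus b with unsigned saturation, so x == 0 exactly when
a <=u b; comparing for EQ against zero and flipping NEGATE then yields
the GTU selection.  For V4SImode/V2DImode, biasing both operands by the
sign-bit mask turns a >u b into the equivalent signed test
(a - bias) >s (b - bias), which pcmpgtd/pcmpgtq can perform.  */
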
13113 /* Unpack OP[1] into the next wider integer vector type. UNSIGNED_P is
13114 true if we should do zero extension, else sign extension. HIGH_P is
13115 true if we want the N/2 high elements, else the low elements. */
13116
13117 void
13118 ix86_expand_sse_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13119 {
13120 enum machine_mode imode = GET_MODE (operands[1]);
13121 rtx (*unpack)(rtx, rtx, rtx);
13122 rtx se, dest;
13123
13124 switch (imode)
13125 {
13126 case V16QImode:
13127 if (high_p)
13128 unpack = gen_vec_interleave_highv16qi;
13129 else
13130 unpack = gen_vec_interleave_lowv16qi;
13131 break;
13132 case V8HImode:
13133 if (high_p)
13134 unpack = gen_vec_interleave_highv8hi;
13135 else
13136 unpack = gen_vec_interleave_lowv8hi;
13137 break;
13138 case V4SImode:
13139 if (high_p)
13140 unpack = gen_vec_interleave_highv4si;
13141 else
13142 unpack = gen_vec_interleave_lowv4si;
13143 break;
13144 default:
13145 gcc_unreachable ();
13146 }
13147
13148 dest = gen_lowpart (imode, operands[0]);
13149
13150 if (unsigned_p)
13151 se = force_reg (imode, CONST0_RTX (imode));
13152 else
13153 se = ix86_expand_sse_cmp (gen_reg_rtx (imode), GT, CONST0_RTX (imode),
13154 operands[1], pc_rtx, pc_rtx);
13155
13156 emit_insn (unpack (dest, operands[1], se));
13157 }
13158
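/* For illustration, widening V8HImode to V4SImode this way interleaves each
element with its extension word: zeros for zero extension, or the result
of (0 > x), an all-ones mask for negative elements, for sign extension.
E.g. punpcklwd turns {a0,a1,a2,a3,...} and {e0,e1,e2,e3,...} into
{a0,e0,a1,e1,...}, i.e. the extended low half.  */
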
13159 /* This function performs the same task as ix86_expand_sse_unpack,
13160 but with SSE4.1 instructions. */
13161
13162 void
13163 ix86_expand_sse4_unpack (rtx operands[2], bool unsigned_p, bool high_p)
13164 {
13165 enum machine_mode imode = GET_MODE (operands[1]);
13166 rtx (*unpack)(rtx, rtx);
13167 rtx src, dest;
13168
13169 switch (imode)
13170 {
13171 case V16QImode:
13172 if (unsigned_p)
13173 unpack = gen_sse4_1_zero_extendv8qiv8hi2;
13174 else
13175 unpack = gen_sse4_1_extendv8qiv8hi2;
13176 break;
13177 case V8HImode:
13178 if (unsigned_p)
13179 unpack = gen_sse4_1_zero_extendv4hiv4si2;
13180 else
13181 unpack = gen_sse4_1_extendv4hiv4si2;
13182 break;
13183 case V4SImode:
13184 if (unsigned_p)
13185 unpack = gen_sse4_1_zero_extendv2siv2di2;
13186 else
13187 unpack = gen_sse4_1_extendv2siv2di2;
13188 break;
13189 default:
13190 gcc_unreachable ();
13191 }
13192
13193 dest = operands[0];
13194 if (high_p)
13195 {
13196 /* Shift higher 8 bytes to lower 8 bytes. */
13197 src = gen_reg_rtx (imode);
13198 emit_insn (gen_sse2_lshrti3 (gen_lowpart (TImode, src),
13199 gen_lowpart (TImode, operands[1]),
13200 GEN_INT (64)));
13201 }
13202 else
13203 src = operands[1];
13204
13205 emit_insn (unpack (dest, src));
13206 }
13207
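/* For illustration, the SSE4.1 variant maps directly onto the pmovsx/pmovzx
instructions; the HIGH_P case first shifts the upper 8 bytes of the source
down (a TImode shift by 64, i.e. psrldq by 8) so that the same low-half
extension can be reused.  */
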
13208 /* Expand conditional increment or decrement using adc/sbb instructions.
13209 The default case using setcc followed by the conditional move can be
13210 done by generic code. */
13211 int
13212 ix86_expand_int_addcc (rtx operands[])
13213 {
13214 enum rtx_code code = GET_CODE (operands[1]);
13215 rtx compare_op;
13216 rtx val = const0_rtx;
13217 bool fpcmp = false;
13218 enum machine_mode mode = GET_MODE (operands[0]);
13219
13220 if (operands[3] != const1_rtx
13221 && operands[3] != constm1_rtx)
13222 return 0;
13223 if (!ix86_expand_carry_flag_compare (code, ix86_compare_op0,
13224 ix86_compare_op1, &compare_op))
13225 return 0;
13226 code = GET_CODE (compare_op);
13227
13228 if (GET_MODE (XEXP (compare_op, 0)) == CCFPmode
13229 || GET_MODE (XEXP (compare_op, 0)) == CCFPUmode)
13230 {
13231 fpcmp = true;
13232 code = ix86_fp_compare_code_to_integer (code);
13233 }
13234
13235 if (code != LTU)
13236 {
13237 val = constm1_rtx;
13238 if (fpcmp)
13239 PUT_CODE (compare_op,
13240 reverse_condition_maybe_unordered
13241 (GET_CODE (compare_op)));
13242 else
13243 PUT_CODE (compare_op, reverse_condition (GET_CODE (compare_op)));
13244 }
13245 PUT_MODE (compare_op, mode);
13246
13247 /* Construct either adc or sbb insn. */
13248 if ((code == LTU) == (operands[3] == constm1_rtx))
13249 {
13250 switch (GET_MODE (operands[0]))
13251 {
13252 case QImode:
13253 emit_insn (gen_subqi3_carry (operands[0], operands[2], val, compare_op));
13254 break;
13255 case HImode:
13256 emit_insn (gen_subhi3_carry (operands[0], operands[2], val, compare_op));
13257 break;
13258 case SImode:
13259 emit_insn (gen_subsi3_carry (operands[0], operands[2], val, compare_op));
13260 break;
13261 case DImode:
13262 emit_insn (gen_subdi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13263 break;
13264 default:
13265 gcc_unreachable ();
13266 }
13267 }
13268 else
13269 {
13270 switch (GET_MODE (operands[0]))
13271 {
13272 case QImode:
13273 emit_insn (gen_addqi3_carry (operands[0], operands[2], val, compare_op));
13274 break;
13275 case HImode:
13276 emit_insn (gen_addhi3_carry (operands[0], operands[2], val, compare_op));
13277 break;
13278 case SImode:
13279 emit_insn (gen_addsi3_carry (operands[0], operands[2], val, compare_op));
13280 break;
13281 case DImode:
13282 emit_insn (gen_adddi3_carry_rex64 (operands[0], operands[2], val, compare_op));
13283 break;
13284 default:
13285 gcc_unreachable ();
13286 }
13287 }
13288 return 1; /* DONE */
13289 }
13290
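/* Worked example (illustrative): for r = r + (a < b) with an unsigned
comparison, the sequence produced above is roughly

cmpl %ebx, %eax     ; carry set iff a <u b
adcl $0, %ecx       ; r += carry

while r = r - (a < b) uses sbb; the +/-1 third operand and the LTU test
select between the adc and sbb forms and between adding 0 or -1.  */
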
13291
13292 /* Split operands 0 and 1 into SImode parts. Similar to split_di, but
13293 works for floating point parameters and non-offsettable memories.
13294 For pushes, it returns just stack offsets; the values will be saved
13295 in the right order. Maximally three parts are generated. */
13296
13297 static int
13298 ix86_split_to_parts (rtx operand, rtx *parts, enum machine_mode mode)
13299 {
13300 int size;
13301
13302 if (!TARGET_64BIT)
13303 size = mode==XFmode ? 3 : GET_MODE_SIZE (mode) / 4;
13304 else
13305 size = (GET_MODE_SIZE (mode) + 4) / 8;
13306
13307 gcc_assert (!REG_P (operand) || !MMX_REGNO_P (REGNO (operand)));
13308 gcc_assert (size >= 2 && size <= 3);
13309
13310 /* Optimize constant pool reference to immediates. This is used by fp
13311 moves that force all constants to memory to allow combining. */
13312 if (MEM_P (operand) && MEM_READONLY_P (operand))
13313 {
13314 rtx tmp = maybe_get_pool_constant (operand);
13315 if (tmp)
13316 operand = tmp;
13317 }
13318
13319 if (MEM_P (operand) && !offsettable_memref_p (operand))
13320 {
13321 /* The only non-offsettable memories we handle are pushes. */
13322 int ok = push_operand (operand, VOIDmode);
13323
13324 gcc_assert (ok);
13325
13326 operand = copy_rtx (operand);
13327 PUT_MODE (operand, Pmode);
13328 parts[0] = parts[1] = parts[2] = operand;
13329 return size;
13330 }
13331
13332 if (GET_CODE (operand) == CONST_VECTOR)
13333 {
13334 enum machine_mode imode = int_mode_for_mode (mode);
13335 /* Caution: if we looked through a constant pool memory above,
13336 the operand may actually have a different mode now. That's
13337 ok, since we want to pun this all the way back to an integer. */
13338 operand = simplify_subreg (imode, operand, GET_MODE (operand), 0);
13339 gcc_assert (operand != NULL);
13340 mode = imode;
13341 }
13342
13343 if (!TARGET_64BIT)
13344 {
13345 if (mode == DImode)
13346 split_di (&operand, 1, &parts[0], &parts[1]);
13347 else
13348 {
13349 if (REG_P (operand))
13350 {
13351 gcc_assert (reload_completed);
13352 parts[0] = gen_rtx_REG (SImode, REGNO (operand) + 0);
13353 parts[1] = gen_rtx_REG (SImode, REGNO (operand) + 1);
13354 if (size == 3)
13355 parts[2] = gen_rtx_REG (SImode, REGNO (operand) + 2);
13356 }
13357 else if (offsettable_memref_p (operand))
13358 {
13359 operand = adjust_address (operand, SImode, 0);
13360 parts[0] = operand;
13361 parts[1] = adjust_address (operand, SImode, 4);
13362 if (size == 3)
13363 parts[2] = adjust_address (operand, SImode, 8);
13364 }
13365 else if (GET_CODE (operand) == CONST_DOUBLE)
13366 {
13367 REAL_VALUE_TYPE r;
13368 long l[4];
13369
13370 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13371 switch (mode)
13372 {
13373 case XFmode:
13374 REAL_VALUE_TO_TARGET_LONG_DOUBLE (r, l);
13375 parts[2] = gen_int_mode (l[2], SImode);
13376 break;
13377 case DFmode:
13378 REAL_VALUE_TO_TARGET_DOUBLE (r, l);
13379 break;
13380 default:
13381 gcc_unreachable ();
13382 }
13383 parts[1] = gen_int_mode (l[1], SImode);
13384 parts[0] = gen_int_mode (l[0], SImode);
13385 }
13386 else
13387 gcc_unreachable ();
13388 }
13389 }
13390 else
13391 {
13392 if (mode == TImode)
13393 split_ti (&operand, 1, &parts[0], &parts[1]);
13394 if (mode == XFmode || mode == TFmode)
13395 {
13396 enum machine_mode upper_mode = mode==XFmode ? SImode : DImode;
13397 if (REG_P (operand))
13398 {
13399 gcc_assert (reload_completed);
13400 parts[0] = gen_rtx_REG (DImode, REGNO (operand) + 0);
13401 parts[1] = gen_rtx_REG (upper_mode, REGNO (operand) + 1);
13402 }
13403 else if (offsettable_memref_p (operand))
13404 {
13405 operand = adjust_address (operand, DImode, 0);
13406 parts[0] = operand;
13407 parts[1] = adjust_address (operand, upper_mode, 8);
13408 }
13409 else if (GET_CODE (operand) == CONST_DOUBLE)
13410 {
13411 REAL_VALUE_TYPE r;
13412 long l[4];
13413
13414 REAL_VALUE_FROM_CONST_DOUBLE (r, operand);
13415 real_to_target (l, &r, mode);
13416
13417 /* Do not use shift by 32 to avoid warning on 32bit systems. */
13418 if (HOST_BITS_PER_WIDE_INT >= 64)
13419 parts[0]
13420 = gen_int_mode
13421 ((l[0] & (((HOST_WIDE_INT) 2 << 31) - 1))
13422 + ((((HOST_WIDE_INT) l[1]) << 31) << 1),
13423 DImode);
13424 else
13425 parts[0] = immed_double_const (l[0], l[1], DImode);
13426
13427 if (upper_mode == SImode)
13428 parts[1] = gen_int_mode (l[2], SImode);
13429 else if (HOST_BITS_PER_WIDE_INT >= 64)
13430 parts[1]
13431 = gen_int_mode
13432 ((l[2] & (((HOST_WIDE_INT) 2 << 31) - 1))
13433 + ((((HOST_WIDE_INT) l[3]) << 31) << 1),
13434 DImode);
13435 else
13436 parts[1] = immed_double_const (l[2], l[3], DImode);
13437 }
13438 else
13439 gcc_unreachable ();
13440 }
13441 }
13442
13443 return size;
13444 }
13445
13446 /* Emit insns to perform a move or push of DI, DF, and XF values.
13447 All required insns are emitted here. Operands 2-4 receive the
13448 destination parts in the correct order; operands 5-7 receive the
13449 corresponding source parts. */
13450
13451 void
13452 ix86_split_long_move (rtx operands[])
13453 {
13454 rtx part[2][3];
13455 int nparts;
13456 int push = 0;
13457 int collisions = 0;
13458 enum machine_mode mode = GET_MODE (operands[0]);
13459
13460 /* The DFmode expanders may ask us to move a double.
13461 For a 64-bit target this is a single move. By hiding that fact
13462 here we simplify the i386.md splitters. */
13463 if (GET_MODE_SIZE (GET_MODE (operands[0])) == 8 && TARGET_64BIT)
13464 {
13465 /* Optimize constant pool reference to immediates. This is used by
13466 fp moves that force all constants to memory to allow combining. */
13467
13468 if (MEM_P (operands[1])
13469 && GET_CODE (XEXP (operands[1], 0)) == SYMBOL_REF
13470 && CONSTANT_POOL_ADDRESS_P (XEXP (operands[1], 0)))
13471 operands[1] = get_pool_constant (XEXP (operands[1], 0));
13472 if (push_operand (operands[0], VOIDmode))
13473 {
13474 operands[0] = copy_rtx (operands[0]);
13475 PUT_MODE (operands[0], Pmode);
13476 }
13477 else
13478 operands[0] = gen_lowpart (DImode, operands[0]);
13479 operands[1] = gen_lowpart (DImode, operands[1]);
13480 emit_move_insn (operands[0], operands[1]);
13481 return;
13482 }
13483
13484 /* The only non-offsettable memory we handle is push. */
13485 if (push_operand (operands[0], VOIDmode))
13486 push = 1;
13487 else
13488 gcc_assert (!MEM_P (operands[0])
13489 || offsettable_memref_p (operands[0]));
13490
13491 nparts = ix86_split_to_parts (operands[1], part[1], GET_MODE (operands[0]));
13492 ix86_split_to_parts (operands[0], part[0], GET_MODE (operands[0]));
13493
13494 /* When emitting a push, take care with source operands on the stack. */
13495 if (push && MEM_P (operands[1])
13496 && reg_overlap_mentioned_p (stack_pointer_rtx, operands[1]))
13497 {
13498 if (nparts == 3)
13499 part[1][1] = change_address (part[1][1], GET_MODE (part[1][1]),
13500 XEXP (part[1][2], 0));
13501 part[1][0] = change_address (part[1][0], GET_MODE (part[1][0]),
13502 XEXP (part[1][1], 0));
13503 }
13504
13505 /* We need to do the copy in the right order in case an address register
13506 of the source overlaps the destination. */
13507 if (REG_P (part[0][0]) && MEM_P (part[1][0]))
13508 {
13509 if (reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0)))
13510 collisions++;
13511 if (reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13512 collisions++;
13513 if (nparts == 3
13514 && reg_overlap_mentioned_p (part[0][2], XEXP (part[1][0], 0)))
13515 collisions++;
13516
13517 /* Collision in the middle part can be handled by reordering. */
13518 if (collisions == 1 && nparts == 3
13519 && reg_overlap_mentioned_p (part[0][1], XEXP (part[1][0], 0)))
13520 {
13521 rtx tmp;
13522 tmp = part[0][1]; part[0][1] = part[0][2]; part[0][2] = tmp;
13523 tmp = part[1][1]; part[1][1] = part[1][2]; part[1][2] = tmp;
13524 }
13525
13526 /* If there are more collisions, we can't handle it by reordering.
13527 Do an lea to the last part and use only one colliding move. */
13528 else if (collisions > 1)
13529 {
13530 rtx base;
13531
13532 collisions = 1;
13533
13534 base = part[0][nparts - 1];
13535
13536 /* Handle the case when the last part isn't valid for lea.
13537 Happens in 64-bit mode storing the 12-byte XFmode. */
13538 if (GET_MODE (base) != Pmode)
13539 base = gen_rtx_REG (Pmode, REGNO (base));
13540
13541 emit_insn (gen_rtx_SET (VOIDmode, base, XEXP (part[1][0], 0)));
13542 part[1][0] = replace_equiv_address (part[1][0], base);
13543 part[1][1] = replace_equiv_address (part[1][1],
13544 plus_constant (base, UNITS_PER_WORD));
13545 if (nparts == 3)
13546 part[1][2] = replace_equiv_address (part[1][2],
13547 plus_constant (base, 8));
13548 }
13549 }
13550
13551 if (push)
13552 {
13553 if (!TARGET_64BIT)
13554 {
13555 if (nparts == 3)
13556 {
13557 if (TARGET_128BIT_LONG_DOUBLE && mode == XFmode)
13558 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, GEN_INT (-4)));
13559 emit_move_insn (part[0][2], part[1][2]);
13560 }
13561 }
13562 else
13563 {
13564 /* In 64-bit mode we don't have a 32-bit push available. If this is
13565 a register, that is OK - we just use the larger counterpart. We also
13566 retype memories - these come from an attempt to avoid the REX prefix
13567 on moving the second half of a TFmode value. */
13568 if (GET_MODE (part[1][1]) == SImode)
13569 {
13570 switch (GET_CODE (part[1][1]))
13571 {
13572 case MEM:
13573 part[1][1] = adjust_address (part[1][1], DImode, 0);
13574 break;
13575
13576 case REG:
13577 part[1][1] = gen_rtx_REG (DImode, REGNO (part[1][1]));
13578 break;
13579
13580 default:
13581 gcc_unreachable ();
13582 }
13583
13584 if (GET_MODE (part[1][0]) == SImode)
13585 part[1][0] = part[1][1];
13586 }
13587 }
13588 emit_move_insn (part[0][1], part[1][1]);
13589 emit_move_insn (part[0][0], part[1][0]);
13590 return;
13591 }
13592
13593 /* Choose the correct order so we do not overwrite the source before it is copied. */
13594 if ((REG_P (part[0][0])
13595 && REG_P (part[1][1])
13596 && (REGNO (part[0][0]) == REGNO (part[1][1])
13597 || (nparts == 3
13598 && REGNO (part[0][0]) == REGNO (part[1][2]))))
13599 || (collisions > 0
13600 && reg_overlap_mentioned_p (part[0][0], XEXP (part[1][0], 0))))
13601 {
13602 if (nparts == 3)
13603 {
13604 operands[2] = part[0][2];
13605 operands[3] = part[0][1];
13606 operands[4] = part[0][0];
13607 operands[5] = part[1][2];
13608 operands[6] = part[1][1];
13609 operands[7] = part[1][0];
13610 }
13611 else
13612 {
13613 operands[2] = part[0][1];
13614 operands[3] = part[0][0];
13615 operands[5] = part[1][1];
13616 operands[6] = part[1][0];
13617 }
13618 }
13619 else
13620 {
13621 if (nparts == 3)
13622 {
13623 operands[2] = part[0][0];
13624 operands[3] = part[0][1];
13625 operands[4] = part[0][2];
13626 operands[5] = part[1][0];
13627 operands[6] = part[1][1];
13628 operands[7] = part[1][2];
13629 }
13630 else
13631 {
13632 operands[2] = part[0][0];
13633 operands[3] = part[0][1];
13634 operands[5] = part[1][0];
13635 operands[6] = part[1][1];
13636 }
13637 }
13638
13639 /* If optimizing for size, attempt to locally unCSE nonzero constants. */
13640 if (optimize_size)
13641 {
13642 if (CONST_INT_P (operands[5])
13643 && operands[5] != const0_rtx
13644 && REG_P (operands[2]))
13645 {
13646 if (CONST_INT_P (operands[6])
13647 && INTVAL (operands[6]) == INTVAL (operands[5]))
13648 operands[6] = operands[2];
13649
13650 if (nparts == 3
13651 && CONST_INT_P (operands[7])
13652 && INTVAL (operands[7]) == INTVAL (operands[5]))
13653 operands[7] = operands[2];
13654 }
13655
13656 if (nparts == 3
13657 && CONST_INT_P (operands[6])
13658 && operands[6] != const0_rtx
13659 && REG_P (operands[3])
13660 && CONST_INT_P (operands[7])
13661 && INTVAL (operands[7]) == INTVAL (operands[6]))
13662 operands[7] = operands[3];
13663 }
13664
13665 emit_move_insn (operands[2], operands[5]);
13666 emit_move_insn (operands[3], operands[6]);
13667 if (nparts == 3)
13668 emit_move_insn (operands[4], operands[7]);
13669
13670 return;
13671 }
13672
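/* For illustration, a 32-bit copy of a DImode value loaded through an
address held in %eax into %edx:%eax must move the high word first:
moving the low word first would clobber %eax while it is still needed
to address the source.  The collision checks and the lea fallback above
exist to get this ordering right.  */
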
13673 /* Helper function of ix86_split_ashl used to generate an SImode/DImode
13674 left shift by a constant, either using a single shift or
13675 a sequence of add instructions. */
13676
13677 static void
13678 ix86_expand_ashl_const (rtx operand, int count, enum machine_mode mode)
13679 {
13680 if (count == 1)
13681 {
13682 emit_insn ((mode == DImode
13683 ? gen_addsi3
13684 : gen_adddi3) (operand, operand, operand));
13685 }
13686 else if (!optimize_size
13687 && count * ix86_cost->add <= ix86_cost->shift_const)
13688 {
13689 int i;
13690 for (i=0; i<count; i++)
13691 {
13692 emit_insn ((mode == DImode
13693 ? gen_addsi3
13694 : gen_adddi3) (operand, operand, operand));
13695 }
13696 }
13697 else
13698 emit_insn ((mode == DImode
13699 ? gen_ashlsi3
13700 : gen_ashldi3) (operand, operand, GEN_INT (count)));
13701 }
13702
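/* For illustration, when the count is small and adds are cheap enough
(count * add cost <= one constant shift), a shift left by 2 becomes

addl %eax, %eax
addl %eax, %eax

otherwise a single sall/salq by the constant is emitted.  */
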
13703 void
13704 ix86_split_ashl (rtx *operands, rtx scratch, enum machine_mode mode)
13705 {
13706 rtx low[2], high[2];
13707 int count;
13708 const int single_width = mode == DImode ? 32 : 64;
13709
13710 if (CONST_INT_P (operands[2]))
13711 {
13712 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13713 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13714
13715 if (count >= single_width)
13716 {
13717 emit_move_insn (high[0], low[1]);
13718 emit_move_insn (low[0], const0_rtx);
13719
13720 if (count > single_width)
13721 ix86_expand_ashl_const (high[0], count - single_width, mode);
13722 }
13723 else
13724 {
13725 if (!rtx_equal_p (operands[0], operands[1]))
13726 emit_move_insn (operands[0], operands[1]);
13727 emit_insn ((mode == DImode
13728 ? gen_x86_shld_1
13729 : gen_x86_64_shld) (high[0], low[0], GEN_INT (count)));
13730 ix86_expand_ashl_const (low[0], count, mode);
13731 }
13732 return;
13733 }
13734
13735 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13736
13737 if (operands[1] == const1_rtx)
13738 {
13739 /* Assuming we've chosen QImode-capable registers, 1 << N
13740 can be done with two 32/64-bit shifts, no branches, no cmoves. */
13741 if (ANY_QI_REG_P (low[0]) && ANY_QI_REG_P (high[0]))
13742 {
13743 rtx s, d, flags = gen_rtx_REG (CCZmode, FLAGS_REG);
13744
13745 ix86_expand_clear (low[0]);
13746 ix86_expand_clear (high[0]);
13747 emit_insn (gen_testqi_ccz_1 (operands[2], GEN_INT (single_width)));
13748
13749 d = gen_lowpart (QImode, low[0]);
13750 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13751 s = gen_rtx_EQ (QImode, flags, const0_rtx);
13752 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13753
13754 d = gen_lowpart (QImode, high[0]);
13755 d = gen_rtx_STRICT_LOW_PART (VOIDmode, d);
13756 s = gen_rtx_NE (QImode, flags, const0_rtx);
13757 emit_insn (gen_rtx_SET (VOIDmode, d, s));
13758 }
13759
13760 /* Otherwise, we can get the same results by manually performing
13761 a bit extract operation on bit 5/6, and then performing the two
13762 shifts. The two methods of getting 0/1 into low/high are exactly
13763 the same size. Avoiding the shift in the bit extract case helps
13764 pentium4 a bit; no one else seems to care much either way. */
13765 else
13766 {
13767 rtx x;
13768
13769 if (TARGET_PARTIAL_REG_STALL && !optimize_size)
13770 x = gen_rtx_ZERO_EXTEND (mode == DImode ? SImode : DImode, operands[2]);
13771 else
13772 x = gen_lowpart (mode == DImode ? SImode : DImode, operands[2]);
13773 emit_insn (gen_rtx_SET (VOIDmode, high[0], x));
13774
13775 emit_insn ((mode == DImode
13776 ? gen_lshrsi3
13777 : gen_lshrdi3) (high[0], high[0], GEN_INT (mode == DImode ? 5 : 6)));
13778 emit_insn ((mode == DImode
13779 ? gen_andsi3
13780 : gen_anddi3) (high[0], high[0], GEN_INT (1)));
13781 emit_move_insn (low[0], high[0]);
13782 emit_insn ((mode == DImode
13783 ? gen_xorsi3
13784 : gen_xordi3) (low[0], low[0], GEN_INT (1)));
13785 }
13786
13787 emit_insn ((mode == DImode
13788 ? gen_ashlsi3
13789 : gen_ashldi3) (low[0], low[0], operands[2]));
13790 emit_insn ((mode == DImode
13791 ? gen_ashlsi3
13792 : gen_ashldi3) (high[0], high[0], operands[2]));
13793 return;
13794 }
13795
13796 if (operands[1] == constm1_rtx)
13797 {
13798 /* For -1 << N, we can avoid the shld instruction, because we
13799 know that we're shifting 0...31/63 ones into a -1. */
13800 emit_move_insn (low[0], constm1_rtx);
13801 if (optimize_size)
13802 emit_move_insn (high[0], low[0]);
13803 else
13804 emit_move_insn (high[0], constm1_rtx);
13805 }
13806 else
13807 {
13808 if (!rtx_equal_p (operands[0], operands[1]))
13809 emit_move_insn (operands[0], operands[1]);
13810
13811 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13812 emit_insn ((mode == DImode
13813 ? gen_x86_shld_1
13814 : gen_x86_64_shld) (high[0], low[0], operands[2]));
13815 }
13816
13817 emit_insn ((mode == DImode ? gen_ashlsi3 : gen_ashldi3) (low[0], low[0], operands[2]));
13818
13819 if (TARGET_CMOVE && scratch)
13820 {
13821 ix86_expand_clear (scratch);
13822 emit_insn ((mode == DImode
13823 ? gen_x86_shift_adj_1
13824 : gen_x86_64_shift_adj) (high[0], low[0], operands[2], scratch));
13825 }
13826 else
13827 emit_insn (gen_x86_shift_adj_2 (high[0], low[0], operands[2]));
13828 }
13829
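/* Worked example (illustrative): a variable DImode shift left on ia32
becomes roughly

shldl %cl, %eax, %edx   ; %edx = high word of (%edx:%eax << (count & 31))
sall  %cl, %eax         ; %eax <<= (count & 31)

followed by an adjustment that, when bit 5 of the count is set, moves the
low word into the high word and clears the low word (a conditional move
when a scratch register is available, otherwise a branch).  The
constant-count cases above avoid the adjustment entirely.  */
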
13830 void
13831 ix86_split_ashr (rtx *operands, rtx scratch, enum machine_mode mode)
13832 {
13833 rtx low[2], high[2];
13834 int count;
13835 const int single_width = mode == DImode ? 32 : 64;
13836
13837 if (CONST_INT_P (operands[2]))
13838 {
13839 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13840 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13841
13842 if (count == single_width * 2 - 1)
13843 {
13844 emit_move_insn (high[0], high[1]);
13845 emit_insn ((mode == DImode
13846 ? gen_ashrsi3
13847 : gen_ashrdi3) (high[0], high[0],
13848 GEN_INT (single_width - 1)));
13849 emit_move_insn (low[0], high[0]);
13850
13851 }
13852 else if (count >= single_width)
13853 {
13854 emit_move_insn (low[0], high[1]);
13855 emit_move_insn (high[0], low[0]);
13856 emit_insn ((mode == DImode
13857 ? gen_ashrsi3
13858 : gen_ashrdi3) (high[0], high[0],
13859 GEN_INT (single_width - 1)));
13860 if (count > single_width)
13861 emit_insn ((mode == DImode
13862 ? gen_ashrsi3
13863 : gen_ashrdi3) (low[0], low[0],
13864 GEN_INT (count - single_width)));
13865 }
13866 else
13867 {
13868 if (!rtx_equal_p (operands[0], operands[1]))
13869 emit_move_insn (operands[0], operands[1]);
13870 emit_insn ((mode == DImode
13871 ? gen_x86_shrd_1
13872 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13873 emit_insn ((mode == DImode
13874 ? gen_ashrsi3
13875 : gen_ashrdi3) (high[0], high[0], GEN_INT (count)));
13876 }
13877 }
13878 else
13879 {
13880 if (!rtx_equal_p (operands[0], operands[1]))
13881 emit_move_insn (operands[0], operands[1]);
13882
13883 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13884
13885 emit_insn ((mode == DImode
13886 ? gen_x86_shrd_1
13887 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13888 emit_insn ((mode == DImode
13889 ? gen_ashrsi3
13890 : gen_ashrdi3) (high[0], high[0], operands[2]));
13891
13892 if (TARGET_CMOVE && scratch)
13893 {
13894 emit_move_insn (scratch, high[0]);
13895 emit_insn ((mode == DImode
13896 ? gen_ashrsi3
13897 : gen_ashrdi3) (scratch, scratch,
13898 GEN_INT (single_width - 1)));
13899 emit_insn ((mode == DImode
13900 ? gen_x86_shift_adj_1
13901 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13902 scratch));
13903 }
13904 else
13905 emit_insn (gen_x86_shift_adj_3 (low[0], high[0], operands[2]));
13906 }
13907 }
13908
13909 void
13910 ix86_split_lshr (rtx *operands, rtx scratch, enum machine_mode mode)
13911 {
13912 rtx low[2], high[2];
13913 int count;
13914 const int single_width = mode == DImode ? 32 : 64;
13915
13916 if (CONST_INT_P (operands[2]))
13917 {
13918 (mode == DImode ? split_di : split_ti) (operands, 2, low, high);
13919 count = INTVAL (operands[2]) & (single_width * 2 - 1);
13920
13921 if (count >= single_width)
13922 {
13923 emit_move_insn (low[0], high[1]);
13924 ix86_expand_clear (high[0]);
13925
13926 if (count > single_width)
13927 emit_insn ((mode == DImode
13928 ? gen_lshrsi3
13929 : gen_lshrdi3) (low[0], low[0],
13930 GEN_INT (count - single_width)));
13931 }
13932 else
13933 {
13934 if (!rtx_equal_p (operands[0], operands[1]))
13935 emit_move_insn (operands[0], operands[1]);
13936 emit_insn ((mode == DImode
13937 ? gen_x86_shrd_1
13938 : gen_x86_64_shrd) (low[0], high[0], GEN_INT (count)));
13939 emit_insn ((mode == DImode
13940 ? gen_lshrsi3
13941 : gen_lshrdi3) (high[0], high[0], GEN_INT (count)));
13942 }
13943 }
13944 else
13945 {
13946 if (!rtx_equal_p (operands[0], operands[1]))
13947 emit_move_insn (operands[0], operands[1]);
13948
13949 (mode == DImode ? split_di : split_ti) (operands, 1, low, high);
13950
13951 emit_insn ((mode == DImode
13952 ? gen_x86_shrd_1
13953 : gen_x86_64_shrd) (low[0], high[0], operands[2]));
13954 emit_insn ((mode == DImode
13955 ? gen_lshrsi3
13956 : gen_lshrdi3) (high[0], high[0], operands[2]));
13957
13958 /* Heh. By reversing the arguments, we can reuse this pattern. */
13959 if (TARGET_CMOVE && scratch)
13960 {
13961 ix86_expand_clear (scratch);
13962 emit_insn ((mode == DImode
13963 ? gen_x86_shift_adj_1
13964 : gen_x86_64_shift_adj) (low[0], high[0], operands[2],
13965 scratch));
13966 }
13967 else
13968 emit_insn (gen_x86_shift_adj_2 (low[0], high[0], operands[2]));
13969 }
13970 }
13971
13972 /* Predict the just-emitted jump instruction to be taken with probability PROB. */
13973 static void
13974 predict_jump (int prob)
13975 {
13976 rtx insn = get_last_insn ();
13977 gcc_assert (JUMP_P (insn));
13978 REG_NOTES (insn)
13979 = gen_rtx_EXPR_LIST (REG_BR_PROB,
13980 GEN_INT (prob),
13981 REG_NOTES (insn));
13982 }
13983
13984 /* Helper function for the string operations below. Test VARIABLE for
13985 whether it is aligned to VALUE bytes; if so, jump to the label. */
13986 static rtx
13987 ix86_expand_aligntest (rtx variable, int value, bool epilogue)
13988 {
13989 rtx label = gen_label_rtx ();
13990 rtx tmpcount = gen_reg_rtx (GET_MODE (variable));
13991 if (GET_MODE (variable) == DImode)
13992 emit_insn (gen_anddi3 (tmpcount, variable, GEN_INT (value)));
13993 else
13994 emit_insn (gen_andsi3 (tmpcount, variable, GEN_INT (value)));
13995 emit_cmp_and_jump_insns (tmpcount, const0_rtx, EQ, 0, GET_MODE (variable),
13996 1, label);
13997 if (epilogue)
13998 predict_jump (REG_BR_PROB_BASE * 50 / 100);
13999 else
14000 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14001 return label;
14002 }
14003
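/* For illustration, ix86_expand_aligntest (ptr, 4, ...) emits roughly

testl $4, %edi
je .Llabel

i.e. the label is reached when the tested alignment bit is clear, and the
branch probability note from predict_jump is attached to the jump.  */
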
14004 /* Decrease COUNTREG by VALUE. */
14005 static void
14006 ix86_adjust_counter (rtx countreg, HOST_WIDE_INT value)
14007 {
14008 if (GET_MODE (countreg) == DImode)
14009 emit_insn (gen_adddi3 (countreg, countreg, GEN_INT (-value)));
14010 else
14011 emit_insn (gen_addsi3 (countreg, countreg, GEN_INT (-value)));
14012 }
14013
14014 /* Zero extend possibly SImode EXP to Pmode register. */
14015 rtx
14016 ix86_zero_extend_to_Pmode (rtx exp)
14017 {
14018 rtx r;
14019 if (GET_MODE (exp) == VOIDmode)
14020 return force_reg (Pmode, exp);
14021 if (GET_MODE (exp) == Pmode)
14022 return copy_to_mode_reg (Pmode, exp);
14023 r = gen_reg_rtx (Pmode);
14024 emit_insn (gen_zero_extendsidi2 (r, exp));
14025 return r;
14026 }
14027
14028 /* Divide COUNTREG by SCALE. */
14029 static rtx
14030 scale_counter (rtx countreg, int scale)
14031 {
14032 rtx sc;
14033 rtx piece_size_mask;
14034
14035 if (scale == 1)
14036 return countreg;
14037 if (CONST_INT_P (countreg))
14038 return GEN_INT (INTVAL (countreg) / scale);
14039 gcc_assert (REG_P (countreg));
14040
14041 piece_size_mask = GEN_INT (scale - 1);
14042 sc = expand_simple_binop (GET_MODE (countreg), LSHIFTRT, countreg,
14043 GEN_INT (exact_log2 (scale)),
14044 NULL, 1, OPTAB_DIRECT);
14045 return sc;
14046 }
14047
14048 /* Return mode for the memcpy/memset loop counter. Prefer SImode over
14049 DImode for constant loop counts. */
14050
14051 static enum machine_mode
14052 counter_mode (rtx count_exp)
14053 {
14054 if (GET_MODE (count_exp) != VOIDmode)
14055 return GET_MODE (count_exp);
14056 if (GET_CODE (count_exp) != CONST_INT)
14057 return Pmode;
14058 if (TARGET_64BIT && (INTVAL (count_exp) & ~0xffffffff))
14059 return DImode;
14060 return SImode;
14061 }
14062
14063 /* When SRCPTR is non-NULL, output a simple loop to move memory pointed
14064 to by SRCPTR to DESTPTR via chunks of MODE, unrolled UNROLL times;
14065 the overall size is COUNT, specified in bytes. When SRCPTR is NULL, output the
14066 equivalent loop to set memory to VALUE (supposed to be in MODE).
14067
14068 The size is rounded down to whole number of chunk size moved at once.
14069 SRCMEM and DESTMEM provide MEMrtx to feed proper aliasing info. */
14070
14071
14072 static void
14073 expand_set_or_movmem_via_loop (rtx destmem, rtx srcmem,
14074 rtx destptr, rtx srcptr, rtx value,
14075 rtx count, enum machine_mode mode, int unroll,
14076 int expected_size)
14077 {
14078 rtx out_label, top_label, iter, tmp;
14079 enum machine_mode iter_mode = counter_mode (count);
14080 rtx piece_size = GEN_INT (GET_MODE_SIZE (mode) * unroll);
14081 rtx piece_size_mask = GEN_INT (~((GET_MODE_SIZE (mode) * unroll) - 1));
14082 rtx size;
14083 rtx x_addr;
14084 rtx y_addr;
14085 int i;
14086
14087 top_label = gen_label_rtx ();
14088 out_label = gen_label_rtx ();
14089 iter = gen_reg_rtx (iter_mode);
14090
14091 size = expand_simple_binop (iter_mode, AND, count, piece_size_mask,
14092 NULL, 1, OPTAB_DIRECT);
14093 /* Those two should combine. */
14094 if (piece_size == const1_rtx)
14095 {
14096 emit_cmp_and_jump_insns (size, const0_rtx, EQ, NULL_RTX, iter_mode,
14097 true, out_label);
14098 predict_jump (REG_BR_PROB_BASE * 10 / 100);
14099 }
14100 emit_move_insn (iter, const0_rtx);
14101
14102 emit_label (top_label);
14103
14104 tmp = convert_modes (Pmode, iter_mode, iter, true);
14105 x_addr = gen_rtx_PLUS (Pmode, destptr, tmp);
14106 destmem = change_address (destmem, mode, x_addr);
14107
14108 if (srcmem)
14109 {
14110 y_addr = gen_rtx_PLUS (Pmode, srcptr, copy_rtx (tmp));
14111 srcmem = change_address (srcmem, mode, y_addr);
14112
14113 /* When unrolling for chips that reorder memory reads and writes,
14114 we can save registers by using a single temporary.
14115 Using 4 temporaries is also overkill in 32-bit mode. */
14116 if (!TARGET_64BIT && 0)
14117 {
14118 for (i = 0; i < unroll; i++)
14119 {
14120 if (i)
14121 {
14122 destmem =
14123 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14124 srcmem =
14125 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14126 }
14127 emit_move_insn (destmem, srcmem);
14128 }
14129 }
14130 else
14131 {
14132 rtx tmpreg[4];
14133 gcc_assert (unroll <= 4);
14134 for (i = 0; i < unroll; i++)
14135 {
14136 tmpreg[i] = gen_reg_rtx (mode);
14137 if (i)
14138 {
14139 srcmem =
14140 adjust_address (copy_rtx (srcmem), mode, GET_MODE_SIZE (mode));
14141 }
14142 emit_move_insn (tmpreg[i], srcmem);
14143 }
14144 for (i = 0; i < unroll; i++)
14145 {
14146 if (i)
14147 {
14148 destmem =
14149 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14150 }
14151 emit_move_insn (destmem, tmpreg[i]);
14152 }
14153 }
14154 }
14155 else
14156 for (i = 0; i < unroll; i++)
14157 {
14158 if (i)
14159 destmem =
14160 adjust_address (copy_rtx (destmem), mode, GET_MODE_SIZE (mode));
14161 emit_move_insn (destmem, value);
14162 }
14163
14164 tmp = expand_simple_binop (iter_mode, PLUS, iter, piece_size, iter,
14165 true, OPTAB_LIB_WIDEN);
14166 if (tmp != iter)
14167 emit_move_insn (iter, tmp);
14168
14169 emit_cmp_and_jump_insns (iter, size, LT, NULL_RTX, iter_mode,
14170 true, top_label);
14171 if (expected_size != -1)
14172 {
14173 expected_size /= GET_MODE_SIZE (mode) * unroll;
14174 if (expected_size == 0)
14175 predict_jump (0);
14176 else if (expected_size > REG_BR_PROB_BASE)
14177 predict_jump (REG_BR_PROB_BASE - 1);
14178 else
14179 predict_jump (REG_BR_PROB_BASE - (REG_BR_PROB_BASE + expected_size / 2) / expected_size);
14180 }
14181 else
14182 predict_jump (REG_BR_PROB_BASE * 80 / 100);
14183 iter = ix86_zero_extend_to_Pmode (iter);
14184 tmp = expand_simple_binop (Pmode, PLUS, destptr, iter, destptr,
14185 true, OPTAB_LIB_WIDEN);
14186 if (tmp != destptr)
14187 emit_move_insn (destptr, tmp);
14188 if (srcptr)
14189 {
14190 tmp = expand_simple_binop (Pmode, PLUS, srcptr, iter, srcptr,
14191 true, OPTAB_LIB_WIDEN);
14192 if (tmp != srcptr)
14193 emit_move_insn (srcptr, tmp);
14194 }
14195 emit_label (out_label);
14196 }
14197
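/* For illustration, with MODE == SImode and UNROLL == 4 the loop emitted
above copies 16 bytes per iteration:

size = count & ~15
iter = 0
top:
  copy 16 bytes (four SImode loads, then four SImode stores)
    at SRCPTR + iter / DESTPTR + iter
  iter += 16
  if (iter < size) goto top
destptr += iter; srcptr += iter
out:

The remaining count & 15 bytes are left to the epilogue code.  */
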
14198 /* Output "rep; mov" instruction.
14199 Arguments have the same meaning as for the previous function. */
14200 static void
14201 expand_movmem_via_rep_mov (rtx destmem, rtx srcmem,
14202 rtx destptr, rtx srcptr,
14203 rtx count,
14204 enum machine_mode mode)
14205 {
14206 rtx destexp;
14207 rtx srcexp;
14208 rtx countreg;
14209
14210 /* If the size is known, it is shorter to use rep movs. */
14211 if (mode == QImode && CONST_INT_P (count)
14212 && !(INTVAL (count) & 3))
14213 mode = SImode;
14214
14215 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14216 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14217 if (srcptr != XEXP (srcmem, 0) || GET_MODE (srcmem) != BLKmode)
14218 srcmem = adjust_automodify_address_nv (srcmem, BLKmode, srcptr, 0);
14219 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14220 if (mode != QImode)
14221 {
14222 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14223 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14224 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14225 srcexp = gen_rtx_ASHIFT (Pmode, countreg,
14226 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14227 srcexp = gen_rtx_PLUS (Pmode, srcexp, srcptr);
14228 }
14229 else
14230 {
14231 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14232 srcexp = gen_rtx_PLUS (Pmode, srcptr, countreg);
14233 }
14234 emit_insn (gen_rep_mov (destptr, destmem, srcptr, srcmem, countreg,
14235 destexp, srcexp));
14236 }
14237
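/* For illustration, a copy whose size is known to be a multiple of 4 going
through this path boils down to

movl $(count / 4), %ecx
rep movsl

with the count pre-scaled by scale_counter and the final pointer values
described to the RTL via DESTEXP/SRCEXP.  */
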
14238 /* Output "rep; stos" instruction.
14239 Arguments have the same meaning as for the previous function. */
14240 static void
14241 expand_setmem_via_rep_stos (rtx destmem, rtx destptr, rtx value,
14242 rtx count,
14243 enum machine_mode mode)
14244 {
14245 rtx destexp;
14246 rtx countreg;
14247
14248 if (destptr != XEXP (destmem, 0) || GET_MODE (destmem) != BLKmode)
14249 destmem = adjust_automodify_address_nv (destmem, BLKmode, destptr, 0);
14250 value = force_reg (mode, gen_lowpart (mode, value));
14251 countreg = ix86_zero_extend_to_Pmode (scale_counter (count, GET_MODE_SIZE (mode)));
14252 if (mode != QImode)
14253 {
14254 destexp = gen_rtx_ASHIFT (Pmode, countreg,
14255 GEN_INT (exact_log2 (GET_MODE_SIZE (mode))));
14256 destexp = gen_rtx_PLUS (Pmode, destexp, destptr);
14257 }
14258 else
14259 destexp = gen_rtx_PLUS (Pmode, destptr, countreg);
14260 emit_insn (gen_rep_stos (destptr, countreg, destmem, value, destexp));
14261 }
14262
14263 static void
14264 emit_strmov (rtx destmem, rtx srcmem,
14265 rtx destptr, rtx srcptr, enum machine_mode mode, int offset)
14266 {
14267 rtx src = adjust_automodify_address_nv (srcmem, mode, srcptr, offset);
14268 rtx dest = adjust_automodify_address_nv (destmem, mode, destptr, offset);
14269 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14270 }
14271
14272 /* Output code to copy at most count & (max_size - 1) bytes from SRC to DEST. */
14273 static void
14274 expand_movmem_epilogue (rtx destmem, rtx srcmem,
14275 rtx destptr, rtx srcptr, rtx count, int max_size)
14276 {
14277 rtx src, dest;
14278 if (CONST_INT_P (count))
14279 {
14280 HOST_WIDE_INT countval = INTVAL (count);
14281 int offset = 0;
14282
14283 if ((countval & 0x10) && max_size > 16)
14284 {
14285 if (TARGET_64BIT)
14286 {
14287 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14288 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset + 8);
14289 }
14290 else
14291 gcc_unreachable ();
14292 offset += 16;
14293 }
14294 if ((countval & 0x08) && max_size > 8)
14295 {
14296 if (TARGET_64BIT)
14297 emit_strmov (destmem, srcmem, destptr, srcptr, DImode, offset);
14298 else
14299 {
14300 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14301 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset + 4);
14302 }
14303 offset += 8;
14304 }
14305 if ((countval & 0x04) && max_size > 4)
14306 {
14307 emit_strmov (destmem, srcmem, destptr, srcptr, SImode, offset);
14308 offset += 4;
14309 }
14310 if ((countval & 0x02) && max_size > 2)
14311 {
14312 emit_strmov (destmem, srcmem, destptr, srcptr, HImode, offset);
14313 offset += 2;
14314 }
14315 if ((countval & 0x01) && max_size > 1)
14316 {
14317 emit_strmov (destmem, srcmem, destptr, srcptr, QImode, offset);
14318 offset += 1;
14319 }
14320 return;
14321 }
14322 if (max_size > 8)
14323 {
14324 count = expand_simple_binop (GET_MODE (count), AND, count, GEN_INT (max_size - 1),
14325 count, 1, OPTAB_DIRECT);
14326 expand_set_or_movmem_via_loop (destmem, srcmem, destptr, srcptr, NULL,
14327 count, QImode, 1, 4);
14328 return;
14329 }
14330
14331 /* When single stringops are available, we can cheaply advance the dest and src
14332 pointers. Otherwise we save code size by maintaining an offset (zero is readily
14333 available from the preceding rep operation) and using x86 addressing modes.
14334 */
14335 if (TARGET_SINGLE_STRINGOP)
14336 {
14337 if (max_size > 4)
14338 {
14339 rtx label = ix86_expand_aligntest (count, 4, true);
14340 src = change_address (srcmem, SImode, srcptr);
14341 dest = change_address (destmem, SImode, destptr);
14342 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14343 emit_label (label);
14344 LABEL_NUSES (label) = 1;
14345 }
14346 if (max_size > 2)
14347 {
14348 rtx label = ix86_expand_aligntest (count, 2, true);
14349 src = change_address (srcmem, HImode, srcptr);
14350 dest = change_address (destmem, HImode, destptr);
14351 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14352 emit_label (label);
14353 LABEL_NUSES (label) = 1;
14354 }
14355 if (max_size > 1)
14356 {
14357 rtx label = ix86_expand_aligntest (count, 1, true);
14358 src = change_address (srcmem, QImode, srcptr);
14359 dest = change_address (destmem, QImode, destptr);
14360 emit_insn (gen_strmov (destptr, dest, srcptr, src));
14361 emit_label (label);
14362 LABEL_NUSES (label) = 1;
14363 }
14364 }
14365 else
14366 {
14367 rtx offset = force_reg (Pmode, const0_rtx);
14368 rtx tmp;
14369
14370 if (max_size > 4)
14371 {
14372 rtx label = ix86_expand_aligntest (count, 4, true);
14373 src = change_address (srcmem, SImode, srcptr);
14374 dest = change_address (destmem, SImode, destptr);
14375 emit_move_insn (dest, src);
14376 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (4), NULL,
14377 true, OPTAB_LIB_WIDEN);
14378 if (tmp != offset)
14379 emit_move_insn (offset, tmp);
14380 emit_label (label);
14381 LABEL_NUSES (label) = 1;
14382 }
14383 if (max_size > 2)
14384 {
14385 rtx label = ix86_expand_aligntest (count, 2, true);
14386 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14387 src = change_address (srcmem, HImode, tmp);
14388 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14389 dest = change_address (destmem, HImode, tmp);
14390 emit_move_insn (dest, src);
14391 tmp = expand_simple_binop (Pmode, PLUS, offset, GEN_INT (2), tmp,
14392 true, OPTAB_LIB_WIDEN);
14393 if (tmp != offset)
14394 emit_move_insn (offset, tmp);
14395 emit_label (label);
14396 LABEL_NUSES (label) = 1;
14397 }
14398 if (max_size > 1)
14399 {
14400 rtx label = ix86_expand_aligntest (count, 1, true);
14401 tmp = gen_rtx_PLUS (Pmode, srcptr, offset);
14402 src = change_address (srcmem, QImode, tmp);
14403 tmp = gen_rtx_PLUS (Pmode, destptr, offset);
14404 dest = change_address (destmem, QImode, tmp);
14405 emit_move_insn (dest, src);
14406 emit_label (label);
14407 LABEL_NUSES (label) = 1;
14408 }
14409 }
14410 }
14411
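/* For illustration, with a known count the epilogue above just tests the
low bits of the byte count: a remainder of 7 on a 64-bit target emits one
SImode, one HImode and one QImode move (4 + 2 + 1 bytes) with no loop and
no branches.  */
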
14412 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14413 static void
14414 expand_setmem_epilogue_via_loop (rtx destmem, rtx destptr, rtx value,
14415 rtx count, int max_size)
14416 {
14417 count =
14418 expand_simple_binop (counter_mode (count), AND, count,
14419 GEN_INT (max_size - 1), count, 1, OPTAB_DIRECT);
14420 expand_set_or_movmem_via_loop (destmem, NULL, destptr, NULL,
14421 gen_lowpart (QImode, value), count, QImode,
14422 1, max_size / 2);
14423 }
14424
14425 /* Output code to set at most count & (max_size - 1) bytes starting at DEST. */
14426 static void
14427 expand_setmem_epilogue (rtx destmem, rtx destptr, rtx value, rtx count, int max_size)
14428 {
14429 rtx dest;
14430
14431 if (CONST_INT_P (count))
14432 {
14433 HOST_WIDE_INT countval = INTVAL (count);
14434 int offset = 0;
14435
14436 if ((countval & 0x10) && max_size > 16)
14437 {
14438 if (TARGET_64BIT)
14439 {
14440 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14441 emit_insn (gen_strset (destptr, dest, value));
14442 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset + 8);
14443 emit_insn (gen_strset (destptr, dest, value));
14444 }
14445 else
14446 gcc_unreachable ();
14447 offset += 16;
14448 }
14449 if ((countval & 0x08) && max_size > 8)
14450 {
14451 if (TARGET_64BIT)
14452 {
14453 dest = adjust_automodify_address_nv (destmem, DImode, destptr, offset);
14454 emit_insn (gen_strset (destptr, dest, value));
14455 }
14456 else
14457 {
14458 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14459 emit_insn (gen_strset (destptr, dest, value));
14460 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset + 4);
14461 emit_insn (gen_strset (destptr, dest, value));
14462 }
14463 offset += 8;
14464 }
14465 if ((countval & 0x04) && max_size > 4)
14466 {
14467 dest = adjust_automodify_address_nv (destmem, SImode, destptr, offset);
14468 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14469 offset += 4;
14470 }
14471 if ((countval & 0x02) && max_size > 2)
14472 {
14473 dest = adjust_automodify_address_nv (destmem, HImode, destptr, offset);
14474 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14475 offset += 2;
14476 }
14477 if ((countval & 0x01) && max_size > 1)
14478 {
14479 dest = adjust_automodify_address_nv (destmem, QImode, destptr, offset);
14480 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14481 offset += 1;
14482 }
14483 return;
14484 }
14485 if (max_size > 32)
14486 {
14487 expand_setmem_epilogue_via_loop (destmem, destptr, value, count, max_size);
14488 return;
14489 }
14490 if (max_size > 16)
14491 {
14492 rtx label = ix86_expand_aligntest (count, 16, true);
14493 if (TARGET_64BIT)
14494 {
14495 dest = change_address (destmem, DImode, destptr);
14496 emit_insn (gen_strset (destptr, dest, value));
14497 emit_insn (gen_strset (destptr, dest, value));
14498 }
14499 else
14500 {
14501 dest = change_address (destmem, SImode, destptr);
14502 emit_insn (gen_strset (destptr, dest, value));
14503 emit_insn (gen_strset (destptr, dest, value));
14504 emit_insn (gen_strset (destptr, dest, value));
14505 emit_insn (gen_strset (destptr, dest, value));
14506 }
14507 emit_label (label);
14508 LABEL_NUSES (label) = 1;
14509 }
14510 if (max_size > 8)
14511 {
14512 rtx label = ix86_expand_aligntest (count, 8, true);
14513 if (TARGET_64BIT)
14514 {
14515 dest = change_address (destmem, DImode, destptr);
14516 emit_insn (gen_strset (destptr, dest, value));
14517 }
14518 else
14519 {
14520 dest = change_address (destmem, SImode, destptr);
14521 emit_insn (gen_strset (destptr, dest, value));
14522 emit_insn (gen_strset (destptr, dest, value));
14523 }
14524 emit_label (label);
14525 LABEL_NUSES (label) = 1;
14526 }
14527 if (max_size > 4)
14528 {
14529 rtx label = ix86_expand_aligntest (count, 4, true);
14530 dest = change_address (destmem, SImode, destptr);
14531 emit_insn (gen_strset (destptr, dest, gen_lowpart (SImode, value)));
14532 emit_label (label);
14533 LABEL_NUSES (label) = 1;
14534 }
14535 if (max_size > 2)
14536 {
14537 rtx label = ix86_expand_aligntest (count, 2, true);
14538 dest = change_address (destmem, HImode, destptr);
14539 emit_insn (gen_strset (destptr, dest, gen_lowpart (HImode, value)));
14540 emit_label (label);
14541 LABEL_NUSES (label) = 1;
14542 }
14543 if (max_size > 1)
14544 {
14545 rtx label = ix86_expand_aligntest (count, 1, true);
14546 dest = change_address (destmem, QImode, destptr);
14547 emit_insn (gen_strset (destptr, dest, gen_lowpart (QImode, value)));
14548 emit_label (label);
14549 LABEL_NUSES (label) = 1;
14550 }
14551 }
14552
14553 /* Copy enough from SRC to DEST to align DEST, known to be aligned by ALIGN, to
14554 DESIRED_ALIGNMENT. */
14555 static void
14556 expand_movmem_prologue (rtx destmem, rtx srcmem,
14557 rtx destptr, rtx srcptr, rtx count,
14558 int align, int desired_alignment)
14559 {
14560 if (align <= 1 && desired_alignment > 1)
14561 {
14562 rtx label = ix86_expand_aligntest (destptr, 1, false);
14563 srcmem = change_address (srcmem, QImode, srcptr);
14564 destmem = change_address (destmem, QImode, destptr);
14565 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14566 ix86_adjust_counter (count, 1);
14567 emit_label (label);
14568 LABEL_NUSES (label) = 1;
14569 }
14570 if (align <= 2 && desired_alignment > 2)
14571 {
14572 rtx label = ix86_expand_aligntest (destptr, 2, false);
14573 srcmem = change_address (srcmem, HImode, srcptr);
14574 destmem = change_address (destmem, HImode, destptr);
14575 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14576 ix86_adjust_counter (count, 2);
14577 emit_label (label);
14578 LABEL_NUSES (label) = 1;
14579 }
14580 if (align <= 4 && desired_alignment > 4)
14581 {
14582 rtx label = ix86_expand_aligntest (destptr, 4, false);
14583 srcmem = change_address (srcmem, SImode, srcptr);
14584 destmem = change_address (destmem, SImode, destptr);
14585 emit_insn (gen_strmov (destptr, destmem, srcptr, srcmem));
14586 ix86_adjust_counter (count, 4);
14587 emit_label (label);
14588 LABEL_NUSES (label) = 1;
14589 }
14590 gcc_assert (desired_alignment <= 8);
14591 }
14592
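/* For illustration, aligning a destination of unknown alignment up to 8
bytes emits at most three guarded single moves (1, 2 and 4 bytes), each
preceded by an alignment test of DESTPTR, with COUNT decremented to
match.  */
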
14593 /* Set enough of DEST to align DEST, known to be aligned by ALIGN, to
14594 DESIRED_ALIGNMENT. */
14595 static void
14596 expand_setmem_prologue (rtx destmem, rtx destptr, rtx value, rtx count,
14597 int align, int desired_alignment)
14598 {
14599 if (align <= 1 && desired_alignment > 1)
14600 {
14601 rtx label = ix86_expand_aligntest (destptr, 1, false);
14602 destmem = change_address (destmem, QImode, destptr);
14603 emit_insn (gen_strset (destptr, destmem, gen_lowpart (QImode, value)));
14604 ix86_adjust_counter (count, 1);
14605 emit_label (label);
14606 LABEL_NUSES (label) = 1;
14607 }
14608 if (align <= 2 && desired_alignment > 2)
14609 {
14610 rtx label = ix86_expand_aligntest (destptr, 2, false);
14611 destmem = change_address (destmem, HImode, destptr);
14612 emit_insn (gen_strset (destptr, destmem, gen_lowpart (HImode, value)));
14613 ix86_adjust_counter (count, 2);
14614 emit_label (label);
14615 LABEL_NUSES (label) = 1;
14616 }
14617 if (align <= 4 && desired_alignment > 4)
14618 {
14619 rtx label = ix86_expand_aligntest (destptr, 4, false);
14620 destmem = change_address (destmem, SImode, destptr);
14621 emit_insn (gen_strset (destptr, destmem, gen_lowpart (SImode, value)));
14622 ix86_adjust_counter (count, 4);
14623 emit_label (label);
14624 LABEL_NUSES (label) = 1;
14625 }
14626 gcc_assert (desired_alignment <= 8);
14627 }
14628
14629 /* Given COUNT and EXPECTED_SIZE, decide on codegen of string operation. */
14630 static enum stringop_alg
14631 decide_alg (HOST_WIDE_INT count, HOST_WIDE_INT expected_size, bool memset,
14632 int *dynamic_check)
14633 {
14634 const struct stringop_algs * algs;
14635
14636 *dynamic_check = -1;
14637 if (memset)
14638 algs = &ix86_cost->memset[TARGET_64BIT != 0];
14639 else
14640 algs = &ix86_cost->memcpy[TARGET_64BIT != 0];
14641 if (stringop_alg != no_stringop)
14642 return stringop_alg;
14643 /* rep; movq or rep; movl is the smallest variant. */
14644 else if (optimize_size)
14645 {
14646 if (!count || (count & 3))
14647 return rep_prefix_1_byte;
14648 else
14649 return rep_prefix_4_byte;
14650 }
14651 /* Very tiny blocks are best handled via the loop; REP is expensive to set up.
14652 */
14653 else if (expected_size != -1 && expected_size < 4)
14654 return loop_1_byte;
14655 else if (expected_size != -1)
14656 {
14657 unsigned int i;
14658 enum stringop_alg alg = libcall;
14659 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14660 {
14661 gcc_assert (algs->size[i].max);
14662 if (algs->size[i].max >= expected_size || algs->size[i].max == -1)
14663 {
14664 if (algs->size[i].alg != libcall)
14665 alg = algs->size[i].alg;
14666 /* Honor TARGET_INLINE_ALL_STRINGOPS by picking
14667 last non-libcall inline algorithm. */
14668 if (TARGET_INLINE_ALL_STRINGOPS)
14669 {
14670 /* When the current size is best copied by a libcall,
14671 but we are still forced to inline, run the heuristic below
14672 that will pick code for medium-sized blocks. */
14673 if (alg != libcall)
14674 return alg;
14675 break;
14676 }
14677 else
14678 return algs->size[i].alg;
14679 }
14680 }
14681 gcc_assert (TARGET_INLINE_ALL_STRINGOPS);
14682 }
14683 /* When asked to inline the call anyway, try to pick a meaningful choice.
14684 We look for the maximal size of block that is faster to copy by hand and
14685 take blocks of at most that size, guessing that the average size will
14686 be roughly half of the block.
14687
14688 If this turns out to be bad, we might simply specify the preferred
14689 choice in ix86_costs. */
14690 if ((TARGET_INLINE_ALL_STRINGOPS || TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14691 && algs->unknown_size == libcall)
14692 {
14693 int max = -1;
14694 enum stringop_alg alg;
14695 int i;
14696
14697 for (i = 0; i < MAX_STRINGOP_ALGS; i++)
14698 if (algs->size[i].alg != libcall && algs->size[i].alg)
14699 max = algs->size[i].max;
14700 if (max == -1)
14701 max = 4096;
14702 alg = decide_alg (count, max / 2, memset, dynamic_check);
14703 gcc_assert (*dynamic_check == -1);
14704 gcc_assert (alg != libcall);
14705 if (TARGET_INLINE_STRINGOPS_DYNAMICALLY)
14706 *dynamic_check = max;
14707 return alg;
14708 }
14709 return algs->unknown_size;
14710 }
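/* Illustrative sketch (not used anywhere in the port): the table walk in
   decide_alg above can be pictured with a stand-alone helper like the one
   below.  The struct layout, helper name and integer "alg" encoding are
   hypothetical; the real thresholds live in ix86_cost->memcpy / ->memset.  */
#if 0
struct alg_entry { int max; int alg; };	/* max == -1 means "this size and up" */

static int
pick_alg (const struct alg_entry *table, int n, int expected_size)
{
  int i;
  for (i = 0; i < n; i++)
    if (table[i].max >= expected_size || table[i].max == -1)
      return table[i].alg;		/* first bucket that covers the size */
  return -1;				/* otherwise fall back to a libcall */
}
#endif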
14711
14712 /* Decide on alignment. We know that the operand is already aligned to ALIGN
14713 (ALIGN can be based on profile feedback and thus it is not 100% guaranteed). */
14714 static int
14715 decide_alignment (int align,
14716 enum stringop_alg alg,
14717 int expected_size)
14718 {
14719 int desired_align = 0;
14720 switch (alg)
14721 {
14722 case no_stringop:
14723 gcc_unreachable ();
14724 case loop:
14725 case unrolled_loop:
14726 desired_align = GET_MODE_SIZE (Pmode);
14727 break;
14728 case rep_prefix_8_byte:
14729 desired_align = 8;
14730 break;
14731 case rep_prefix_4_byte:
14732 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14733 copying a whole cacheline at once. */
14734 if (TARGET_PENTIUMPRO)
14735 desired_align = 8;
14736 else
14737 desired_align = 4;
14738 break;
14739 case rep_prefix_1_byte:
14740 /* PentiumPro has special logic triggering for 8 byte aligned blocks,
14741 copying a whole cacheline at once. */
14742 if (TARGET_PENTIUMPRO)
14743 desired_align = 8;
14744 else
14745 desired_align = 1;
14746 break;
14747 case loop_1_byte:
14748 desired_align = 1;
14749 break;
14750 case libcall:
14751 return 0;
14752 }
14753
14754 if (optimize_size)
14755 desired_align = 1;
14756 if (desired_align < align)
14757 desired_align = align;
14758 if (expected_size != -1 && expected_size < 4)
14759 desired_align = align;
14760 return desired_align;
14761 }
14762
14763 /* Return the smallest power of 2 greater than VAL. */
14764 static int
14765 smallest_pow2_greater_than (int val)
14766 {
14767 int ret = 1;
14768 while (ret <= val)
14769 ret <<= 1;
14770 return ret;
14771 }
14772
14773 /* Expand string move (memcpy) operation. Use i386 string operations when
14774 profitable. ix86_expand_setmem contains similar code. The code depends upon
14775 architecture, block size and alignment, but always has the same
14776 overall structure:
14777
14778 1) Prologue guard: Conditional that jumps up to epilogues for small
14779 blocks that can be handled by the epilogue alone. This is faster but
14780 also needed for correctness, since the prologue assumes the block is larger
14781 than the desired alignment.
14782
14783 Optional dynamic check for size and libcall for large
14784 blocks is emitted here too, with -minline-stringops-dynamically.
14785
14786 2) Prologue: copy first few bytes in order to get destination aligned
14787 to DESIRED_ALIGN. It is emitted only when ALIGN is less than
14788 DESIRED_ALIGN and up to DESIRED_ALIGN - ALIGN bytes can be copied.
14789 We emit either a jump tree on power of two sized blocks, or a byte loop.
14790
14791 3) Main body: the copying loop itself, copying in SIZE_NEEDED chunks
14792 with specified algorithm.
14793
14794 4) Epilogue: code copying tail of the block that is too small to be
14795 handled by main body (or up to size guarded by prologue guard). */
14796
14797 int
14798 ix86_expand_movmem (rtx dst, rtx src, rtx count_exp, rtx align_exp,
14799 rtx expected_align_exp, rtx expected_size_exp)
14800 {
14801 rtx destreg;
14802 rtx srcreg;
14803 rtx label = NULL;
14804 rtx tmp;
14805 rtx jump_around_label = NULL;
14806 HOST_WIDE_INT align = 1;
14807 unsigned HOST_WIDE_INT count = 0;
14808 HOST_WIDE_INT expected_size = -1;
14809 int size_needed = 0, epilogue_size_needed;
14810 int desired_align = 0;
14811 enum stringop_alg alg;
14812 int dynamic_check;
14813
14814 if (CONST_INT_P (align_exp))
14815 align = INTVAL (align_exp);
14816 /* i386 can do misaligned access at reasonably increased cost. */
14817 if (CONST_INT_P (expected_align_exp)
14818 && INTVAL (expected_align_exp) > align)
14819 align = INTVAL (expected_align_exp);
14820 if (CONST_INT_P (count_exp))
14821 count = expected_size = INTVAL (count_exp);
14822 if (CONST_INT_P (expected_size_exp) && count == 0)
14823 expected_size = INTVAL (expected_size_exp);
14824
14825 /* Step 0: Decide on preferred algorithm, desired alignment and
14826 size of chunks to be copied by main loop. */
14827
14828 alg = decide_alg (count, expected_size, false, &dynamic_check);
14829 desired_align = decide_alignment (align, alg, expected_size);
14830
14831 if (!TARGET_ALIGN_STRINGOPS)
14832 align = desired_align;
14833
14834 if (alg == libcall)
14835 return 0;
14836 gcc_assert (alg != no_stringop);
14837 if (!count)
14838 count_exp = copy_to_mode_reg (GET_MODE (count_exp), count_exp);
14839 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
14840 srcreg = copy_to_mode_reg (Pmode, XEXP (src, 0));
14841 switch (alg)
14842 {
14843 case libcall:
14844 case no_stringop:
14845 gcc_unreachable ();
14846 case loop:
14847 size_needed = GET_MODE_SIZE (Pmode);
14848 break;
14849 case unrolled_loop:
14850 size_needed = GET_MODE_SIZE (Pmode) * (TARGET_64BIT ? 4 : 2);
14851 break;
14852 case rep_prefix_8_byte:
14853 size_needed = 8;
14854 break;
14855 case rep_prefix_4_byte:
14856 size_needed = 4;
14857 break;
14858 case rep_prefix_1_byte:
14859 case loop_1_byte:
14860 size_needed = 1;
14861 break;
14862 }
14863
14864 epilogue_size_needed = size_needed;
14865
14866 /* Step 1: Prologue guard. */
14867
14868 /* Alignment code needs count to be in register. */
14869 if (CONST_INT_P (count_exp) && desired_align > align)
14870 {
14871 enum machine_mode mode = SImode;
14872 if (TARGET_64BIT && (count & ~0xffffffff))
14873 mode = DImode;
14874 count_exp = force_reg (mode, count_exp);
14875 }
14876 gcc_assert (desired_align >= 1 && align >= 1);
14877
14878 /* Ensure that alignment prologue won't copy past end of block. */
14879 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
14880 {
14881 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
14882 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
14883 Make sure it is power of 2. */
14884 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
14885
14886 label = gen_label_rtx ();
14887 emit_cmp_and_jump_insns (count_exp,
14888 GEN_INT (epilogue_size_needed),
14889 LTU, 0, counter_mode (count_exp), 1, label);
14890 if (GET_CODE (count_exp) == CONST_INT)
14891 ;
14892 else if (expected_size == -1 || expected_size < epilogue_size_needed)
14893 predict_jump (REG_BR_PROB_BASE * 60 / 100);
14894 else
14895 predict_jump (REG_BR_PROB_BASE * 20 / 100);
14896 }
14897 /* Emit code to decide on runtime whether library call or inline should be
14898 used. */
14899 if (dynamic_check != -1)
14900 {
14901 rtx hot_label = gen_label_rtx ();
14902 jump_around_label = gen_label_rtx ();
14903 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
14904 LEU, 0, GET_MODE (count_exp), 1, hot_label);
14905 predict_jump (REG_BR_PROB_BASE * 90 / 100);
14906 emit_block_move_via_libcall (dst, src, count_exp, false);
14907 emit_jump (jump_around_label);
14908 emit_label (hot_label);
14909 }
14910
14911 /* Step 2: Alignment prologue. */
14912
14913 if (desired_align > align)
14914 {
14915 /* Except for the first move in epilogue, we no longer know
14916 constant offset in aliasing info. It does not seem worth
14917 the pain to maintain it for the first move, so throw away
14918 the info early. */
14919 src = change_address (src, BLKmode, srcreg);
14920 dst = change_address (dst, BLKmode, destreg);
14921 expand_movmem_prologue (dst, src, destreg, srcreg, count_exp, align,
14922 desired_align);
14923 }
14924 if (label && size_needed == 1)
14925 {
14926 emit_label (label);
14927 LABEL_NUSES (label) = 1;
14928 label = NULL;
14929 }
14930
14931 /* Step 3: Main loop. */
14932
14933 switch (alg)
14934 {
14935 case libcall:
14936 case no_stringop:
14937 gcc_unreachable ();
14938 case loop_1_byte:
14939 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14940 count_exp, QImode, 1, expected_size);
14941 break;
14942 case loop:
14943 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14944 count_exp, Pmode, 1, expected_size);
14945 break;
14946 case unrolled_loop:
14947 /* Unroll only by factor of 2 in 32bit mode, since we don't have enough
14948 registers for 4 temporaries anyway. */
14949 expand_set_or_movmem_via_loop (dst, src, destreg, srcreg, NULL,
14950 count_exp, Pmode, TARGET_64BIT ? 4 : 2,
14951 expected_size);
14952 break;
14953 case rep_prefix_8_byte:
14954 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14955 DImode);
14956 break;
14957 case rep_prefix_4_byte:
14958 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14959 SImode);
14960 break;
14961 case rep_prefix_1_byte:
14962 expand_movmem_via_rep_mov (dst, src, destreg, srcreg, count_exp,
14963 QImode);
14964 break;
14965 }
14966 /* Properly adjust the offsets of src and dest memory for aliasing. */
14967 if (CONST_INT_P (count_exp))
14968 {
14969 src = adjust_automodify_address_nv (src, BLKmode, srcreg,
14970 (count / size_needed) * size_needed);
14971 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
14972 (count / size_needed) * size_needed);
14973 }
14974 else
14975 {
14976 src = change_address (src, BLKmode, srcreg);
14977 dst = change_address (dst, BLKmode, destreg);
14978 }
14979
14980 /* Step 4: Epilogue to copy the remaining bytes. */
14981
14982 if (label)
14983 {
14984 /* When the main loop is done, COUNT_EXP might hold original count,
14985 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
14986 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
14987 bytes. Compensate if needed. */
14988
14989 if (size_needed < epilogue_size_needed)
14990 {
14991 tmp =
14992 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
14993 GEN_INT (size_needed - 1), count_exp, 1,
14994 OPTAB_DIRECT);
14995 if (tmp != count_exp)
14996 emit_move_insn (count_exp, tmp);
14997 }
14998 emit_label (label);
14999 LABEL_NUSES (label) = 1;
15000 }
15001
15002 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15003 expand_movmem_epilogue (dst, src, destreg, srcreg, count_exp,
15004 epilogue_size_needed);
15005 if (jump_around_label)
15006 emit_label (jump_around_label);
15007 return 1;
15008 }
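/* Illustrative sketch only: the RTL emitted by ix86_expand_movmem is roughly
   equivalent to the C skeleton below (hypothetical names; CHUNK stands for
   SIZE_NEEDED and the simple "loop" algorithm is assumed).  The step numbers
   match the comment before the function.  */
#if 0
#include <stdint.h>
#include <stddef.h>

static void
movmem_skeleton (char *dst, const char *src, size_t count)
{
  const size_t chunk = sizeof (void *);

  if (count < chunk)			/* 1) prologue guard */
    goto epilogue;
  while ((uintptr_t) dst % chunk)	/* 2) alignment prologue */
    { *dst++ = *src++; count--; }
  for (; count >= chunk; count -= chunk)	/* 3) main loop, CHUNK at a time */
    {
      size_t i;
      for (i = 0; i < chunk; i++)
	dst[i] = src[i];
      dst += chunk;
      src += chunk;
    }
 epilogue:				/* 4) epilogue for the remaining tail */
  while (count--)
    *dst++ = *src++;
}
#endif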
15009
15010 /* Helper function for memset (setmem). For a QImode value 0xXY produce
15011 0xXYXYXYXY of the width specified by MODE. This is essentially
15012 a * 0x01010101, but we can do slightly better than
15013 synth_mult by unwinding the sequence by hand on CPUs with
15014 slow multiply. */
15015 static rtx
15016 promote_duplicated_reg (enum machine_mode mode, rtx val)
15017 {
15018 enum machine_mode valmode = GET_MODE (val);
15019 rtx tmp;
15020 int nops = mode == DImode ? 3 : 2;
15021
15022 gcc_assert (mode == SImode || mode == DImode);
15023 if (val == const0_rtx)
15024 return copy_to_mode_reg (mode, const0_rtx);
15025 if (CONST_INT_P (val))
15026 {
15027 HOST_WIDE_INT v = INTVAL (val) & 255;
15028
15029 v |= v << 8;
15030 v |= v << 16;
15031 if (mode == DImode)
15032 v |= (v << 16) << 16;
15033 return copy_to_mode_reg (mode, gen_int_mode (v, mode));
15034 }
15035
15036 if (valmode == VOIDmode)
15037 valmode = QImode;
15038 if (valmode != QImode)
15039 val = gen_lowpart (QImode, val);
15040 if (mode == QImode)
15041 return val;
15042 if (!TARGET_PARTIAL_REG_STALL)
15043 nops--;
15044 if (ix86_cost->mult_init[mode == DImode ? 3 : 2]
15045 + ix86_cost->mult_bit * (mode == DImode ? 8 : 4)
15046 <= (ix86_cost->shift_const + ix86_cost->add) * nops
15047 + (COSTS_N_INSNS (TARGET_PARTIAL_REG_STALL == 0)))
15048 {
15049 rtx reg = convert_modes (mode, QImode, val, true);
15050 tmp = promote_duplicated_reg (mode, const1_rtx);
15051 return expand_simple_binop (mode, MULT, reg, tmp, NULL, 1,
15052 OPTAB_DIRECT);
15053 }
15054 else
15055 {
15056 rtx reg = convert_modes (mode, QImode, val, true);
15057
15058 if (!TARGET_PARTIAL_REG_STALL)
15059 if (mode == SImode)
15060 emit_insn (gen_movsi_insv_1 (reg, reg));
15061 else
15062 emit_insn (gen_movdi_insv_1_rex64 (reg, reg));
15063 else
15064 {
15065 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (8),
15066 NULL, 1, OPTAB_DIRECT);
15067 reg =
15068 expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15069 }
15070 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (16),
15071 NULL, 1, OPTAB_DIRECT);
15072 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15073 if (mode == SImode)
15074 return reg;
15075 tmp = expand_simple_binop (mode, ASHIFT, reg, GEN_INT (32),
15076 NULL, 1, OPTAB_DIRECT);
15077 reg = expand_simple_binop (mode, IOR, reg, tmp, reg, 1, OPTAB_DIRECT);
15078 return reg;
15079 }
15080 }
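/* Illustrative sketch only (hypothetical helper): the constant path above
   computes exactly this byte replication; the non-constant path builds the
   same value with shifts/ORs or with a multiply, whichever the cost tables
   say is cheaper.  */
#if 0
static unsigned int
replicate_byte_si (unsigned char b)
{
  unsigned int v = b;
  v |= v << 8;		/* 0x000000XY -> 0x0000XYXY */
  v |= v << 16;		/* 0x0000XYXY -> 0xXYXYXYXY */
  return v;		/* equal to b * 0x01010101 */
}
#endif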
15081
15082 /* Duplicate value VAL using promote_duplicated_reg into maximal size that will
15083 be needed by main loop copying SIZE_NEEDED chunks and prologue getting
15084 alignment from ALIGN to DESIRED_ALIGN. */
15085 static rtx
15086 promote_duplicated_reg_to_size (rtx val, int size_needed, int desired_align, int align)
15087 {
15088 rtx promoted_val;
15089
15090 if (TARGET_64BIT
15091 && (size_needed > 4 || (desired_align > align && desired_align > 4)))
15092 promoted_val = promote_duplicated_reg (DImode, val);
15093 else if (size_needed > 2 || (desired_align > align && desired_align > 2))
15094 promoted_val = promote_duplicated_reg (SImode, val);
15095 else if (size_needed > 1 || (desired_align > align && desired_align > 1))
15096 promoted_val = promote_duplicated_reg (HImode, val);
15097 else
15098 promoted_val = val;
15099
15100 return promoted_val;
15101 }
15102
15103 /* Expand string set operation (memset). Use i386 string operations when
15104 profitable. See the ix86_expand_movmem comment for an explanation of the
15105 individual steps performed. */
15106 int
15107 ix86_expand_setmem (rtx dst, rtx count_exp, rtx val_exp, rtx align_exp,
15108 rtx expected_align_exp, rtx expected_size_exp)
15109 {
15110 rtx destreg;
15111 rtx label = NULL;
15112 rtx tmp;
15113 rtx jump_around_label = NULL;
15114 HOST_WIDE_INT align = 1;
15115 unsigned HOST_WIDE_INT count = 0;
15116 HOST_WIDE_INT expected_size = -1;
15117 int size_needed = 0, epilogue_size_needed;
15118 int desired_align = 0;
15119 enum stringop_alg alg;
15120 rtx promoted_val = NULL;
15121 bool force_loopy_epilogue = false;
15122 int dynamic_check;
15123
15124 if (CONST_INT_P (align_exp))
15125 align = INTVAL (align_exp);
15126 /* i386 can do misaligned access at reasonably increased cost. */
15127 if (CONST_INT_P (expected_align_exp)
15128 && INTVAL (expected_align_exp) > align)
15129 align = INTVAL (expected_align_exp);
15130 if (CONST_INT_P (count_exp))
15131 count = expected_size = INTVAL (count_exp);
15132 if (CONST_INT_P (expected_size_exp) && count == 0)
15133 expected_size = INTVAL (expected_size_exp);
15134
15135 /* Step 0: Decide on preferred algorithm, desired alignment and
15136 size of chunks to be copied by main loop. */
15137
15138 alg = decide_alg (count, expected_size, true, &dynamic_check);
15139 desired_align = decide_alignment (align, alg, expected_size);
15140
15141 if (!TARGET_ALIGN_STRINGOPS)
15142 align = desired_align;
15143
15144 if (alg == libcall)
15145 return 0;
15146 gcc_assert (alg != no_stringop);
15147 if (!count)
15148 count_exp = copy_to_mode_reg (counter_mode (count_exp), count_exp);
15149 destreg = copy_to_mode_reg (Pmode, XEXP (dst, 0));
15150 switch (alg)
15151 {
15152 case libcall:
15153 case no_stringop:
15154 gcc_unreachable ();
15155 case loop:
15156 size_needed = GET_MODE_SIZE (Pmode);
15157 break;
15158 case unrolled_loop:
15159 size_needed = GET_MODE_SIZE (Pmode) * 4;
15160 break;
15161 case rep_prefix_8_byte:
15162 size_needed = 8;
15163 break;
15164 case rep_prefix_4_byte:
15165 size_needed = 4;
15166 break;
15167 case rep_prefix_1_byte:
15168 case loop_1_byte:
15169 size_needed = 1;
15170 break;
15171 }
15172 epilogue_size_needed = size_needed;
15173
15174 /* Step 1: Prologue guard. */
15175
15176 /* Alignment code needs count to be in register. */
15177 if (CONST_INT_P (count_exp) && desired_align > align)
15178 {
15179 enum machine_mode mode = SImode;
15180 if (TARGET_64BIT && (count & ~0xffffffff))
15181 mode = DImode;
15182 count_exp = force_reg (mode, count_exp);
15183 }
15184 /* Do the cheap promotion to allow better CSE across the
15185 main loop and epilogue (i.e. one load of the big constant in
15186 front of all the code). */
15187 if (CONST_INT_P (val_exp))
15188 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15189 desired_align, align);
15190 /* Ensure that alignment prologue won't copy past end of block. */
15191 if (size_needed > 1 || (desired_align > 1 && desired_align > align))
15192 {
15193 epilogue_size_needed = MAX (size_needed - 1, desired_align - align);
15194 /* Epilogue always copies COUNT_EXP & EPILOGUE_SIZE_NEEDED bytes.
15195 Make sure it is power of 2. */
15196 epilogue_size_needed = smallest_pow2_greater_than (epilogue_size_needed);
15197
15198 /* To improve performance of small blocks, we jump around the VAL
15199 promoting code. This means that if the promoted VAL is not constant,
15200 we might not use it in the epilogue and have to use the byte
15201 loop variant. */
15202 if (epilogue_size_needed > 2 && !promoted_val)
15203 force_loopy_epilogue = true;
15204 label = gen_label_rtx ();
15205 emit_cmp_and_jump_insns (count_exp,
15206 GEN_INT (epilogue_size_needed),
15207 LTU, 0, counter_mode (count_exp), 1, label);
15208 if (GET_CODE (count_exp) == CONST_INT)
15209 ;
15210 else if (expected_size == -1 || expected_size <= epilogue_size_needed)
15211 predict_jump (REG_BR_PROB_BASE * 60 / 100);
15212 else
15213 predict_jump (REG_BR_PROB_BASE * 20 / 100);
15214 }
15215 if (dynamic_check != -1)
15216 {
15217 rtx hot_label = gen_label_rtx ();
15218 jump_around_label = gen_label_rtx ();
15219 emit_cmp_and_jump_insns (count_exp, GEN_INT (dynamic_check - 1),
15220 LEU, 0, counter_mode (count_exp), 1, hot_label);
15221 predict_jump (REG_BR_PROB_BASE * 90 / 100);
15222 set_storage_via_libcall (dst, count_exp, val_exp, false);
15223 emit_jump (jump_around_label);
15224 emit_label (hot_label);
15225 }
15226
15227 /* Step 2: Alignment prologue. */
15228
15229 /* Do the expensive promotion once we have branched off the small blocks. */
15230 if (!promoted_val)
15231 promoted_val = promote_duplicated_reg_to_size (val_exp, size_needed,
15232 desired_align, align);
15233 gcc_assert (desired_align >= 1 && align >= 1);
15234
15235 if (desired_align > align)
15236 {
15237 /* Except for the first move in epilogue, we no longer know
15238 constant offset in aliasing info. It does not seem worth
15239 the pain to maintain it for the first move, so throw away
15240 the info early. */
15241 dst = change_address (dst, BLKmode, destreg);
15242 expand_setmem_prologue (dst, destreg, promoted_val, count_exp, align,
15243 desired_align);
15244 }
15245 if (label && size_needed == 1)
15246 {
15247 emit_label (label);
15248 LABEL_NUSES (label) = 1;
15249 label = NULL;
15250 }
15251
15252 /* Step 3: Main loop. */
15253
15254 switch (alg)
15255 {
15256 case libcall:
15257 case no_stringop:
15258 gcc_unreachable ();
15259 case loop_1_byte:
15260 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15261 count_exp, QImode, 1, expected_size);
15262 break;
15263 case loop:
15264 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15265 count_exp, Pmode, 1, expected_size);
15266 break;
15267 case unrolled_loop:
15268 expand_set_or_movmem_via_loop (dst, NULL, destreg, NULL, promoted_val,
15269 count_exp, Pmode, 4, expected_size);
15270 break;
15271 case rep_prefix_8_byte:
15272 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15273 DImode);
15274 break;
15275 case rep_prefix_4_byte:
15276 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15277 SImode);
15278 break;
15279 case rep_prefix_1_byte:
15280 expand_setmem_via_rep_stos (dst, destreg, promoted_val, count_exp,
15281 QImode);
15282 break;
15283 }
15284 /* Properly adjust the offset of the destination memory for aliasing. */
15285 if (CONST_INT_P (count_exp))
15286 dst = adjust_automodify_address_nv (dst, BLKmode, destreg,
15287 (count / size_needed) * size_needed);
15288 else
15289 dst = change_address (dst, BLKmode, destreg);
15290
15291 /* Step 4: Epilogue to copy the remaining bytes. */
15292
15293 if (label)
15294 {
15295 /* When the main loop is done, COUNT_EXP might hold original count,
15296 while we want to copy only COUNT_EXP & SIZE_NEEDED bytes.
15297 Epilogue code will actually copy COUNT_EXP & EPILOGUE_SIZE_NEEDED
15298 bytes. Compensate if needed. */
15299
15300 if (size_needed < desired_align - align)
15301 {
15302 tmp =
15303 expand_simple_binop (counter_mode (count_exp), AND, count_exp,
15304 GEN_INT (size_needed - 1), count_exp, 1,
15305 OPTAB_DIRECT);
15306 size_needed = desired_align - align + 1;
15307 if (tmp != count_exp)
15308 emit_move_insn (count_exp, tmp);
15309 }
15310 emit_label (label);
15311 LABEL_NUSES (label) = 1;
15312 }
15313 if (count_exp != const0_rtx && epilogue_size_needed > 1)
15314 {
15315 if (force_loopy_epilogue)
15316 expand_setmem_epilogue_via_loop (dst, destreg, val_exp, count_exp,
15317 size_needed);
15318 else
15319 expand_setmem_epilogue (dst, destreg, promoted_val, count_exp,
15320 size_needed);
15321 }
15322 if (jump_around_label)
15323 emit_label (jump_around_label);
15324 return 1;
15325 }
15326
15327 /* Expand the appropriate insns for doing strlen if not just doing
15328 repnz; scasb
15329
15330 out = result, initialized with the start address
15331 align_rtx = alignment of the address.
15332 scratch = scratch register, initialized with the start address when
15333 not aligned, otherwise undefined
15334
15335 This is just the body. It needs the initializations mentioned above and
15336 some address computing at the end. These things are done in i386.md. */
15337
15338 static void
15339 ix86_expand_strlensi_unroll_1 (rtx out, rtx src, rtx align_rtx)
15340 {
15341 int align;
15342 rtx tmp;
15343 rtx align_2_label = NULL_RTX;
15344 rtx align_3_label = NULL_RTX;
15345 rtx align_4_label = gen_label_rtx ();
15346 rtx end_0_label = gen_label_rtx ();
15347 rtx mem;
15348 rtx tmpreg = gen_reg_rtx (SImode);
15349 rtx scratch = gen_reg_rtx (SImode);
15350 rtx cmp;
15351
15352 align = 0;
15353 if (CONST_INT_P (align_rtx))
15354 align = INTVAL (align_rtx);
15355
15356 /* Loop to check 1..3 bytes for null to get an aligned pointer. */
15357
15358 /* Is there a known alignment and is it less than 4? */
15359 if (align < 4)
15360 {
15361 rtx scratch1 = gen_reg_rtx (Pmode);
15362 emit_move_insn (scratch1, out);
15363 /* Is there a known alignment and is it not 2? */
15364 if (align != 2)
15365 {
15366 align_3_label = gen_label_rtx (); /* Label when aligned to 3-byte */
15367 align_2_label = gen_label_rtx (); /* Label when aligned to 2-byte */
15368
15369 /* Leave just the 3 lower bits. */
15370 align_rtx = expand_binop (Pmode, and_optab, scratch1, GEN_INT (3),
15371 NULL_RTX, 0, OPTAB_WIDEN);
15372
15373 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15374 Pmode, 1, align_4_label);
15375 emit_cmp_and_jump_insns (align_rtx, const2_rtx, EQ, NULL,
15376 Pmode, 1, align_2_label);
15377 emit_cmp_and_jump_insns (align_rtx, const2_rtx, GTU, NULL,
15378 Pmode, 1, align_3_label);
15379 }
15380 else
15381 {
15382 /* Since the alignment is 2, we have to check 2 or 0 bytes;
15383 check whether it is aligned to a 4-byte boundary. */
15384
15385 align_rtx = expand_binop (Pmode, and_optab, scratch1, const2_rtx,
15386 NULL_RTX, 0, OPTAB_WIDEN);
15387
15388 emit_cmp_and_jump_insns (align_rtx, const0_rtx, EQ, NULL,
15389 Pmode, 1, align_4_label);
15390 }
15391
15392 mem = change_address (src, QImode, out);
15393
15394 /* Now compare the bytes. */
15395
15396 /* Compare the first n unaligned bytes on a byte-by-byte basis. */
15397 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL,
15398 QImode, 1, end_0_label);
15399
15400 /* Increment the address. */
15401 if (TARGET_64BIT)
15402 emit_insn (gen_adddi3 (out, out, const1_rtx));
15403 else
15404 emit_insn (gen_addsi3 (out, out, const1_rtx));
15405
15406 /* Not needed with an alignment of 2. */
15407 if (align != 2)
15408 {
15409 emit_label (align_2_label);
15410
15411 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15412 end_0_label);
15413
15414 if (TARGET_64BIT)
15415 emit_insn (gen_adddi3 (out, out, const1_rtx));
15416 else
15417 emit_insn (gen_addsi3 (out, out, const1_rtx));
15418
15419 emit_label (align_3_label);
15420 }
15421
15422 emit_cmp_and_jump_insns (mem, const0_rtx, EQ, NULL, QImode, 1,
15423 end_0_label);
15424
15425 if (TARGET_64BIT)
15426 emit_insn (gen_adddi3 (out, out, const1_rtx));
15427 else
15428 emit_insn (gen_addsi3 (out, out, const1_rtx));
15429 }
15430
15431 /* Generate loop to check 4 bytes at a time. It is not a good idea to
15432 align this loop: it only makes the program larger and does not help to
15433 speed it up. */
15434 emit_label (align_4_label);
15435
15436 mem = change_address (src, SImode, out);
15437 emit_move_insn (scratch, mem);
15438 if (TARGET_64BIT)
15439 emit_insn (gen_adddi3 (out, out, GEN_INT (4)));
15440 else
15441 emit_insn (gen_addsi3 (out, out, GEN_INT (4)));
15442
15443 /* This formula yields a nonzero result iff one of the bytes is zero.
15444 This saves three branches inside the loop and many cycles. */
15445
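/* Illustrative worked example of the test (x - 0x01010101) & ~x & 0x80808080
   built by the four insns below:
     x = 0x41420043 (zero in byte 1): 0x4040ff42 & 0xbebdffbc & 0x80808080
       = 0x00008000, nonzero, so the loop exits;
     x = 0x41424344 (no zero byte):   0x40414243 & 0xbebdbcbb & 0x80808080
       = 0x00000000, so the loop continues.  */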
15446 emit_insn (gen_addsi3 (tmpreg, scratch, GEN_INT (-0x01010101)));
15447 emit_insn (gen_one_cmplsi2 (scratch, scratch));
15448 emit_insn (gen_andsi3 (tmpreg, tmpreg, scratch));
15449 emit_insn (gen_andsi3 (tmpreg, tmpreg,
15450 gen_int_mode (0x80808080, SImode)));
15451 emit_cmp_and_jump_insns (tmpreg, const0_rtx, EQ, 0, SImode, 1,
15452 align_4_label);
15453
15454 if (TARGET_CMOVE)
15455 {
15456 rtx reg = gen_reg_rtx (SImode);
15457 rtx reg2 = gen_reg_rtx (Pmode);
15458 emit_move_insn (reg, tmpreg);
15459 emit_insn (gen_lshrsi3 (reg, reg, GEN_INT (16)));
15460
15461 /* If zero is not in the first two bytes, move two bytes forward. */
15462 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15463 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15464 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15465 emit_insn (gen_rtx_SET (VOIDmode, tmpreg,
15466 gen_rtx_IF_THEN_ELSE (SImode, tmp,
15467 reg,
15468 tmpreg)));
15469 /* Emit lea manually to avoid clobbering of flags. */
15470 emit_insn (gen_rtx_SET (SImode, reg2,
15471 gen_rtx_PLUS (Pmode, out, const2_rtx)));
15472
15473 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15474 tmp = gen_rtx_EQ (VOIDmode, tmp, const0_rtx);
15475 emit_insn (gen_rtx_SET (VOIDmode, out,
15476 gen_rtx_IF_THEN_ELSE (Pmode, tmp,
15477 reg2,
15478 out)));
15479
15480 }
15481 else
15482 {
15483 rtx end_2_label = gen_label_rtx ();
15484 /* Is zero in the first two bytes? */
15485
15486 emit_insn (gen_testsi_ccno_1 (tmpreg, GEN_INT (0x8080)));
15487 tmp = gen_rtx_REG (CCNOmode, FLAGS_REG);
15488 tmp = gen_rtx_NE (VOIDmode, tmp, const0_rtx);
15489 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
15490 gen_rtx_LABEL_REF (VOIDmode, end_2_label),
15491 pc_rtx);
15492 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
15493 JUMP_LABEL (tmp) = end_2_label;
15494
15495 /* Not in the first two. Move two bytes forward. */
15496 emit_insn (gen_lshrsi3 (tmpreg, tmpreg, GEN_INT (16)));
15497 if (TARGET_64BIT)
15498 emit_insn (gen_adddi3 (out, out, const2_rtx));
15499 else
15500 emit_insn (gen_addsi3 (out, out, const2_rtx));
15501
15502 emit_label (end_2_label);
15503
15504 }
15505
15506 /* Avoid branch in fixing the byte. */
15507 tmpreg = gen_lowpart (QImode, tmpreg);
15508 emit_insn (gen_addqi3_cc (tmpreg, tmpreg, tmpreg));
15509 cmp = gen_rtx_LTU (Pmode, gen_rtx_REG (CCmode, FLAGS_REG), const0_rtx);
15510 if (TARGET_64BIT)
15511 emit_insn (gen_subdi3_carry_rex64 (out, out, GEN_INT (3), cmp));
15512 else
15513 emit_insn (gen_subsi3_carry (out, out, GEN_INT (3), cmp));
15514
15515 emit_label (end_0_label);
15516 }
15517
15518 /* Expand strlen. */
15519
15520 int
15521 ix86_expand_strlen (rtx out, rtx src, rtx eoschar, rtx align)
15522 {
15523 rtx addr, scratch1, scratch2, scratch3, scratch4;
15524
15525 /* The generic case of the strlen expander is long. Avoid expanding
15526 it unless TARGET_INLINE_ALL_STRINGOPS. */
15527
15528 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15529 && !TARGET_INLINE_ALL_STRINGOPS
15530 && !optimize_size
15531 && (!CONST_INT_P (align) || INTVAL (align) < 4))
15532 return 0;
15533
15534 addr = force_reg (Pmode, XEXP (src, 0));
15535 scratch1 = gen_reg_rtx (Pmode);
15536
15537 if (TARGET_UNROLL_STRLEN && eoschar == const0_rtx && optimize > 1
15538 && !optimize_size)
15539 {
15540 /* Well it seems that some optimizer does not combine a call like
15541 foo(strlen(bar), strlen(bar));
15542 when the move and the subtraction are done here. It does calculate
15543 the length just once when these instructions are done inside of
15544 output_strlen_unroll(). But I think since &bar[strlen(bar)] is
15545 often used and I use one fewer register for the lifetime of
15546 output_strlen_unroll(), this is better. */
15547
15548 emit_move_insn (out, addr);
15549
15550 ix86_expand_strlensi_unroll_1 (out, src, align);
15551
15552 /* strlensi_unroll_1 returns the address of the zero at the end of
15553 the string, like memchr(), so compute the length by subtracting
15554 the start address. */
15555 if (TARGET_64BIT)
15556 emit_insn (gen_subdi3 (out, out, addr));
15557 else
15558 emit_insn (gen_subsi3 (out, out, addr));
15559 }
15560 else
15561 {
15562 rtx unspec;
15563 scratch2 = gen_reg_rtx (Pmode);
15564 scratch3 = gen_reg_rtx (Pmode);
15565 scratch4 = force_reg (Pmode, constm1_rtx);
15566
15567 emit_move_insn (scratch3, addr);
15568 eoschar = force_reg (QImode, eoschar);
15569
15570 src = replace_equiv_address_nv (src, scratch3);
15571
15572 /* If .md starts supporting :P, this can be done in .md. */
15573 unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (4, src, eoschar, align,
15574 scratch4), UNSPEC_SCAS);
15575 emit_insn (gen_strlenqi_1 (scratch1, scratch3, unspec));
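/* Note (illustrative): the scan starts with the count register at -1 (SCRATCH4)
   and decrements it for every byte examined, including the terminating zero,
   so the strlenqi_1 pattern is expected to leave -(len + 2) in SCRATCH1.
   The ~SCRATCH1 + (-1) computed below therefore recovers len; e.g. for "ab",
   SCRATCH1 = -4 and ~(-4) - 1 = 3 - 1 = 2.  */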
15576 if (TARGET_64BIT)
15577 {
15578 emit_insn (gen_one_cmpldi2 (scratch2, scratch1));
15579 emit_insn (gen_adddi3 (out, scratch2, constm1_rtx));
15580 }
15581 else
15582 {
15583 emit_insn (gen_one_cmplsi2 (scratch2, scratch1));
15584 emit_insn (gen_addsi3 (out, scratch2, constm1_rtx));
15585 }
15586 }
15587 return 1;
15588 }
15589
15590 /* For a given symbol (function) construct code to compute the address of its PLT
15591 entry in the large x86-64 PIC model. */
15592 rtx
15593 construct_plt_address (rtx symbol)
15594 {
15595 rtx tmp = gen_reg_rtx (Pmode);
15596 rtx unspec = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, symbol), UNSPEC_PLTOFF);
15597
15598 gcc_assert (GET_CODE (symbol) == SYMBOL_REF);
15599 gcc_assert (ix86_cmodel == CM_LARGE_PIC);
15600
15601 emit_move_insn (tmp, gen_rtx_CONST (Pmode, unspec));
15602 emit_insn (gen_adddi3 (tmp, tmp, pic_offset_table_rtx));
15603 return tmp;
15604 }
15605
15606 void
15607 ix86_expand_call (rtx retval, rtx fnaddr, rtx callarg1,
15608 rtx callarg2 ATTRIBUTE_UNUSED,
15609 rtx pop, int sibcall)
15610 {
15611 rtx use = NULL, call;
15612
15613 if (pop == const0_rtx)
15614 pop = NULL;
15615 gcc_assert (!TARGET_64BIT || !pop);
15616
15617 if (TARGET_MACHO && !TARGET_64BIT)
15618 {
15619 #if TARGET_MACHO
15620 if (flag_pic && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF)
15621 fnaddr = machopic_indirect_call_target (fnaddr);
15622 #endif
15623 }
15624 else
15625 {
15626 /* Static functions and indirect calls don't need the pic register. */
15627 if (flag_pic && (!TARGET_64BIT || ix86_cmodel == CM_LARGE_PIC)
15628 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15629 && ! SYMBOL_REF_LOCAL_P (XEXP (fnaddr, 0)))
15630 use_reg (&use, pic_offset_table_rtx);
15631 }
15632
15633 if (TARGET_64BIT && INTVAL (callarg2) >= 0)
15634 {
15635 rtx al = gen_rtx_REG (QImode, 0);
15636 emit_move_insn (al, callarg2);
15637 use_reg (&use, al);
15638 }
15639
15640 if (ix86_cmodel == CM_LARGE_PIC
15641 && GET_CODE (fnaddr) == MEM
15642 && GET_CODE (XEXP (fnaddr, 0)) == SYMBOL_REF
15643 && !local_symbolic_operand (XEXP (fnaddr, 0), VOIDmode))
15644 fnaddr = gen_rtx_MEM (QImode, construct_plt_address (XEXP (fnaddr, 0)));
15645 else if (! call_insn_operand (XEXP (fnaddr, 0), Pmode))
15646 {
15647 fnaddr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15648 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15649 }
15650 if (sibcall && TARGET_64BIT
15651 && !constant_call_address_operand (XEXP (fnaddr, 0), Pmode))
15652 {
15653 rtx addr;
15654 addr = copy_to_mode_reg (Pmode, XEXP (fnaddr, 0));
15655 fnaddr = gen_rtx_REG (Pmode, R11_REG);
15656 emit_move_insn (fnaddr, addr);
15657 fnaddr = gen_rtx_MEM (QImode, fnaddr);
15658 }
15659
15660 call = gen_rtx_CALL (VOIDmode, fnaddr, callarg1);
15661 if (retval)
15662 call = gen_rtx_SET (VOIDmode, retval, call);
15663 if (pop)
15664 {
15665 pop = gen_rtx_PLUS (Pmode, stack_pointer_rtx, pop);
15666 pop = gen_rtx_SET (VOIDmode, stack_pointer_rtx, pop);
15667 call = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, call, pop));
15668 }
15669
15670 call = emit_call_insn (call);
15671 if (use)
15672 CALL_INSN_FUNCTION_USAGE (call) = use;
15673 }
15674
15675 \f
15676 /* Clear stack slot assignments remembered from previous functions.
15677 This is called from INIT_EXPANDERS once before RTL is emitted for each
15678 function. */
15679
15680 static struct machine_function *
15681 ix86_init_machine_status (void)
15682 {
15683 struct machine_function *f;
15684
15685 f = GGC_CNEW (struct machine_function);
15686 f->use_fast_prologue_epilogue_nregs = -1;
15687 f->tls_descriptor_call_expanded_p = 0;
15688
15689 return f;
15690 }
15691
15692 /* Return a MEM corresponding to a stack slot with mode MODE.
15693 Allocate a new slot if necessary.
15694
15695 The RTL for a function can have several slots available: N is
15696 which slot to use. */
15697
15698 rtx
15699 assign_386_stack_local (enum machine_mode mode, enum ix86_stack_slot n)
15700 {
15701 struct stack_local_entry *s;
15702
15703 gcc_assert (n < MAX_386_STACK_LOCALS);
15704
15705 /* Virtual slot is valid only before vregs are instantiated. */
15706 gcc_assert ((n == SLOT_VIRTUAL) == !virtuals_instantiated);
15707
15708 for (s = ix86_stack_locals; s; s = s->next)
15709 if (s->mode == mode && s->n == n)
15710 return copy_rtx (s->rtl);
15711
15712 s = (struct stack_local_entry *)
15713 ggc_alloc (sizeof (struct stack_local_entry));
15714 s->n = n;
15715 s->mode = mode;
15716 s->rtl = assign_stack_local (mode, GET_MODE_SIZE (mode), 0);
15717
15718 s->next = ix86_stack_locals;
15719 ix86_stack_locals = s;
15720 return s->rtl;
15721 }
15722
15723 /* Construct the SYMBOL_REF for the tls_get_addr function. */
15724
15725 static GTY(()) rtx ix86_tls_symbol;
15726 rtx
15727 ix86_tls_get_addr (void)
15728 {
15729
15730 if (!ix86_tls_symbol)
15731 {
15732 ix86_tls_symbol = gen_rtx_SYMBOL_REF (Pmode,
15733 (TARGET_ANY_GNU_TLS
15734 && !TARGET_64BIT)
15735 ? "___tls_get_addr"
15736 : "__tls_get_addr");
15737 }
15738
15739 return ix86_tls_symbol;
15740 }
15741
15742 /* Construct the SYMBOL_REF for the _TLS_MODULE_BASE_ symbol. */
15743
15744 static GTY(()) rtx ix86_tls_module_base_symbol;
15745 rtx
15746 ix86_tls_module_base (void)
15747 {
15748
15749 if (!ix86_tls_module_base_symbol)
15750 {
15751 ix86_tls_module_base_symbol = gen_rtx_SYMBOL_REF (Pmode,
15752 "_TLS_MODULE_BASE_");
15753 SYMBOL_REF_FLAGS (ix86_tls_module_base_symbol)
15754 |= TLS_MODEL_GLOBAL_DYNAMIC << SYMBOL_FLAG_TLS_SHIFT;
15755 }
15756
15757 return ix86_tls_module_base_symbol;
15758 }
15759 \f
15760 /* Calculate the length of the memory address in the instruction
15761 encoding. Does not include the one-byte modrm, opcode, or prefix. */
15762
15763 int
15764 memory_address_length (rtx addr)
15765 {
15766 struct ix86_address parts;
15767 rtx base, index, disp;
15768 int len;
15769 int ok;
15770
15771 if (GET_CODE (addr) == PRE_DEC
15772 || GET_CODE (addr) == POST_INC
15773 || GET_CODE (addr) == PRE_MODIFY
15774 || GET_CODE (addr) == POST_MODIFY)
15775 return 0;
15776
15777 ok = ix86_decompose_address (addr, &parts);
15778 gcc_assert (ok);
15779
15780 if (parts.base && GET_CODE (parts.base) == SUBREG)
15781 parts.base = SUBREG_REG (parts.base);
15782 if (parts.index && GET_CODE (parts.index) == SUBREG)
15783 parts.index = SUBREG_REG (parts.index);
15784
15785 base = parts.base;
15786 index = parts.index;
15787 disp = parts.disp;
15788 len = 0;
15789
15790 /* Rule of thumb:
15791 - esp as the base always wants an index,
15792 - ebp as the base always wants a displacement. */
15793
15794 /* Register Indirect. */
15795 if (base && !index && !disp)
15796 {
15797 /* esp (for its index) and ebp (for its displacement) need
15798 the two-byte modrm form. */
15799 if (addr == stack_pointer_rtx
15800 || addr == arg_pointer_rtx
15801 || addr == frame_pointer_rtx
15802 || addr == hard_frame_pointer_rtx)
15803 len = 1;
15804 }
15805
15806 /* Direct Addressing. */
15807 else if (disp && !base && !index)
15808 len = 4;
15809
15810 else
15811 {
15812 /* Find the length of the displacement constant. */
15813 if (disp)
15814 {
15815 if (base && satisfies_constraint_K (disp))
15816 len = 1;
15817 else
15818 len = 4;
15819 }
15820 /* ebp always wants a displacement. */
15821 else if (base == hard_frame_pointer_rtx)
15822 len = 1;
15823
15824 /* An index requires the two-byte modrm form.... */
15825 if (index
15826 /* ...like esp, which always wants an index. */
15827 || base == stack_pointer_rtx
15828 || base == arg_pointer_rtx
15829 || base == frame_pointer_rtx)
15830 len += 1;
15831 }
15832
15833 return len;
15834 }
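/* A few illustrative cases for the rules above (the returned length excludes
   the modrm byte, opcode and prefixes, per the comment before the function):
     (%eax)          -> 0   plain register indirect
     (%esp)          -> 1   esp as a base needs the SIB byte
     8(%ebp)         -> 1   displacement fits in a signed byte
     symbol          -> 4   absolute address, disp32
     200(%eax)       -> 4   disp32, does not fit in a signed byte
     4(%eax,%ebx,2)  -> 2   disp8 plus the SIB byte for the index  */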
15835
15836 /* Compute default value for "length_immediate" attribute. When SHORTFORM
15837 is set, expect that the insn has an 8-bit immediate alternative. */
15838 int
15839 ix86_attr_length_immediate_default (rtx insn, int shortform)
15840 {
15841 int len = 0;
15842 int i;
15843 extract_insn_cached (insn);
15844 for (i = recog_data.n_operands - 1; i >= 0; --i)
15845 if (CONSTANT_P (recog_data.operand[i]))
15846 {
15847 gcc_assert (!len);
15848 if (shortform && satisfies_constraint_K (recog_data.operand[i]))
15849 len = 1;
15850 else
15851 {
15852 switch (get_attr_mode (insn))
15853 {
15854 case MODE_QI:
15855 len+=1;
15856 break;
15857 case MODE_HI:
15858 len+=2;
15859 break;
15860 case MODE_SI:
15861 len+=4;
15862 break;
15863 /* Immediates for DImode instructions are encoded as 32bit sign extended values. */
15864 case MODE_DI:
15865 len+=4;
15866 break;
15867 default:
15868 fatal_insn ("unknown insn mode", insn);
15869 }
15870 }
15871 }
15872 return len;
15873 }
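/* For example, "addl $200000, %eax" contributes 4 bytes of immediate, while
   an insn with an 8-bit immediate alternative (SHORTFORM set) such as
   "addl $4, %eax" contributes only 1.  */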
15874 /* Compute default value for "length_address" attribute. */
15875 int
15876 ix86_attr_length_address_default (rtx insn)
15877 {
15878 int i;
15879
15880 if (get_attr_type (insn) == TYPE_LEA)
15881 {
15882 rtx set = PATTERN (insn);
15883
15884 if (GET_CODE (set) == PARALLEL)
15885 set = XVECEXP (set, 0, 0);
15886
15887 gcc_assert (GET_CODE (set) == SET);
15888
15889 return memory_address_length (SET_SRC (set));
15890 }
15891
15892 extract_insn_cached (insn);
15893 for (i = recog_data.n_operands - 1; i >= 0; --i)
15894 if (MEM_P (recog_data.operand[i]))
15895 {
15896 return memory_address_length (XEXP (recog_data.operand[i], 0));
15897 break;
15898 }
15899 return 0;
15900 }
15901 \f
15902 /* Return the maximum number of instructions a cpu can issue. */
15903
15904 static int
15905 ix86_issue_rate (void)
15906 {
15907 switch (ix86_tune)
15908 {
15909 case PROCESSOR_PENTIUM:
15910 case PROCESSOR_K6:
15911 return 2;
15912
15913 case PROCESSOR_PENTIUMPRO:
15914 case PROCESSOR_PENTIUM4:
15915 case PROCESSOR_ATHLON:
15916 case PROCESSOR_K8:
15917 case PROCESSOR_AMDFAM10:
15918 case PROCESSOR_NOCONA:
15919 case PROCESSOR_GENERIC32:
15920 case PROCESSOR_GENERIC64:
15921 return 3;
15922
15923 case PROCESSOR_CORE2:
15924 return 4;
15925
15926 default:
15927 return 1;
15928 }
15929 }
15930
15931 /* A subroutine of ix86_adjust_cost -- return true iff INSN reads flags set
15932 by DEP_INSN and nothing else set by DEP_INSN. */
15933
15934 static int
15935 ix86_flags_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15936 {
15937 rtx set, set2;
15938
15939 /* Simplify the test for uninteresting insns. */
15940 if (insn_type != TYPE_SETCC
15941 && insn_type != TYPE_ICMOV
15942 && insn_type != TYPE_FCMOV
15943 && insn_type != TYPE_IBR)
15944 return 0;
15945
15946 if ((set = single_set (dep_insn)) != 0)
15947 {
15948 set = SET_DEST (set);
15949 set2 = NULL_RTX;
15950 }
15951 else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
15952 && XVECLEN (PATTERN (dep_insn), 0) == 2
15953 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
15954 && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
15955 {
15956 set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
15957 set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
15958 }
15959 else
15960 return 0;
15961
15962 if (!REG_P (set) || REGNO (set) != FLAGS_REG)
15963 return 0;
15964
15965 /* This test is true if the dependent insn reads the flags but
15966 not any other potentially set register. */
15967 if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
15968 return 0;
15969
15970 if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
15971 return 0;
15972
15973 return 1;
15974 }
15975
15976 /* A subroutine of ix86_adjust_cost -- return true iff INSN has a memory
15977 address with operands set by DEP_INSN. */
15978
15979 static int
15980 ix86_agi_dependent (rtx insn, rtx dep_insn, enum attr_type insn_type)
15981 {
15982 rtx addr;
15983
15984 if (insn_type == TYPE_LEA
15985 && TARGET_PENTIUM)
15986 {
15987 addr = PATTERN (insn);
15988
15989 if (GET_CODE (addr) == PARALLEL)
15990 addr = XVECEXP (addr, 0, 0);
15991
15992 gcc_assert (GET_CODE (addr) == SET);
15993
15994 addr = SET_SRC (addr);
15995 }
15996 else
15997 {
15998 int i;
15999 extract_insn_cached (insn);
16000 for (i = recog_data.n_operands - 1; i >= 0; --i)
16001 if (MEM_P (recog_data.operand[i]))
16002 {
16003 addr = XEXP (recog_data.operand[i], 0);
16004 goto found;
16005 }
16006 return 0;
16007 found:;
16008 }
16009
16010 return modified_in_p (addr, dep_insn);
16011 }
16012
16013 static int
16014 ix86_adjust_cost (rtx insn, rtx link, rtx dep_insn, int cost)
16015 {
16016 enum attr_type insn_type, dep_insn_type;
16017 enum attr_memory memory;
16018 rtx set, set2;
16019 int dep_insn_code_number;
16020
16021 /* Anti and output dependencies have zero cost on all CPUs. */
16022 if (REG_NOTE_KIND (link) != 0)
16023 return 0;
16024
16025 dep_insn_code_number = recog_memoized (dep_insn);
16026
16027 /* If we can't recognize the insns, we can't really do anything. */
16028 if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
16029 return cost;
16030
16031 insn_type = get_attr_type (insn);
16032 dep_insn_type = get_attr_type (dep_insn);
16033
16034 switch (ix86_tune)
16035 {
16036 case PROCESSOR_PENTIUM:
16037 /* Address Generation Interlock adds a cycle of latency. */
16038 if (ix86_agi_dependent (insn, dep_insn, insn_type))
16039 cost += 1;
16040
16041 /* ??? Compares pair with jump/setcc. */
16042 if (ix86_flags_dependent (insn, dep_insn, insn_type))
16043 cost = 0;
16044
16045 /* Floating point stores require value to be ready one cycle earlier. */
16046 if (insn_type == TYPE_FMOV
16047 && get_attr_memory (insn) == MEMORY_STORE
16048 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16049 cost += 1;
16050 break;
16051
16052 case PROCESSOR_PENTIUMPRO:
16053 memory = get_attr_memory (insn);
16054
16055 /* INT->FP conversion is expensive. */
16056 if (get_attr_fp_int_src (dep_insn))
16057 cost += 5;
16058
16059 /* There is one cycle extra latency between an FP op and a store. */
16060 if (insn_type == TYPE_FMOV
16061 && (set = single_set (dep_insn)) != NULL_RTX
16062 && (set2 = single_set (insn)) != NULL_RTX
16063 && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
16064 && MEM_P (SET_DEST (set2)))
16065 cost += 1;
16066
16067 /* Show the ability of the reorder buffer to hide the latency of a load by
16068 executing it in parallel with the previous instruction when the previous
16069 instruction is not needed to compute the address. */
16070 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16071 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16072 {
16073 /* Claim moves to take one cycle, as core can issue one load
16074 at a time and the next load can start a cycle later. */
16075 if (dep_insn_type == TYPE_IMOV
16076 || dep_insn_type == TYPE_FMOV)
16077 cost = 1;
16078 else if (cost > 1)
16079 cost--;
16080 }
16081 break;
16082
16083 case PROCESSOR_K6:
16084 memory = get_attr_memory (insn);
16085
16086 /* The esp dependency is resolved before the instruction is really
16087 finished. */
16088 if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
16089 && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
16090 return 1;
16091
16092 /* INT->FP conversion is expensive. */
16093 if (get_attr_fp_int_src (dep_insn))
16094 cost += 5;
16095
16096 /* Show the ability of the reorder buffer to hide the latency of a load by
16097 executing it in parallel with the previous instruction when the previous
16098 instruction is not needed to compute the address. */
16099 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16100 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16101 {
16102 /* Claim moves to take one cycle, as core can issue one load
16103 at a time and the next load can start a cycle later. */
16104 if (dep_insn_type == TYPE_IMOV
16105 || dep_insn_type == TYPE_FMOV)
16106 cost = 1;
16107 else if (cost > 2)
16108 cost -= 2;
16109 else
16110 cost = 1;
16111 }
16112 break;
16113
16114 case PROCESSOR_ATHLON:
16115 case PROCESSOR_K8:
16116 case PROCESSOR_AMDFAM10:
16117 case PROCESSOR_GENERIC32:
16118 case PROCESSOR_GENERIC64:
16119 memory = get_attr_memory (insn);
16120
16121 /* Show the ability of the reorder buffer to hide the latency of a load by
16122 executing it in parallel with the previous instruction when the previous
16123 instruction is not needed to compute the address. */
16124 if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
16125 && !ix86_agi_dependent (insn, dep_insn, insn_type))
16126 {
16127 enum attr_unit unit = get_attr_unit (insn);
16128 int loadcost = 3;
16129
16130 /* Because of the difference between the length of integer and
16131 floating unit pipeline preparation stages, the memory operands
16132 for floating point are cheaper.
16133
16134 ??? For Athlon the difference is most probably 2. */
16135 if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
16136 loadcost = 3;
16137 else
16138 loadcost = TARGET_ATHLON ? 2 : 0;
16139
16140 if (cost >= loadcost)
16141 cost -= loadcost;
16142 else
16143 cost = 0;
16144 }
16145
16146 default:
16147 break;
16148 }
16149
16150 return cost;
16151 }
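/* Illustrative example of the Pentium AGI adjustment above: if DEP_INSN is
   "addl $4, %eax" and INSN is "movl (%eax), %ebx", the load address depends
   on a register DEP_INSN just modified, so ix86_agi_dependent is true and one
   extra cycle of latency is added.  */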
16152
16153 /* How many alternative schedules to try. This should be as wide as the
16154 scheduling freedom in the DFA, but no wider. Making this value too
16155 large results in extra work for the scheduler. */
16156
16157 static int
16158 ia32_multipass_dfa_lookahead (void)
16159 {
16160 if (ix86_tune == PROCESSOR_PENTIUM)
16161 return 2;
16162
16163 if (ix86_tune == PROCESSOR_PENTIUMPRO
16164 || ix86_tune == PROCESSOR_K6)
16165 return 1;
16166
16167 else
16168 return 0;
16169 }
16170
16171 \f
16172 /* Compute the alignment given to a constant that is being placed in memory.
16173 EXP is the constant and ALIGN is the alignment that the object would
16174 ordinarily have.
16175 The value of this function is used instead of that alignment to align
16176 the object. */
16177
16178 int
16179 ix86_constant_alignment (tree exp, int align)
16180 {
16181 if (TREE_CODE (exp) == REAL_CST)
16182 {
16183 if (TYPE_MODE (TREE_TYPE (exp)) == DFmode && align < 64)
16184 return 64;
16185 else if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (exp))) && align < 128)
16186 return 128;
16187 }
16188 else if (!optimize_size && TREE_CODE (exp) == STRING_CST
16189 && TREE_STRING_LENGTH (exp) >= 31 && align < BITS_PER_WORD)
16190 return BITS_PER_WORD;
16191
16192 return align;
16193 }
16194
16195 /* Compute the alignment for a static variable.
16196 TYPE is the data type, and ALIGN is the alignment that
16197 the object would ordinarily have. The value of this function is used
16198 instead of that alignment to align the object. */
16199
16200 int
16201 ix86_data_alignment (tree type, int align)
16202 {
16203 int max_align = optimize_size ? BITS_PER_WORD : MIN (256, MAX_OFILE_ALIGNMENT);
16204
16205 if (AGGREGATE_TYPE_P (type)
16206 && TYPE_SIZE (type)
16207 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16208 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= (unsigned) max_align
16209 || TREE_INT_CST_HIGH (TYPE_SIZE (type)))
16210 && align < max_align)
16211 align = max_align;
16212
16213 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16214 to a 16-byte boundary. */
16215 if (TARGET_64BIT)
16216 {
16217 if (AGGREGATE_TYPE_P (type)
16218 && TYPE_SIZE (type)
16219 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16220 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 128
16221 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16222 return 128;
16223 }
16224
16225 if (TREE_CODE (type) == ARRAY_TYPE)
16226 {
16227 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16228 return 64;
16229 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16230 return 128;
16231 }
16232 else if (TREE_CODE (type) == COMPLEX_TYPE)
16233 {
16234
16235 if (TYPE_MODE (type) == DCmode && align < 64)
16236 return 64;
16237 if (TYPE_MODE (type) == XCmode && align < 128)
16238 return 128;
16239 }
16240 else if ((TREE_CODE (type) == RECORD_TYPE
16241 || TREE_CODE (type) == UNION_TYPE
16242 || TREE_CODE (type) == QUAL_UNION_TYPE)
16243 && TYPE_FIELDS (type))
16244 {
16245 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16246 return 64;
16247 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16248 return 128;
16249 }
16250 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16251 || TREE_CODE (type) == INTEGER_TYPE)
16252 {
16253 if (TYPE_MODE (type) == DFmode && align < 64)
16254 return 64;
16255 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16256 return 128;
16257 }
16258
16259 return align;
16260 }
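/* For instance, on x86-64 a static "double buf[2]" (128 bits) is raised to
   128-bit alignment by the aggregate rule above, while a single "double"
   typically gets 64-bit alignment from the REAL_TYPE case.  */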
16261
16262 /* Compute the alignment for a local variable.
16263 TYPE is the data type, and ALIGN is the alignment that
16264 the object would ordinarily have. The value of this macro is used
16265 instead of that alignment to align the object. */
16266
16267 int
16268 ix86_local_alignment (tree type, int align)
16269 {
16270 /* x86-64 ABI requires arrays greater than 16 bytes to be aligned
16271 to a 16-byte boundary. */
16272 if (TARGET_64BIT)
16273 {
16274 if (AGGREGATE_TYPE_P (type)
16275 && TYPE_SIZE (type)
16276 && TREE_CODE (TYPE_SIZE (type)) == INTEGER_CST
16277 && (TREE_INT_CST_LOW (TYPE_SIZE (type)) >= 16
16278 || TREE_INT_CST_HIGH (TYPE_SIZE (type))) && align < 128)
16279 return 128;
16280 }
16281 if (TREE_CODE (type) == ARRAY_TYPE)
16282 {
16283 if (TYPE_MODE (TREE_TYPE (type)) == DFmode && align < 64)
16284 return 64;
16285 if (ALIGN_MODE_128 (TYPE_MODE (TREE_TYPE (type))) && align < 128)
16286 return 128;
16287 }
16288 else if (TREE_CODE (type) == COMPLEX_TYPE)
16289 {
16290 if (TYPE_MODE (type) == DCmode && align < 64)
16291 return 64;
16292 if (TYPE_MODE (type) == XCmode && align < 128)
16293 return 128;
16294 }
16295 else if ((TREE_CODE (type) == RECORD_TYPE
16296 || TREE_CODE (type) == UNION_TYPE
16297 || TREE_CODE (type) == QUAL_UNION_TYPE)
16298 && TYPE_FIELDS (type))
16299 {
16300 if (DECL_MODE (TYPE_FIELDS (type)) == DFmode && align < 64)
16301 return 64;
16302 if (ALIGN_MODE_128 (DECL_MODE (TYPE_FIELDS (type))) && align < 128)
16303 return 128;
16304 }
16305 else if (TREE_CODE (type) == REAL_TYPE || TREE_CODE (type) == VECTOR_TYPE
16306 || TREE_CODE (type) == INTEGER_TYPE)
16307 {
16308
16309 if (TYPE_MODE (type) == DFmode && align < 64)
16310 return 64;
16311 if (ALIGN_MODE_128 (TYPE_MODE (type)) && align < 128)
16312 return 128;
16313 }
16314 return align;
16315 }
16316 \f
16317 /* Emit RTL insns to initialize the variable parts of a trampoline.
16318 FNADDR is an RTX for the address of the function's pure code.
16319 CXT is an RTX for the static chain value for the function. */
16320 void
16321 x86_initialize_trampoline (rtx tramp, rtx fnaddr, rtx cxt)
16322 {
16323 if (!TARGET_64BIT)
16324 {
16325 /* Compute offset from the end of the jmp to the target function. */
16326 rtx disp = expand_binop (SImode, sub_optab, fnaddr,
16327 plus_constant (tramp, 10),
16328 NULL_RTX, 1, OPTAB_DIRECT);
16329 emit_move_insn (gen_rtx_MEM (QImode, tramp),
16330 gen_int_mode (0xb9, QImode));
16331 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 1)), cxt);
16332 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, 5)),
16333 gen_int_mode (0xe9, QImode));
16334 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, 6)), disp);
16335 }
16336 else
16337 {
16338 int offset = 0;
16339 /* Try to load address using shorter movl instead of movabs.
16340 We may want to support movq for kernel mode, but kernel does not use
16341 trampolines at the moment. */
16342 if (x86_64_zext_immediate_operand (fnaddr, VOIDmode))
16343 {
16344 fnaddr = copy_to_mode_reg (DImode, fnaddr);
16345 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16346 gen_int_mode (0xbb41, HImode));
16347 emit_move_insn (gen_rtx_MEM (SImode, plus_constant (tramp, offset + 2)),
16348 gen_lowpart (SImode, fnaddr));
16349 offset += 6;
16350 }
16351 else
16352 {
16353 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16354 gen_int_mode (0xbb49, HImode));
16355 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16356 fnaddr);
16357 offset += 10;
16358 }
16359 /* Load static chain using movabs to r10. */
16360 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16361 gen_int_mode (0xba49, HImode));
16362 emit_move_insn (gen_rtx_MEM (DImode, plus_constant (tramp, offset + 2)),
16363 cxt);
16364 offset += 10;
16365 /* Jump to r11. */
16366 emit_move_insn (gen_rtx_MEM (HImode, plus_constant (tramp, offset)),
16367 gen_int_mode (0xff49, HImode));
16368 emit_move_insn (gen_rtx_MEM (QImode, plus_constant (tramp, offset+2)),
16369 gen_int_mode (0xe3, QImode));
16370 offset += 3;
16371 gcc_assert (offset <= TRAMPOLINE_SIZE);
16372 }
16373
16374 #ifdef ENABLE_EXECUTE_STACK
16375 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__enable_execute_stack"),
16376 LCT_NORMAL, VOIDmode, 1, tramp, Pmode);
16377 #endif
16378 }
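/* Roughly, the 32-bit trampoline written above is the 10-byte sequence
     offset 0:  b9 <cxt>    movl $<static chain>, %ecx
     offset 5:  e9 <disp>   jmp  <function>   (disp is relative to offset 10)
   while the 64-bit variant loads %r11 with the function address (movl or
   movabs) and %r10 with the static chain, and ends with 49 ff e3,
   i.e. jmp *%r11.  */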
16379 \f
16380 /* Codes for all the SSE/MMX builtins. */
16381 enum ix86_builtins
16382 {
16383 IX86_BUILTIN_ADDPS,
16384 IX86_BUILTIN_ADDSS,
16385 IX86_BUILTIN_DIVPS,
16386 IX86_BUILTIN_DIVSS,
16387 IX86_BUILTIN_MULPS,
16388 IX86_BUILTIN_MULSS,
16389 IX86_BUILTIN_SUBPS,
16390 IX86_BUILTIN_SUBSS,
16391
16392 IX86_BUILTIN_CMPEQPS,
16393 IX86_BUILTIN_CMPLTPS,
16394 IX86_BUILTIN_CMPLEPS,
16395 IX86_BUILTIN_CMPGTPS,
16396 IX86_BUILTIN_CMPGEPS,
16397 IX86_BUILTIN_CMPNEQPS,
16398 IX86_BUILTIN_CMPNLTPS,
16399 IX86_BUILTIN_CMPNLEPS,
16400 IX86_BUILTIN_CMPNGTPS,
16401 IX86_BUILTIN_CMPNGEPS,
16402 IX86_BUILTIN_CMPORDPS,
16403 IX86_BUILTIN_CMPUNORDPS,
16404 IX86_BUILTIN_CMPEQSS,
16405 IX86_BUILTIN_CMPLTSS,
16406 IX86_BUILTIN_CMPLESS,
16407 IX86_BUILTIN_CMPNEQSS,
16408 IX86_BUILTIN_CMPNLTSS,
16409 IX86_BUILTIN_CMPNLESS,
16410 IX86_BUILTIN_CMPNGTSS,
16411 IX86_BUILTIN_CMPNGESS,
16412 IX86_BUILTIN_CMPORDSS,
16413 IX86_BUILTIN_CMPUNORDSS,
16414
16415 IX86_BUILTIN_COMIEQSS,
16416 IX86_BUILTIN_COMILTSS,
16417 IX86_BUILTIN_COMILESS,
16418 IX86_BUILTIN_COMIGTSS,
16419 IX86_BUILTIN_COMIGESS,
16420 IX86_BUILTIN_COMINEQSS,
16421 IX86_BUILTIN_UCOMIEQSS,
16422 IX86_BUILTIN_UCOMILTSS,
16423 IX86_BUILTIN_UCOMILESS,
16424 IX86_BUILTIN_UCOMIGTSS,
16425 IX86_BUILTIN_UCOMIGESS,
16426 IX86_BUILTIN_UCOMINEQSS,
16427
16428 IX86_BUILTIN_CVTPI2PS,
16429 IX86_BUILTIN_CVTPS2PI,
16430 IX86_BUILTIN_CVTSI2SS,
16431 IX86_BUILTIN_CVTSI642SS,
16432 IX86_BUILTIN_CVTSS2SI,
16433 IX86_BUILTIN_CVTSS2SI64,
16434 IX86_BUILTIN_CVTTPS2PI,
16435 IX86_BUILTIN_CVTTSS2SI,
16436 IX86_BUILTIN_CVTTSS2SI64,
16437
16438 IX86_BUILTIN_MAXPS,
16439 IX86_BUILTIN_MAXSS,
16440 IX86_BUILTIN_MINPS,
16441 IX86_BUILTIN_MINSS,
16442
16443 IX86_BUILTIN_LOADUPS,
16444 IX86_BUILTIN_STOREUPS,
16445 IX86_BUILTIN_MOVSS,
16446
16447 IX86_BUILTIN_MOVHLPS,
16448 IX86_BUILTIN_MOVLHPS,
16449 IX86_BUILTIN_LOADHPS,
16450 IX86_BUILTIN_LOADLPS,
16451 IX86_BUILTIN_STOREHPS,
16452 IX86_BUILTIN_STORELPS,
16453
16454 IX86_BUILTIN_MASKMOVQ,
16455 IX86_BUILTIN_MOVMSKPS,
16456 IX86_BUILTIN_PMOVMSKB,
16457
16458 IX86_BUILTIN_MOVNTPS,
16459 IX86_BUILTIN_MOVNTQ,
16460
16461 IX86_BUILTIN_LOADDQU,
16462 IX86_BUILTIN_STOREDQU,
16463
16464 IX86_BUILTIN_PACKSSWB,
16465 IX86_BUILTIN_PACKSSDW,
16466 IX86_BUILTIN_PACKUSWB,
16467
16468 IX86_BUILTIN_PADDB,
16469 IX86_BUILTIN_PADDW,
16470 IX86_BUILTIN_PADDD,
16471 IX86_BUILTIN_PADDQ,
16472 IX86_BUILTIN_PADDSB,
16473 IX86_BUILTIN_PADDSW,
16474 IX86_BUILTIN_PADDUSB,
16475 IX86_BUILTIN_PADDUSW,
16476 IX86_BUILTIN_PSUBB,
16477 IX86_BUILTIN_PSUBW,
16478 IX86_BUILTIN_PSUBD,
16479 IX86_BUILTIN_PSUBQ,
16480 IX86_BUILTIN_PSUBSB,
16481 IX86_BUILTIN_PSUBSW,
16482 IX86_BUILTIN_PSUBUSB,
16483 IX86_BUILTIN_PSUBUSW,
16484
16485 IX86_BUILTIN_PAND,
16486 IX86_BUILTIN_PANDN,
16487 IX86_BUILTIN_POR,
16488 IX86_BUILTIN_PXOR,
16489
16490 IX86_BUILTIN_PAVGB,
16491 IX86_BUILTIN_PAVGW,
16492
16493 IX86_BUILTIN_PCMPEQB,
16494 IX86_BUILTIN_PCMPEQW,
16495 IX86_BUILTIN_PCMPEQD,
16496 IX86_BUILTIN_PCMPGTB,
16497 IX86_BUILTIN_PCMPGTW,
16498 IX86_BUILTIN_PCMPGTD,
16499
16500 IX86_BUILTIN_PMADDWD,
16501
16502 IX86_BUILTIN_PMAXSW,
16503 IX86_BUILTIN_PMAXUB,
16504 IX86_BUILTIN_PMINSW,
16505 IX86_BUILTIN_PMINUB,
16506
16507 IX86_BUILTIN_PMULHUW,
16508 IX86_BUILTIN_PMULHW,
16509 IX86_BUILTIN_PMULLW,
16510
16511 IX86_BUILTIN_PSADBW,
16512 IX86_BUILTIN_PSHUFW,
16513
16514 IX86_BUILTIN_PSLLW,
16515 IX86_BUILTIN_PSLLD,
16516 IX86_BUILTIN_PSLLQ,
16517 IX86_BUILTIN_PSRAW,
16518 IX86_BUILTIN_PSRAD,
16519 IX86_BUILTIN_PSRLW,
16520 IX86_BUILTIN_PSRLD,
16521 IX86_BUILTIN_PSRLQ,
16522 IX86_BUILTIN_PSLLWI,
16523 IX86_BUILTIN_PSLLDI,
16524 IX86_BUILTIN_PSLLQI,
16525 IX86_BUILTIN_PSRAWI,
16526 IX86_BUILTIN_PSRADI,
16527 IX86_BUILTIN_PSRLWI,
16528 IX86_BUILTIN_PSRLDI,
16529 IX86_BUILTIN_PSRLQI,
16530
16531 IX86_BUILTIN_PUNPCKHBW,
16532 IX86_BUILTIN_PUNPCKHWD,
16533 IX86_BUILTIN_PUNPCKHDQ,
16534 IX86_BUILTIN_PUNPCKLBW,
16535 IX86_BUILTIN_PUNPCKLWD,
16536 IX86_BUILTIN_PUNPCKLDQ,
16537
16538 IX86_BUILTIN_SHUFPS,
16539
16540 IX86_BUILTIN_RCPPS,
16541 IX86_BUILTIN_RCPSS,
16542 IX86_BUILTIN_RSQRTPS,
16543 IX86_BUILTIN_RSQRTSS,
16544 IX86_BUILTIN_RSQRTF,
16545 IX86_BUILTIN_SQRTPS,
16546 IX86_BUILTIN_SQRTSS,
16547
16548 IX86_BUILTIN_UNPCKHPS,
16549 IX86_BUILTIN_UNPCKLPS,
16550
16551 IX86_BUILTIN_ANDPS,
16552 IX86_BUILTIN_ANDNPS,
16553 IX86_BUILTIN_ORPS,
16554 IX86_BUILTIN_XORPS,
16555
16556 IX86_BUILTIN_EMMS,
16557 IX86_BUILTIN_LDMXCSR,
16558 IX86_BUILTIN_STMXCSR,
16559 IX86_BUILTIN_SFENCE,
16560
16561 /* 3DNow! Original */
16562 IX86_BUILTIN_FEMMS,
16563 IX86_BUILTIN_PAVGUSB,
16564 IX86_BUILTIN_PF2ID,
16565 IX86_BUILTIN_PFACC,
16566 IX86_BUILTIN_PFADD,
16567 IX86_BUILTIN_PFCMPEQ,
16568 IX86_BUILTIN_PFCMPGE,
16569 IX86_BUILTIN_PFCMPGT,
16570 IX86_BUILTIN_PFMAX,
16571 IX86_BUILTIN_PFMIN,
16572 IX86_BUILTIN_PFMUL,
16573 IX86_BUILTIN_PFRCP,
16574 IX86_BUILTIN_PFRCPIT1,
16575 IX86_BUILTIN_PFRCPIT2,
16576 IX86_BUILTIN_PFRSQIT1,
16577 IX86_BUILTIN_PFRSQRT,
16578 IX86_BUILTIN_PFSUB,
16579 IX86_BUILTIN_PFSUBR,
16580 IX86_BUILTIN_PI2FD,
16581 IX86_BUILTIN_PMULHRW,
16582
16583 /* 3DNow! Athlon Extensions */
16584 IX86_BUILTIN_PF2IW,
16585 IX86_BUILTIN_PFNACC,
16586 IX86_BUILTIN_PFPNACC,
16587 IX86_BUILTIN_PI2FW,
16588 IX86_BUILTIN_PSWAPDSI,
16589 IX86_BUILTIN_PSWAPDSF,
16590
16591 /* SSE2 */
16592 IX86_BUILTIN_ADDPD,
16593 IX86_BUILTIN_ADDSD,
16594 IX86_BUILTIN_DIVPD,
16595 IX86_BUILTIN_DIVSD,
16596 IX86_BUILTIN_MULPD,
16597 IX86_BUILTIN_MULSD,
16598 IX86_BUILTIN_SUBPD,
16599 IX86_BUILTIN_SUBSD,
16600
16601 IX86_BUILTIN_CMPEQPD,
16602 IX86_BUILTIN_CMPLTPD,
16603 IX86_BUILTIN_CMPLEPD,
16604 IX86_BUILTIN_CMPGTPD,
16605 IX86_BUILTIN_CMPGEPD,
16606 IX86_BUILTIN_CMPNEQPD,
16607 IX86_BUILTIN_CMPNLTPD,
16608 IX86_BUILTIN_CMPNLEPD,
16609 IX86_BUILTIN_CMPNGTPD,
16610 IX86_BUILTIN_CMPNGEPD,
16611 IX86_BUILTIN_CMPORDPD,
16612 IX86_BUILTIN_CMPUNORDPD,
16613 IX86_BUILTIN_CMPEQSD,
16614 IX86_BUILTIN_CMPLTSD,
16615 IX86_BUILTIN_CMPLESD,
16616 IX86_BUILTIN_CMPNEQSD,
16617 IX86_BUILTIN_CMPNLTSD,
16618 IX86_BUILTIN_CMPNLESD,
16619 IX86_BUILTIN_CMPORDSD,
16620 IX86_BUILTIN_CMPUNORDSD,
16621
16622 IX86_BUILTIN_COMIEQSD,
16623 IX86_BUILTIN_COMILTSD,
16624 IX86_BUILTIN_COMILESD,
16625 IX86_BUILTIN_COMIGTSD,
16626 IX86_BUILTIN_COMIGESD,
16627 IX86_BUILTIN_COMINEQSD,
16628 IX86_BUILTIN_UCOMIEQSD,
16629 IX86_BUILTIN_UCOMILTSD,
16630 IX86_BUILTIN_UCOMILESD,
16631 IX86_BUILTIN_UCOMIGTSD,
16632 IX86_BUILTIN_UCOMIGESD,
16633 IX86_BUILTIN_UCOMINEQSD,
16634
16635 IX86_BUILTIN_MAXPD,
16636 IX86_BUILTIN_MAXSD,
16637 IX86_BUILTIN_MINPD,
16638 IX86_BUILTIN_MINSD,
16639
16640 IX86_BUILTIN_ANDPD,
16641 IX86_BUILTIN_ANDNPD,
16642 IX86_BUILTIN_ORPD,
16643 IX86_BUILTIN_XORPD,
16644
16645 IX86_BUILTIN_SQRTPD,
16646 IX86_BUILTIN_SQRTSD,
16647
16648 IX86_BUILTIN_UNPCKHPD,
16649 IX86_BUILTIN_UNPCKLPD,
16650
16651 IX86_BUILTIN_SHUFPD,
16652
16653 IX86_BUILTIN_LOADUPD,
16654 IX86_BUILTIN_STOREUPD,
16655 IX86_BUILTIN_MOVSD,
16656
16657 IX86_BUILTIN_LOADHPD,
16658 IX86_BUILTIN_LOADLPD,
16659
16660 IX86_BUILTIN_CVTDQ2PD,
16661 IX86_BUILTIN_CVTDQ2PS,
16662
16663 IX86_BUILTIN_CVTPD2DQ,
16664 IX86_BUILTIN_CVTPD2PI,
16665 IX86_BUILTIN_CVTPD2PS,
16666 IX86_BUILTIN_CVTTPD2DQ,
16667 IX86_BUILTIN_CVTTPD2PI,
16668
16669 IX86_BUILTIN_CVTPI2PD,
16670 IX86_BUILTIN_CVTSI2SD,
16671 IX86_BUILTIN_CVTSI642SD,
16672
16673 IX86_BUILTIN_CVTSD2SI,
16674 IX86_BUILTIN_CVTSD2SI64,
16675 IX86_BUILTIN_CVTSD2SS,
16676 IX86_BUILTIN_CVTSS2SD,
16677 IX86_BUILTIN_CVTTSD2SI,
16678 IX86_BUILTIN_CVTTSD2SI64,
16679
16680 IX86_BUILTIN_CVTPS2DQ,
16681 IX86_BUILTIN_CVTPS2PD,
16682 IX86_BUILTIN_CVTTPS2DQ,
16683
16684 IX86_BUILTIN_MOVNTI,
16685 IX86_BUILTIN_MOVNTPD,
16686 IX86_BUILTIN_MOVNTDQ,
16687
16688 /* SSE2 MMX */
16689 IX86_BUILTIN_MASKMOVDQU,
16690 IX86_BUILTIN_MOVMSKPD,
16691 IX86_BUILTIN_PMOVMSKB128,
16692
16693 IX86_BUILTIN_PACKSSWB128,
16694 IX86_BUILTIN_PACKSSDW128,
16695 IX86_BUILTIN_PACKUSWB128,
16696
16697 IX86_BUILTIN_PADDB128,
16698 IX86_BUILTIN_PADDW128,
16699 IX86_BUILTIN_PADDD128,
16700 IX86_BUILTIN_PADDQ128,
16701 IX86_BUILTIN_PADDSB128,
16702 IX86_BUILTIN_PADDSW128,
16703 IX86_BUILTIN_PADDUSB128,
16704 IX86_BUILTIN_PADDUSW128,
16705 IX86_BUILTIN_PSUBB128,
16706 IX86_BUILTIN_PSUBW128,
16707 IX86_BUILTIN_PSUBD128,
16708 IX86_BUILTIN_PSUBQ128,
16709 IX86_BUILTIN_PSUBSB128,
16710 IX86_BUILTIN_PSUBSW128,
16711 IX86_BUILTIN_PSUBUSB128,
16712 IX86_BUILTIN_PSUBUSW128,
16713
16714 IX86_BUILTIN_PAND128,
16715 IX86_BUILTIN_PANDN128,
16716 IX86_BUILTIN_POR128,
16717 IX86_BUILTIN_PXOR128,
16718
16719 IX86_BUILTIN_PAVGB128,
16720 IX86_BUILTIN_PAVGW128,
16721
16722 IX86_BUILTIN_PCMPEQB128,
16723 IX86_BUILTIN_PCMPEQW128,
16724 IX86_BUILTIN_PCMPEQD128,
16725 IX86_BUILTIN_PCMPGTB128,
16726 IX86_BUILTIN_PCMPGTW128,
16727 IX86_BUILTIN_PCMPGTD128,
16728
16729 IX86_BUILTIN_PMADDWD128,
16730
16731 IX86_BUILTIN_PMAXSW128,
16732 IX86_BUILTIN_PMAXUB128,
16733 IX86_BUILTIN_PMINSW128,
16734 IX86_BUILTIN_PMINUB128,
16735
16736 IX86_BUILTIN_PMULUDQ,
16737 IX86_BUILTIN_PMULUDQ128,
16738 IX86_BUILTIN_PMULHUW128,
16739 IX86_BUILTIN_PMULHW128,
16740 IX86_BUILTIN_PMULLW128,
16741
16742 IX86_BUILTIN_PSADBW128,
16743 IX86_BUILTIN_PSHUFHW,
16744 IX86_BUILTIN_PSHUFLW,
16745 IX86_BUILTIN_PSHUFD,
16746
16747 IX86_BUILTIN_PSLLDQI128,
16748 IX86_BUILTIN_PSLLWI128,
16749 IX86_BUILTIN_PSLLDI128,
16750 IX86_BUILTIN_PSLLQI128,
16751 IX86_BUILTIN_PSRAWI128,
16752 IX86_BUILTIN_PSRADI128,
16753 IX86_BUILTIN_PSRLDQI128,
16754 IX86_BUILTIN_PSRLWI128,
16755 IX86_BUILTIN_PSRLDI128,
16756 IX86_BUILTIN_PSRLQI128,
16757
16758 IX86_BUILTIN_PSLLDQ128,
16759 IX86_BUILTIN_PSLLW128,
16760 IX86_BUILTIN_PSLLD128,
16761 IX86_BUILTIN_PSLLQ128,
16762 IX86_BUILTIN_PSRAW128,
16763 IX86_BUILTIN_PSRAD128,
16764 IX86_BUILTIN_PSRLW128,
16765 IX86_BUILTIN_PSRLD128,
16766 IX86_BUILTIN_PSRLQ128,
16767
16768 IX86_BUILTIN_PUNPCKHBW128,
16769 IX86_BUILTIN_PUNPCKHWD128,
16770 IX86_BUILTIN_PUNPCKHDQ128,
16771 IX86_BUILTIN_PUNPCKHQDQ128,
16772 IX86_BUILTIN_PUNPCKLBW128,
16773 IX86_BUILTIN_PUNPCKLWD128,
16774 IX86_BUILTIN_PUNPCKLDQ128,
16775 IX86_BUILTIN_PUNPCKLQDQ128,
16776
16777 IX86_BUILTIN_CLFLUSH,
16778 IX86_BUILTIN_MFENCE,
16779 IX86_BUILTIN_LFENCE,
16780
16781   /* SSE3 (Prescott New Instructions).  */
16782 IX86_BUILTIN_ADDSUBPS,
16783 IX86_BUILTIN_HADDPS,
16784 IX86_BUILTIN_HSUBPS,
16785 IX86_BUILTIN_MOVSHDUP,
16786 IX86_BUILTIN_MOVSLDUP,
16787 IX86_BUILTIN_ADDSUBPD,
16788 IX86_BUILTIN_HADDPD,
16789 IX86_BUILTIN_HSUBPD,
16790 IX86_BUILTIN_LDDQU,
16791
16792 IX86_BUILTIN_MONITOR,
16793 IX86_BUILTIN_MWAIT,
16794
16795 /* SSSE3. */
16796 IX86_BUILTIN_PHADDW,
16797 IX86_BUILTIN_PHADDD,
16798 IX86_BUILTIN_PHADDSW,
16799 IX86_BUILTIN_PHSUBW,
16800 IX86_BUILTIN_PHSUBD,
16801 IX86_BUILTIN_PHSUBSW,
16802 IX86_BUILTIN_PMADDUBSW,
16803 IX86_BUILTIN_PMULHRSW,
16804 IX86_BUILTIN_PSHUFB,
16805 IX86_BUILTIN_PSIGNB,
16806 IX86_BUILTIN_PSIGNW,
16807 IX86_BUILTIN_PSIGND,
16808 IX86_BUILTIN_PALIGNR,
16809 IX86_BUILTIN_PABSB,
16810 IX86_BUILTIN_PABSW,
16811 IX86_BUILTIN_PABSD,
16812
16813 IX86_BUILTIN_PHADDW128,
16814 IX86_BUILTIN_PHADDD128,
16815 IX86_BUILTIN_PHADDSW128,
16816 IX86_BUILTIN_PHSUBW128,
16817 IX86_BUILTIN_PHSUBD128,
16818 IX86_BUILTIN_PHSUBSW128,
16819 IX86_BUILTIN_PMADDUBSW128,
16820 IX86_BUILTIN_PMULHRSW128,
16821 IX86_BUILTIN_PSHUFB128,
16822 IX86_BUILTIN_PSIGNB128,
16823 IX86_BUILTIN_PSIGNW128,
16824 IX86_BUILTIN_PSIGND128,
16825 IX86_BUILTIN_PALIGNR128,
16826 IX86_BUILTIN_PABSB128,
16827 IX86_BUILTIN_PABSW128,
16828 IX86_BUILTIN_PABSD128,
16829
16830 /* AMDFAM10 - SSE4A New Instructions. */
16831 IX86_BUILTIN_MOVNTSD,
16832 IX86_BUILTIN_MOVNTSS,
16833 IX86_BUILTIN_EXTRQI,
16834 IX86_BUILTIN_EXTRQ,
16835 IX86_BUILTIN_INSERTQI,
16836 IX86_BUILTIN_INSERTQ,
16837
16838 /* SSE4.1. */
16839 IX86_BUILTIN_BLENDPD,
16840 IX86_BUILTIN_BLENDPS,
16841 IX86_BUILTIN_BLENDVPD,
16842 IX86_BUILTIN_BLENDVPS,
16843 IX86_BUILTIN_PBLENDVB128,
16844 IX86_BUILTIN_PBLENDW128,
16845
16846 IX86_BUILTIN_DPPD,
16847 IX86_BUILTIN_DPPS,
16848
16849 IX86_BUILTIN_INSERTPS128,
16850
16851 IX86_BUILTIN_MOVNTDQA,
16852 IX86_BUILTIN_MPSADBW128,
16853 IX86_BUILTIN_PACKUSDW128,
16854 IX86_BUILTIN_PCMPEQQ,
16855 IX86_BUILTIN_PHMINPOSUW128,
16856
16857 IX86_BUILTIN_PMAXSB128,
16858 IX86_BUILTIN_PMAXSD128,
16859 IX86_BUILTIN_PMAXUD128,
16860 IX86_BUILTIN_PMAXUW128,
16861
16862 IX86_BUILTIN_PMINSB128,
16863 IX86_BUILTIN_PMINSD128,
16864 IX86_BUILTIN_PMINUD128,
16865 IX86_BUILTIN_PMINUW128,
16866
16867 IX86_BUILTIN_PMOVSXBW128,
16868 IX86_BUILTIN_PMOVSXBD128,
16869 IX86_BUILTIN_PMOVSXBQ128,
16870 IX86_BUILTIN_PMOVSXWD128,
16871 IX86_BUILTIN_PMOVSXWQ128,
16872 IX86_BUILTIN_PMOVSXDQ128,
16873
16874 IX86_BUILTIN_PMOVZXBW128,
16875 IX86_BUILTIN_PMOVZXBD128,
16876 IX86_BUILTIN_PMOVZXBQ128,
16877 IX86_BUILTIN_PMOVZXWD128,
16878 IX86_BUILTIN_PMOVZXWQ128,
16879 IX86_BUILTIN_PMOVZXDQ128,
16880
16881 IX86_BUILTIN_PMULDQ128,
16882 IX86_BUILTIN_PMULLD128,
16883
16884 IX86_BUILTIN_ROUNDPD,
16885 IX86_BUILTIN_ROUNDPS,
16886 IX86_BUILTIN_ROUNDSD,
16887 IX86_BUILTIN_ROUNDSS,
16888
16889 IX86_BUILTIN_PTESTZ,
16890 IX86_BUILTIN_PTESTC,
16891 IX86_BUILTIN_PTESTNZC,
16892
16893 IX86_BUILTIN_VEC_INIT_V2SI,
16894 IX86_BUILTIN_VEC_INIT_V4HI,
16895 IX86_BUILTIN_VEC_INIT_V8QI,
16896 IX86_BUILTIN_VEC_EXT_V2DF,
16897 IX86_BUILTIN_VEC_EXT_V2DI,
16898 IX86_BUILTIN_VEC_EXT_V4SF,
16899 IX86_BUILTIN_VEC_EXT_V4SI,
16900 IX86_BUILTIN_VEC_EXT_V8HI,
16901 IX86_BUILTIN_VEC_EXT_V2SI,
16902 IX86_BUILTIN_VEC_EXT_V4HI,
16903 IX86_BUILTIN_VEC_EXT_V16QI,
16904 IX86_BUILTIN_VEC_SET_V2DI,
16905 IX86_BUILTIN_VEC_SET_V4SF,
16906 IX86_BUILTIN_VEC_SET_V4SI,
16907 IX86_BUILTIN_VEC_SET_V8HI,
16908 IX86_BUILTIN_VEC_SET_V4HI,
16909 IX86_BUILTIN_VEC_SET_V16QI,
16910
16911 IX86_BUILTIN_VEC_PACK_SFIX,
16912
16913 /* SSE4.2. */
16914 IX86_BUILTIN_CRC32QI,
16915 IX86_BUILTIN_CRC32HI,
16916 IX86_BUILTIN_CRC32SI,
16917 IX86_BUILTIN_CRC32DI,
16918
16919 IX86_BUILTIN_PCMPESTRI128,
16920 IX86_BUILTIN_PCMPESTRM128,
16921 IX86_BUILTIN_PCMPESTRA128,
16922 IX86_BUILTIN_PCMPESTRC128,
16923 IX86_BUILTIN_PCMPESTRO128,
16924 IX86_BUILTIN_PCMPESTRS128,
16925 IX86_BUILTIN_PCMPESTRZ128,
16926 IX86_BUILTIN_PCMPISTRI128,
16927 IX86_BUILTIN_PCMPISTRM128,
16928 IX86_BUILTIN_PCMPISTRA128,
16929 IX86_BUILTIN_PCMPISTRC128,
16930 IX86_BUILTIN_PCMPISTRO128,
16931 IX86_BUILTIN_PCMPISTRS128,
16932 IX86_BUILTIN_PCMPISTRZ128,
16933
16934 IX86_BUILTIN_PCMPGTQ,
16935
16936 /* TFmode support builtins. */
16937 IX86_BUILTIN_INFQ,
16938 IX86_BUILTIN_FABSQ,
16939 IX86_BUILTIN_COPYSIGNQ,
16940
16941 IX86_BUILTIN_MAX
16942 };
16943
16944 /* Table for the ix86 builtin decls. */
16945 static GTY(()) tree ix86_builtins[(int) IX86_BUILTIN_MAX];
16946
16947 /* Add an ix86 target builtin function with CODE, NAME and TYPE.  Do so
16948    only if the ISA bits in MASK are enabled in ix86_isa_flags (and, for
16949    64-bit-only builtins, only if TARGET_64BIT).  Stores the function decl
16950    in the ix86_builtins array.  Returns the decl, or NULL_TREE if the
       builtin was not added.  */
16951
16952 static inline tree
16953 def_builtin (int mask, const char *name, tree type, enum ix86_builtins code)
16954 {
16955 tree decl = NULL_TREE;
16956
16957 if (mask & ix86_isa_flags
16958 && (!(mask & OPTION_MASK_ISA_64BIT) || TARGET_64BIT))
16959 {
16960 decl = add_builtin_function (name, type, code, BUILT_IN_MD,
16961 NULL, NULL_TREE);
16962 ix86_builtins[(int) code] = decl;
16963 }
16964
16965 return decl;
16966 }
16967
16968 /* Like def_builtin, but also marks the function decl "const". */
16969
16970 static inline tree
16971 def_builtin_const (int mask, const char *name, tree type,
16972 enum ix86_builtins code)
16973 {
16974 tree decl = def_builtin (mask, name, type, code);
16975 if (decl)
16976 TREE_READONLY (decl) = 1;
16977 return decl;
16978 }
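
/* A representative use of the helpers above (a sketch only: the V4SF
   function-type node is built later in ix86_init_mmx_sse_builtins, and
   most builtins are in fact registered by walking the bdesc_* tables
   below rather than by open-coded calls):

     def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_addps",
			v4sf_ftype_v4sf_v4sf, IX86_BUILTIN_ADDPS);

   This registers __builtin_ia32_addps only when SSE is enabled in
   ix86_isa_flags, marks the decl TREE_READONLY ("const"), and records
   it in ix86_builtins[IX86_BUILTIN_ADDPS].  */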
16979
16980 /* Bits for builtin_description.flag. */
16981
16982 /* Set when we don't support the comparison natively, and should
16983    swap the comparison operands in order to support it.  */
16984 #define BUILTIN_DESC_SWAP_OPERANDS 1
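
/* For example, __builtin_ia32_cmpgtps is described in bdesc_2arg below
   with comparison code LT and this flag set: A > B is expanded as the
   natively supported B < A (cmpltps) with the operands swapped.  */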
16985
16986 struct builtin_description
16987 {
16988 const unsigned int mask;
16989 const enum insn_code icode;
16990 const char *const name;
16991 const enum ix86_builtins code;
16992 const enum rtx_code comparison;
16993 const int flag;
16994 };
16995
16996 static const struct builtin_description bdesc_comi[] =
16997 {
16998 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comieq", IX86_BUILTIN_COMIEQSS, UNEQ, 0 },
16999 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comilt", IX86_BUILTIN_COMILTSS, UNLT, 0 },
17000 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comile", IX86_BUILTIN_COMILESS, UNLE, 0 },
17001 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comigt", IX86_BUILTIN_COMIGTSS, GT, 0 },
17002 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comige", IX86_BUILTIN_COMIGESS, GE, 0 },
17003 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_comi, "__builtin_ia32_comineq", IX86_BUILTIN_COMINEQSS, LTGT, 0 },
17004 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomieq", IX86_BUILTIN_UCOMIEQSS, UNEQ, 0 },
17005 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomilt", IX86_BUILTIN_UCOMILTSS, UNLT, 0 },
17006 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomile", IX86_BUILTIN_UCOMILESS, UNLE, 0 },
17007 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomigt", IX86_BUILTIN_UCOMIGTSS, GT, 0 },
17008 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomige", IX86_BUILTIN_UCOMIGESS, GE, 0 },
17009 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_ucomi, "__builtin_ia32_ucomineq", IX86_BUILTIN_UCOMINEQSS, LTGT, 0 },
17010 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdeq", IX86_BUILTIN_COMIEQSD, UNEQ, 0 },
17011 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdlt", IX86_BUILTIN_COMILTSD, UNLT, 0 },
17012 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdle", IX86_BUILTIN_COMILESD, UNLE, 0 },
17013 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdgt", IX86_BUILTIN_COMIGTSD, GT, 0 },
17014 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdge", IX86_BUILTIN_COMIGESD, GE, 0 },
17015 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_comi, "__builtin_ia32_comisdneq", IX86_BUILTIN_COMINEQSD, LTGT, 0 },
17016 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdeq", IX86_BUILTIN_UCOMIEQSD, UNEQ, 0 },
17017 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdlt", IX86_BUILTIN_UCOMILTSD, UNLT, 0 },
17018 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdle", IX86_BUILTIN_UCOMILESD, UNLE, 0 },
17019 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdgt", IX86_BUILTIN_UCOMIGTSD, GT, 0 },
17020 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdge", IX86_BUILTIN_UCOMIGESD, GE, 0 },
17021 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ucomi, "__builtin_ia32_ucomisdneq", IX86_BUILTIN_UCOMINEQSD, LTGT, 0 },
17022 };
17023
17024 static const struct builtin_description bdesc_ptest[] =
17025 {
17026 /* SSE4.1 */
17027 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestz128", IX86_BUILTIN_PTESTZ, EQ, 0 },
17028 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestc128", IX86_BUILTIN_PTESTC, LTU, 0 },
17029 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_ptest, "__builtin_ia32_ptestnzc128", IX86_BUILTIN_PTESTNZC, GTU, 0 },
17030 };
17031
17032 static const struct builtin_description bdesc_pcmpestr[] =
17033 {
17034 /* SSE4.2 */
17035 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestri128", IX86_BUILTIN_PCMPESTRI128, UNKNOWN, 0 },
17036 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrm128", IX86_BUILTIN_PCMPESTRM128, UNKNOWN, 0 },
17037 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestria128", IX86_BUILTIN_PCMPESTRA128, UNKNOWN, (int) CCAmode },
17038 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestric128", IX86_BUILTIN_PCMPESTRC128, UNKNOWN, (int) CCCmode },
17039 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestrio128", IX86_BUILTIN_PCMPESTRO128, UNKNOWN, (int) CCOmode },
17040 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestris128", IX86_BUILTIN_PCMPESTRS128, UNKNOWN, (int) CCSmode },
17041 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpestr, "__builtin_ia32_pcmpestriz128", IX86_BUILTIN_PCMPESTRZ128, UNKNOWN, (int) CCZmode },
17042 };
17043
17044 static const struct builtin_description bdesc_pcmpistr[] =
17045 {
17046 /* SSE4.2 */
17047 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistri128", IX86_BUILTIN_PCMPISTRI128, UNKNOWN, 0 },
17048 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrm128", IX86_BUILTIN_PCMPISTRM128, UNKNOWN, 0 },
17049 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistria128", IX86_BUILTIN_PCMPISTRA128, UNKNOWN, (int) CCAmode },
17050 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistric128", IX86_BUILTIN_PCMPISTRC128, UNKNOWN, (int) CCCmode },
17051 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistrio128", IX86_BUILTIN_PCMPISTRO128, UNKNOWN, (int) CCOmode },
17052 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistris128", IX86_BUILTIN_PCMPISTRS128, UNKNOWN, (int) CCSmode },
17053 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_pcmpistr, "__builtin_ia32_pcmpistriz128", IX86_BUILTIN_PCMPISTRZ128, UNKNOWN, (int) CCZmode },
17054 };
17055
17056 static const struct builtin_description bdesc_crc32[] =
17057 {
17058 /* SSE4.2 */
17059   { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32qi, 0, IX86_BUILTIN_CRC32QI, UNKNOWN, 0 },
17060 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32hi, 0, IX86_BUILTIN_CRC32HI, UNKNOWN, 0 },
17061 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_crc32si, 0, IX86_BUILTIN_CRC32SI, UNKNOWN, 0 },
17062   { OPTION_MASK_ISA_SSE4_2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse4_2_crc32di, 0, IX86_BUILTIN_CRC32DI, UNKNOWN, 0 },
17063 };
17064
17065 /* SSE builtins that take 3 arguments, where the last argument must be an immediate or xmm0.  */
17066 static const struct builtin_description bdesc_sse_3arg[] =
17067 {
17068 /* SSE4.1 */
17069 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, 0 },
17070 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendps, "__builtin_ia32_blendps", IX86_BUILTIN_BLENDPS, UNKNOWN, 0 },
17071 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvpd, "__builtin_ia32_blendvpd", IX86_BUILTIN_BLENDVPD, UNKNOWN, 0 },
17072 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendvps, "__builtin_ia32_blendvps", IX86_BUILTIN_BLENDVPS, UNKNOWN, 0 },
17073 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dppd, "__builtin_ia32_dppd", IX86_BUILTIN_DPPD, UNKNOWN, 0 },
17074 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_dpps, "__builtin_ia32_dpps", IX86_BUILTIN_DPPS, UNKNOWN, 0 },
17075 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_insertps, "__builtin_ia32_insertps128", IX86_BUILTIN_INSERTPS128, UNKNOWN, 0 },
17076 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mpsadbw, "__builtin_ia32_mpsadbw128", IX86_BUILTIN_MPSADBW128, UNKNOWN, 0 },
17077 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendvb, "__builtin_ia32_pblendvb128", IX86_BUILTIN_PBLENDVB128, UNKNOWN, 0 },
17078 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_pblendw, "__builtin_ia32_pblendw128", IX86_BUILTIN_PBLENDW128, UNKNOWN, 0 },
17079 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundsd, 0, IX86_BUILTIN_ROUNDSD, UNKNOWN, 0 },
17080 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundss, 0, IX86_BUILTIN_ROUNDSS, UNKNOWN, 0 },
17081 };
17082
17083 static const struct builtin_description bdesc_2arg[] =
17084 {
17085 /* SSE */
17086 { OPTION_MASK_ISA_SSE, CODE_FOR_addv4sf3, "__builtin_ia32_addps", IX86_BUILTIN_ADDPS, UNKNOWN, 0 },
17087 { OPTION_MASK_ISA_SSE, CODE_FOR_subv4sf3, "__builtin_ia32_subps", IX86_BUILTIN_SUBPS, UNKNOWN, 0 },
17088 { OPTION_MASK_ISA_SSE, CODE_FOR_mulv4sf3, "__builtin_ia32_mulps", IX86_BUILTIN_MULPS, UNKNOWN, 0 },
17089 { OPTION_MASK_ISA_SSE, CODE_FOR_divv4sf3, "__builtin_ia32_divps", IX86_BUILTIN_DIVPS, UNKNOWN, 0 },
17090 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmaddv4sf3, "__builtin_ia32_addss", IX86_BUILTIN_ADDSS, UNKNOWN, 0 },
17091 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsubv4sf3, "__builtin_ia32_subss", IX86_BUILTIN_SUBSS, UNKNOWN, 0 },
17092 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmulv4sf3, "__builtin_ia32_mulss", IX86_BUILTIN_MULSS, UNKNOWN, 0 },
17093 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmdivv4sf3, "__builtin_ia32_divss", IX86_BUILTIN_DIVSS, UNKNOWN, 0 },
17094
17095 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpeqps", IX86_BUILTIN_CMPEQPS, EQ, 0 },
17096 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpltps", IX86_BUILTIN_CMPLTPS, LT, 0 },
17097 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpleps", IX86_BUILTIN_CMPLEPS, LE, 0 },
17098 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgtps", IX86_BUILTIN_CMPGTPS, LT, BUILTIN_DESC_SWAP_OPERANDS },
17099 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpgeps", IX86_BUILTIN_CMPGEPS, LE, BUILTIN_DESC_SWAP_OPERANDS },
17100 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpunordps", IX86_BUILTIN_CMPUNORDPS, UNORDERED, 0 },
17101 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpneqps", IX86_BUILTIN_CMPNEQPS, NE, 0 },
17102 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnltps", IX86_BUILTIN_CMPNLTPS, UNGE, 0 },
17103 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpnleps", IX86_BUILTIN_CMPNLEPS, UNGT, 0 },
17104 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngtps", IX86_BUILTIN_CMPNGTPS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17105 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpngeps", IX86_BUILTIN_CMPNGEPS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17106 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_maskcmpv4sf3, "__builtin_ia32_cmpordps", IX86_BUILTIN_CMPORDPS, ORDERED, 0 },
17107 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpeqss", IX86_BUILTIN_CMPEQSS, EQ, 0 },
17108 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpltss", IX86_BUILTIN_CMPLTSS, LT, 0 },
17109 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpless", IX86_BUILTIN_CMPLESS, LE, 0 },
17110 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpunordss", IX86_BUILTIN_CMPUNORDSS, UNORDERED, 0 },
17111 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpneqss", IX86_BUILTIN_CMPNEQSS, NE, 0 },
17112 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnltss", IX86_BUILTIN_CMPNLTSS, UNGE, 0 },
17113 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpnless", IX86_BUILTIN_CMPNLESS, UNGT, 0 },
17114 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngtss", IX86_BUILTIN_CMPNGTSS, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17115 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpngess", IX86_BUILTIN_CMPNGESS, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17116 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmmaskcmpv4sf3, "__builtin_ia32_cmpordss", IX86_BUILTIN_CMPORDSS, ORDERED, 0 },
17117
17118 { OPTION_MASK_ISA_SSE, CODE_FOR_sminv4sf3, "__builtin_ia32_minps", IX86_BUILTIN_MINPS, UNKNOWN, 0 },
17119 { OPTION_MASK_ISA_SSE, CODE_FOR_smaxv4sf3, "__builtin_ia32_maxps", IX86_BUILTIN_MAXPS, UNKNOWN, 0 },
17120 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsminv4sf3, "__builtin_ia32_minss", IX86_BUILTIN_MINSS, UNKNOWN, 0 },
17121 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_vmsmaxv4sf3, "__builtin_ia32_maxss", IX86_BUILTIN_MAXSS, UNKNOWN, 0 },
17122
17123 { OPTION_MASK_ISA_SSE, CODE_FOR_andv4sf3, "__builtin_ia32_andps", IX86_BUILTIN_ANDPS, UNKNOWN, 0 },
17124 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_nandv4sf3, "__builtin_ia32_andnps", IX86_BUILTIN_ANDNPS, UNKNOWN, 0 },
17125 { OPTION_MASK_ISA_SSE, CODE_FOR_iorv4sf3, "__builtin_ia32_orps", IX86_BUILTIN_ORPS, UNKNOWN, 0 },
17126 { OPTION_MASK_ISA_SSE, CODE_FOR_xorv4sf3, "__builtin_ia32_xorps", IX86_BUILTIN_XORPS, UNKNOWN, 0 },
17127
17128 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movss, "__builtin_ia32_movss", IX86_BUILTIN_MOVSS, UNKNOWN, 0 },
17129 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movhlps, "__builtin_ia32_movhlps", IX86_BUILTIN_MOVHLPS, UNKNOWN, 0 },
17130 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps, "__builtin_ia32_movlhps", IX86_BUILTIN_MOVLHPS, UNKNOWN, 0 },
17131 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpckhps, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, 0 },
17132 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_unpcklps, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, 0 },
17133
17134 /* MMX */
17135 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv8qi3, "__builtin_ia32_paddb", IX86_BUILTIN_PADDB, UNKNOWN, 0 },
17136 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv4hi3, "__builtin_ia32_paddw", IX86_BUILTIN_PADDW, UNKNOWN, 0 },
17137 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv2si3, "__builtin_ia32_paddd", IX86_BUILTIN_PADDD, UNKNOWN, 0 },
17138 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_adddi3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, 0 },
17139 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv8qi3, "__builtin_ia32_psubb", IX86_BUILTIN_PSUBB, UNKNOWN, 0 },
17140 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv4hi3, "__builtin_ia32_psubw", IX86_BUILTIN_PSUBW, UNKNOWN, 0 },
17141 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv2si3, "__builtin_ia32_psubd", IX86_BUILTIN_PSUBD, UNKNOWN, 0 },
17142 { OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subdi3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, 0 },
17143
17144 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv8qi3, "__builtin_ia32_paddsb", IX86_BUILTIN_PADDSB, UNKNOWN, 0 },
17145 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ssaddv4hi3, "__builtin_ia32_paddsw", IX86_BUILTIN_PADDSW, UNKNOWN, 0 },
17146 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv8qi3, "__builtin_ia32_psubsb", IX86_BUILTIN_PSUBSB, UNKNOWN, 0 },
17147 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sssubv4hi3, "__builtin_ia32_psubsw", IX86_BUILTIN_PSUBSW, UNKNOWN, 0 },
17148 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv8qi3, "__builtin_ia32_paddusb", IX86_BUILTIN_PADDUSB, UNKNOWN, 0 },
17149 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_usaddv4hi3, "__builtin_ia32_paddusw", IX86_BUILTIN_PADDUSW, UNKNOWN, 0 },
17150 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv8qi3, "__builtin_ia32_psubusb", IX86_BUILTIN_PSUBUSB, UNKNOWN, 0 },
17151 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ussubv4hi3, "__builtin_ia32_psubusw", IX86_BUILTIN_PSUBUSW, UNKNOWN, 0 },
17152
17153 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_mulv4hi3, "__builtin_ia32_pmullw", IX86_BUILTIN_PMULLW, UNKNOWN, 0 },
17154 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smulv4hi3_highpart, "__builtin_ia32_pmulhw", IX86_BUILTIN_PMULHW, UNKNOWN, 0 },
17155 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, 0 },
17156
17157 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_andv2si3, "__builtin_ia32_pand", IX86_BUILTIN_PAND, UNKNOWN, 0 },
17158 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_nandv2si3, "__builtin_ia32_pandn", IX86_BUILTIN_PANDN, UNKNOWN, 0 },
17159 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_iorv2si3, "__builtin_ia32_por", IX86_BUILTIN_POR, UNKNOWN, 0 },
17160 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_xorv2si3, "__builtin_ia32_pxor", IX86_BUILTIN_PXOR, UNKNOWN, 0 },
17161
17162 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, 0 },
17163 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, 0 },
17164
17165 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv8qi3, "__builtin_ia32_pcmpeqb", IX86_BUILTIN_PCMPEQB, UNKNOWN, 0 },
17166 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv4hi3, "__builtin_ia32_pcmpeqw", IX86_BUILTIN_PCMPEQW, UNKNOWN, 0 },
17167 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_eqv2si3, "__builtin_ia32_pcmpeqd", IX86_BUILTIN_PCMPEQD, UNKNOWN, 0 },
17168 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv8qi3, "__builtin_ia32_pcmpgtb", IX86_BUILTIN_PCMPGTB, UNKNOWN, 0 },
17169 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv4hi3, "__builtin_ia32_pcmpgtw", IX86_BUILTIN_PCMPGTW, UNKNOWN, 0 },
17170 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_gtv2si3, "__builtin_ia32_pcmpgtd", IX86_BUILTIN_PCMPGTD, UNKNOWN, 0 },
17171
17172 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, 0 },
17173 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, 0 },
17174 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, 0 },
17175 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, 0 },
17176
17177 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhbw, "__builtin_ia32_punpckhbw", IX86_BUILTIN_PUNPCKHBW, UNKNOWN, 0 },
17178 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhwd, "__builtin_ia32_punpckhwd", IX86_BUILTIN_PUNPCKHWD, UNKNOWN, 0 },
17179 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckhdq, "__builtin_ia32_punpckhdq", IX86_BUILTIN_PUNPCKHDQ, UNKNOWN, 0 },
17180 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklbw, "__builtin_ia32_punpcklbw", IX86_BUILTIN_PUNPCKLBW, UNKNOWN, 0 },
17181 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpcklwd, "__builtin_ia32_punpcklwd", IX86_BUILTIN_PUNPCKLWD, UNKNOWN, 0 },
17182 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_punpckldq, "__builtin_ia32_punpckldq", IX86_BUILTIN_PUNPCKLDQ, UNKNOWN, 0 },
17183
17184 /* Special. */
17185 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packsswb, 0, IX86_BUILTIN_PACKSSWB, UNKNOWN, 0 },
17186 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packssdw, 0, IX86_BUILTIN_PACKSSDW, UNKNOWN, 0 },
17187 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_packuswb, 0, IX86_BUILTIN_PACKUSWB, UNKNOWN, 0 },
17188
17189 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, 0, IX86_BUILTIN_CVTPI2PS, UNKNOWN, 0 },
17190 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, 0, IX86_BUILTIN_CVTSI2SS, UNKNOWN, 0 },
17191 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, 0, IX86_BUILTIN_CVTSI642SS, UNKNOWN, 0 },
17192
17193 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLW, UNKNOWN, 0 },
17194 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv4hi3, 0, IX86_BUILTIN_PSLLWI, UNKNOWN, 0 },
17195 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLD, UNKNOWN, 0 },
17196 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashlv2si3, 0, IX86_BUILTIN_PSLLDI, UNKNOWN, 0 },
17197 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQ, UNKNOWN, 0 },
17198 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashldi3, 0, IX86_BUILTIN_PSLLQI, UNKNOWN, 0 },
17199
17200 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLW, UNKNOWN, 0 },
17201 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv4hi3, 0, IX86_BUILTIN_PSRLWI, UNKNOWN, 0 },
17202 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLD, UNKNOWN, 0 },
17203 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrv2si3, 0, IX86_BUILTIN_PSRLDI, UNKNOWN, 0 },
17204 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQ, UNKNOWN, 0 },
17205 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_lshrdi3, 0, IX86_BUILTIN_PSRLQI, UNKNOWN, 0 },
17206
17207 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAW, UNKNOWN, 0 },
17208 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv4hi3, 0, IX86_BUILTIN_PSRAWI, UNKNOWN, 0 },
17209 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRAD, UNKNOWN, 0 },
17210 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_ashrv2si3, 0, IX86_BUILTIN_PSRADI, UNKNOWN, 0 },
17211
17212 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, 0, IX86_BUILTIN_PSADBW, UNKNOWN, 0 },
17213 { OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmaddwd, 0, IX86_BUILTIN_PMADDWD, UNKNOWN, 0 },
17214
17215 /* SSE2 */
17216 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2df3, "__builtin_ia32_addpd", IX86_BUILTIN_ADDPD, UNKNOWN, 0 },
17217 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2df3, "__builtin_ia32_subpd", IX86_BUILTIN_SUBPD, UNKNOWN, 0 },
17218 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv2df3, "__builtin_ia32_mulpd", IX86_BUILTIN_MULPD, UNKNOWN, 0 },
17219 { OPTION_MASK_ISA_SSE2, CODE_FOR_divv2df3, "__builtin_ia32_divpd", IX86_BUILTIN_DIVPD, UNKNOWN, 0 },
17220 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmaddv2df3, "__builtin_ia32_addsd", IX86_BUILTIN_ADDSD, UNKNOWN, 0 },
17221 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsubv2df3, "__builtin_ia32_subsd", IX86_BUILTIN_SUBSD, UNKNOWN, 0 },
17222 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmulv2df3, "__builtin_ia32_mulsd", IX86_BUILTIN_MULSD, UNKNOWN, 0 },
17223 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmdivv2df3, "__builtin_ia32_divsd", IX86_BUILTIN_DIVSD, UNKNOWN, 0 },
17224
17225 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpeqpd", IX86_BUILTIN_CMPEQPD, EQ, 0 },
17226 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpltpd", IX86_BUILTIN_CMPLTPD, LT, 0 },
17227 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmplepd", IX86_BUILTIN_CMPLEPD, LE, 0 },
17228 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgtpd", IX86_BUILTIN_CMPGTPD, LT, BUILTIN_DESC_SWAP_OPERANDS },
17229 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpgepd", IX86_BUILTIN_CMPGEPD, LE, BUILTIN_DESC_SWAP_OPERANDS },
17230 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpunordpd", IX86_BUILTIN_CMPUNORDPD, UNORDERED, 0 },
17231 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpneqpd", IX86_BUILTIN_CMPNEQPD, NE, 0 },
17232 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnltpd", IX86_BUILTIN_CMPNLTPD, UNGE, 0 },
17233 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpnlepd", IX86_BUILTIN_CMPNLEPD, UNGT, 0 },
17234 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngtpd", IX86_BUILTIN_CMPNGTPD, UNGE, BUILTIN_DESC_SWAP_OPERANDS },
17235 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpngepd", IX86_BUILTIN_CMPNGEPD, UNGT, BUILTIN_DESC_SWAP_OPERANDS },
17236 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_maskcmpv2df3, "__builtin_ia32_cmpordpd", IX86_BUILTIN_CMPORDPD, ORDERED, 0 },
17237 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpeqsd", IX86_BUILTIN_CMPEQSD, EQ, 0 },
17238 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpltsd", IX86_BUILTIN_CMPLTSD, LT, 0 },
17239 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmplesd", IX86_BUILTIN_CMPLESD, LE, 0 },
17240 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpunordsd", IX86_BUILTIN_CMPUNORDSD, UNORDERED, 0 },
17241 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpneqsd", IX86_BUILTIN_CMPNEQSD, NE, 0 },
17242 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnltsd", IX86_BUILTIN_CMPNLTSD, UNGE, 0 },
17243 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpnlesd", IX86_BUILTIN_CMPNLESD, UNGT, 0 },
17244 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmmaskcmpv2df3, "__builtin_ia32_cmpordsd", IX86_BUILTIN_CMPORDSD, ORDERED, 0 },
17245
17246 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv2df3, "__builtin_ia32_minpd", IX86_BUILTIN_MINPD, UNKNOWN, 0 },
17247 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv2df3, "__builtin_ia32_maxpd", IX86_BUILTIN_MAXPD, UNKNOWN, 0 },
17248 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsminv2df3, "__builtin_ia32_minsd", IX86_BUILTIN_MINSD, UNKNOWN, 0 },
17249 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsmaxv2df3, "__builtin_ia32_maxsd", IX86_BUILTIN_MAXSD, UNKNOWN, 0 },
17250
17251 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2df3, "__builtin_ia32_andpd", IX86_BUILTIN_ANDPD, UNKNOWN, 0 },
17252 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2df3, "__builtin_ia32_andnpd", IX86_BUILTIN_ANDNPD, UNKNOWN, 0 },
17253 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2df3, "__builtin_ia32_orpd", IX86_BUILTIN_ORPD, UNKNOWN, 0 },
17254 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2df3, "__builtin_ia32_xorpd", IX86_BUILTIN_XORPD, UNKNOWN, 0 },
17255
17256 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movsd, "__builtin_ia32_movsd", IX86_BUILTIN_MOVSD, UNKNOWN, 0 },
17257 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpckhpd, "__builtin_ia32_unpckhpd", IX86_BUILTIN_UNPCKHPD, UNKNOWN, 0 },
17258 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_unpcklpd, "__builtin_ia32_unpcklpd", IX86_BUILTIN_UNPCKLPD, UNKNOWN, 0 },
17259
17260 { OPTION_MASK_ISA_SSE2, CODE_FOR_vec_pack_sfix_v2df, "__builtin_ia32_vec_pack_sfix", IX86_BUILTIN_VEC_PACK_SFIX, UNKNOWN, 0 },
17261
17262 /* SSE2 MMX */
17263 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv16qi3, "__builtin_ia32_paddb128", IX86_BUILTIN_PADDB128, UNKNOWN, 0 },
17264 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv8hi3, "__builtin_ia32_paddw128", IX86_BUILTIN_PADDW128, UNKNOWN, 0 },
17265 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv4si3, "__builtin_ia32_paddd128", IX86_BUILTIN_PADDD128, UNKNOWN, 0 },
17266 { OPTION_MASK_ISA_SSE2, CODE_FOR_addv2di3, "__builtin_ia32_paddq128", IX86_BUILTIN_PADDQ128, UNKNOWN, 0 },
17267 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv16qi3, "__builtin_ia32_psubb128", IX86_BUILTIN_PSUBB128, UNKNOWN, 0 },
17268 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv8hi3, "__builtin_ia32_psubw128", IX86_BUILTIN_PSUBW128, UNKNOWN, 0 },
17269 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv4si3, "__builtin_ia32_psubd128", IX86_BUILTIN_PSUBD128, UNKNOWN, 0 },
17270 { OPTION_MASK_ISA_SSE2, CODE_FOR_subv2di3, "__builtin_ia32_psubq128", IX86_BUILTIN_PSUBQ128, UNKNOWN, 0 },
17271
17272   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv16qi3, "__builtin_ia32_paddsb128", IX86_BUILTIN_PADDSB128, UNKNOWN, 0 },
17273   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ssaddv8hi3, "__builtin_ia32_paddsw128", IX86_BUILTIN_PADDSW128, UNKNOWN, 0 },
17274   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv16qi3, "__builtin_ia32_psubsb128", IX86_BUILTIN_PSUBSB128, UNKNOWN, 0 },
17275   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_sssubv8hi3, "__builtin_ia32_psubsw128", IX86_BUILTIN_PSUBSW128, UNKNOWN, 0 },
17276   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv16qi3, "__builtin_ia32_paddusb128", IX86_BUILTIN_PADDUSB128, UNKNOWN, 0 },
17277   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_usaddv8hi3, "__builtin_ia32_paddusw128", IX86_BUILTIN_PADDUSW128, UNKNOWN, 0 },
17278   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv16qi3, "__builtin_ia32_psubusb128", IX86_BUILTIN_PSUBUSB128, UNKNOWN, 0 },
17279   { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_ussubv8hi3, "__builtin_ia32_psubusw128", IX86_BUILTIN_PSUBUSW128, UNKNOWN, 0 },
17280
17281 { OPTION_MASK_ISA_SSE2, CODE_FOR_mulv8hi3, "__builtin_ia32_pmullw128", IX86_BUILTIN_PMULLW128, UNKNOWN, 0 },
17282 { OPTION_MASK_ISA_SSE2, CODE_FOR_smulv8hi3_highpart, "__builtin_ia32_pmulhw128", IX86_BUILTIN_PMULHW128, UNKNOWN, 0 },
17283
17284 { OPTION_MASK_ISA_SSE2, CODE_FOR_andv2di3, "__builtin_ia32_pand128", IX86_BUILTIN_PAND128, UNKNOWN, 0 },
17285 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_nandv2di3, "__builtin_ia32_pandn128", IX86_BUILTIN_PANDN128, UNKNOWN, 0 },
17286 { OPTION_MASK_ISA_SSE2, CODE_FOR_iorv2di3, "__builtin_ia32_por128", IX86_BUILTIN_POR128, UNKNOWN, 0 },
17287 { OPTION_MASK_ISA_SSE2, CODE_FOR_xorv2di3, "__builtin_ia32_pxor128", IX86_BUILTIN_PXOR128, UNKNOWN, 0 },
17288
17289 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv16qi3, "__builtin_ia32_pavgb128", IX86_BUILTIN_PAVGB128, UNKNOWN, 0 },
17290 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_uavgv8hi3, "__builtin_ia32_pavgw128", IX86_BUILTIN_PAVGW128, UNKNOWN, 0 },
17291
17292 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv16qi3, "__builtin_ia32_pcmpeqb128", IX86_BUILTIN_PCMPEQB128, UNKNOWN, 0 },
17293 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv8hi3, "__builtin_ia32_pcmpeqw128", IX86_BUILTIN_PCMPEQW128, UNKNOWN, 0 },
17294 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_eqv4si3, "__builtin_ia32_pcmpeqd128", IX86_BUILTIN_PCMPEQD128, UNKNOWN, 0 },
17295 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv16qi3, "__builtin_ia32_pcmpgtb128", IX86_BUILTIN_PCMPGTB128, UNKNOWN, 0 },
17296 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv8hi3, "__builtin_ia32_pcmpgtw128", IX86_BUILTIN_PCMPGTW128, UNKNOWN, 0 },
17297 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_gtv4si3, "__builtin_ia32_pcmpgtd128", IX86_BUILTIN_PCMPGTD128, UNKNOWN, 0 },
17298
17299 { OPTION_MASK_ISA_SSE2, CODE_FOR_umaxv16qi3, "__builtin_ia32_pmaxub128", IX86_BUILTIN_PMAXUB128, UNKNOWN, 0 },
17300 { OPTION_MASK_ISA_SSE2, CODE_FOR_smaxv8hi3, "__builtin_ia32_pmaxsw128", IX86_BUILTIN_PMAXSW128, UNKNOWN, 0 },
17301 { OPTION_MASK_ISA_SSE2, CODE_FOR_uminv16qi3, "__builtin_ia32_pminub128", IX86_BUILTIN_PMINUB128, UNKNOWN, 0 },
17302 { OPTION_MASK_ISA_SSE2, CODE_FOR_sminv8hi3, "__builtin_ia32_pminsw128", IX86_BUILTIN_PMINSW128, UNKNOWN, 0 },
17303
17304 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhbw, "__builtin_ia32_punpckhbw128", IX86_BUILTIN_PUNPCKHBW128, UNKNOWN, 0 },
17305 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhwd, "__builtin_ia32_punpckhwd128", IX86_BUILTIN_PUNPCKHWD128, UNKNOWN, 0 },
17306 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhdq, "__builtin_ia32_punpckhdq128", IX86_BUILTIN_PUNPCKHDQ128, UNKNOWN, 0 },
17307 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckhqdq, "__builtin_ia32_punpckhqdq128", IX86_BUILTIN_PUNPCKHQDQ128, UNKNOWN, 0 },
17308 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklbw, "__builtin_ia32_punpcklbw128", IX86_BUILTIN_PUNPCKLBW128, UNKNOWN, 0 },
17309 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklwd, "__builtin_ia32_punpcklwd128", IX86_BUILTIN_PUNPCKLWD128, UNKNOWN, 0 },
17310 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpckldq, "__builtin_ia32_punpckldq128", IX86_BUILTIN_PUNPCKLDQ128, UNKNOWN, 0 },
17311 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_punpcklqdq, "__builtin_ia32_punpcklqdq128", IX86_BUILTIN_PUNPCKLQDQ128, UNKNOWN, 0 },
17312
17313 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packsswb, "__builtin_ia32_packsswb128", IX86_BUILTIN_PACKSSWB128, UNKNOWN, 0 },
17314 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packssdw, "__builtin_ia32_packssdw128", IX86_BUILTIN_PACKSSDW128, UNKNOWN, 0 },
17315 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128", IX86_BUILTIN_PACKUSWB128, UNKNOWN, 0 },
17316
17317 { OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, 0 },
17318 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, 0, IX86_BUILTIN_PSADBW128, UNKNOWN, 0 },
17319
17320 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulsidi3, 0, IX86_BUILTIN_PMULUDQ, UNKNOWN, 0 },
17321 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv2siv2di3, 0, IX86_BUILTIN_PMULUDQ128, UNKNOWN, 0 },
17322
17323 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv8hi3, 0, IX86_BUILTIN_PSLLWI128, UNKNOWN, 0 },
17324 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv4si3, 0, IX86_BUILTIN_PSLLDI128, UNKNOWN, 0 },
17325 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashlv2di3, 0, IX86_BUILTIN_PSLLQI128, UNKNOWN, 0 },
17326
17327 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv8hi3, 0, IX86_BUILTIN_PSRLWI128, UNKNOWN, 0 },
17328 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv4si3, 0, IX86_BUILTIN_PSRLDI128, UNKNOWN, 0 },
17329 { OPTION_MASK_ISA_SSE2, CODE_FOR_lshrv2di3, 0, IX86_BUILTIN_PSRLQI128, UNKNOWN, 0 },
17330
17331 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv8hi3, 0, IX86_BUILTIN_PSRAWI128, UNKNOWN, 0 },
17332 { OPTION_MASK_ISA_SSE2, CODE_FOR_ashrv4si3, 0, IX86_BUILTIN_PSRADI128, UNKNOWN, 0 },
17333
17334 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, 0, IX86_BUILTIN_PMADDWD128, UNKNOWN, 0 },
17335
17336 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsi2sd, 0, IX86_BUILTIN_CVTSI2SD, UNKNOWN, 0 },
17337 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsi2sdq, 0, IX86_BUILTIN_CVTSI642SD, UNKNOWN, 0 },
17338 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2ss, 0, IX86_BUILTIN_CVTSD2SS, UNKNOWN, 0 },
17339 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtss2sd, 0, IX86_BUILTIN_CVTSS2SD, UNKNOWN, 0 },
17340
17341 /* SSE3 MMX */
17342 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv4sf3, "__builtin_ia32_addsubps", IX86_BUILTIN_ADDSUBPS, UNKNOWN, 0 },
17343 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_addsubv2df3, "__builtin_ia32_addsubpd", IX86_BUILTIN_ADDSUBPD, UNKNOWN, 0 },
17344 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv4sf3, "__builtin_ia32_haddps", IX86_BUILTIN_HADDPS, UNKNOWN, 0 },
17345 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_haddv2df3, "__builtin_ia32_haddpd", IX86_BUILTIN_HADDPD, UNKNOWN, 0 },
17346 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv4sf3, "__builtin_ia32_hsubps", IX86_BUILTIN_HSUBPS, UNKNOWN, 0 },
17347 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", IX86_BUILTIN_HSUBPD, UNKNOWN, 0 },
17348
17349 /* SSSE3 */
17350 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, 0 },
17351 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, 0 },
17352 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, 0 },
17353 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, 0 },
17354 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, 0 },
17355 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, 0 },
17356 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, 0 },
17357 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, 0 },
17358 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, 0 },
17359 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, 0 },
17360 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, 0 },
17361 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, 0 },
17362 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv8hi3, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, 0 },
17363 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubswv4hi3, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, 0 },
17364 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, 0 },
17365 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, 0 },
17366 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, 0 },
17367 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, 0 },
17368 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, 0 },
17369 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, 0 },
17370 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, 0 },
17371 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, 0 },
17372 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, 0 },
17373 { OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, 0 },
17374
17375 /* SSE4.1 */
17376 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_packusdw, "__builtin_ia32_packusdw128", IX86_BUILTIN_PACKUSDW128, UNKNOWN, 0 },
17377 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_eqv2di3, "__builtin_ia32_pcmpeqq", IX86_BUILTIN_PCMPEQQ, UNKNOWN, 0 },
17378 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv16qi3, "__builtin_ia32_pmaxsb128", IX86_BUILTIN_PMAXSB128, UNKNOWN, 0 },
17379 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_smaxv4si3, "__builtin_ia32_pmaxsd128", IX86_BUILTIN_PMAXSD128, UNKNOWN, 0 },
17380 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv4si3, "__builtin_ia32_pmaxud128", IX86_BUILTIN_PMAXUD128, UNKNOWN, 0 },
17381 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_umaxv8hi3, "__builtin_ia32_pmaxuw128", IX86_BUILTIN_PMAXUW128, UNKNOWN, 0 },
17382 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv16qi3, "__builtin_ia32_pminsb128", IX86_BUILTIN_PMINSB128, UNKNOWN, 0 },
17383 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sminv4si3, "__builtin_ia32_pminsd128", IX86_BUILTIN_PMINSD128, UNKNOWN, 0 },
17384 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv4si3, "__builtin_ia32_pminud128", IX86_BUILTIN_PMINUD128, UNKNOWN, 0 },
17385 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_uminv8hi3, "__builtin_ia32_pminuw128", IX86_BUILTIN_PMINUW128, UNKNOWN, 0 },
17386 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_mulv2siv2di3, 0, IX86_BUILTIN_PMULDQ128, UNKNOWN, 0 },
17387 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_mulv4si3, "__builtin_ia32_pmulld128", IX86_BUILTIN_PMULLD128, UNKNOWN, 0 },
17388
17389 /* SSE4.2 */
17390 { OPTION_MASK_ISA_SSE4_2, CODE_FOR_sse4_2_gtv2di3, "__builtin_ia32_pcmpgtq", IX86_BUILTIN_PCMPGTQ, UNKNOWN, 0 },
17391 };
17392
17393 static const struct builtin_description bdesc_1arg[] =
17394 {
17395 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB, UNKNOWN, 0 },
17396 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_movmskps, 0, IX86_BUILTIN_MOVMSKPS, UNKNOWN, 0 },
17397
17398 { OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, 0, IX86_BUILTIN_SQRTPS, UNKNOWN, 0 },
17399 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, 0, IX86_BUILTIN_RSQRTPS, UNKNOWN, 0 },
17400 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, 0, IX86_BUILTIN_RCPPS, UNKNOWN, 0 },
17401
17402 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, 0, IX86_BUILTIN_CVTPS2PI, UNKNOWN, 0 },
17403 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, 0, IX86_BUILTIN_CVTSS2SI, UNKNOWN, 0 },
17404 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, 0, IX86_BUILTIN_CVTSS2SI64, UNKNOWN, 0 },
17405 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, 0, IX86_BUILTIN_CVTTPS2PI, UNKNOWN, 0 },
17406 { OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, 0, IX86_BUILTIN_CVTTSS2SI, UNKNOWN, 0 },
17407 { OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, 0, IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, 0 },
17408
17409 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmovmskb, 0, IX86_BUILTIN_PMOVMSKB128, UNKNOWN, 0 },
17410 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_movmskpd, 0, IX86_BUILTIN_MOVMSKPD, UNKNOWN, 0 },
17411
17412 { OPTION_MASK_ISA_SSE2, CODE_FOR_sqrtv2df2, 0, IX86_BUILTIN_SQRTPD, UNKNOWN, 0 },
17413
17414 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, 0, IX86_BUILTIN_CVTDQ2PD, UNKNOWN, 0 },
17415 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2ps, 0, IX86_BUILTIN_CVTDQ2PS, UNKNOWN, 0 },
17416
17417 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, 0, IX86_BUILTIN_CVTPD2DQ, UNKNOWN, 0 },
17418 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, 0, IX86_BUILTIN_CVTPD2PI, UNKNOWN, 0 },
17419 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, 0, IX86_BUILTIN_CVTPD2PS, UNKNOWN, 0 },
17420 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, 0, IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, 0 },
17421 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, 0, IX86_BUILTIN_CVTTPD2PI, UNKNOWN, 0 },
17422
17423 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, 0, IX86_BUILTIN_CVTPI2PD, UNKNOWN, 0 },
17424
17425 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, 0, IX86_BUILTIN_CVTSD2SI, UNKNOWN, 0 },
17426 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, 0, IX86_BUILTIN_CVTTSD2SI, UNKNOWN, 0 },
17427 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvtsd2siq, 0, IX86_BUILTIN_CVTSD2SI64, UNKNOWN, 0 },
17428 { OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, CODE_FOR_sse2_cvttsd2siq, 0, IX86_BUILTIN_CVTTSD2SI64, UNKNOWN, 0 },
17429
17430 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2dq, 0, IX86_BUILTIN_CVTPS2DQ, UNKNOWN, 0 },
17431 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtps2pd, 0, IX86_BUILTIN_CVTPS2PD, UNKNOWN, 0 },
17432 { OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttps2dq, 0, IX86_BUILTIN_CVTTPS2DQ, UNKNOWN, 0 },
17433
17434 /* SSE3 */
17435 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, 0 },
17436 { OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movsldup, "__builtin_ia32_movsldup", IX86_BUILTIN_MOVSLDUP, UNKNOWN, 0 },
17437
17438 /* SSSE3 */
17439 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, 0 },
17440 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, 0 },
17441 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, 0 },
17442 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, 0 },
17443 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, 0 },
17444 { OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, 0 },
17445
17446 /* SSE4.1 */
17447 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVSXBW128, UNKNOWN, 0 },
17448 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVSXBD128, UNKNOWN, 0 },
17449 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVSXBQ128, UNKNOWN, 0 },
17450 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVSXWD128, UNKNOWN, 0 },
17451 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVSXWQ128, UNKNOWN, 0 },
17452 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_extendv2siv2di2, 0, IX86_BUILTIN_PMOVSXDQ128, UNKNOWN, 0 },
17453 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv8qiv8hi2, 0, IX86_BUILTIN_PMOVZXBW128, UNKNOWN, 0 },
17454 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4qiv4si2, 0, IX86_BUILTIN_PMOVZXBD128, UNKNOWN, 0 },
17455 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2qiv2di2, 0, IX86_BUILTIN_PMOVZXBQ128, UNKNOWN, 0 },
17456 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv4hiv4si2, 0, IX86_BUILTIN_PMOVZXWD128, UNKNOWN, 0 },
17457 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2hiv2di2, 0, IX86_BUILTIN_PMOVZXWQ128, UNKNOWN, 0 },
17458 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_zero_extendv2siv2di2, 0, IX86_BUILTIN_PMOVZXDQ128, UNKNOWN, 0 },
17459 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_phminposuw, "__builtin_ia32_phminposuw128", IX86_BUILTIN_PHMINPOSUW128, UNKNOWN, 0 },
17460
17461   /* Fake 1-argument builtins: these really take a constant smaller than 8 bits as a second argument.  */
17462 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundpd, 0, IX86_BUILTIN_ROUNDPD, UNKNOWN, 0 },
17463 { OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_roundps, 0, IX86_BUILTIN_ROUNDPS, UNKNOWN, 0 },
17464 };
17465
17466 /* Set up all the MMX/SSE builtins.  This is not called if TARGET_MMX
17467    is zero.  Otherwise, if TARGET_SSE is not set, only the MMX
17468    builtins are defined.  */
17469 static void
17470 ix86_init_mmx_sse_builtins (void)
17471 {
17472 const struct builtin_description * d;
17473 size_t i;
17474
17475 tree V16QI_type_node = build_vector_type_for_mode (char_type_node, V16QImode);
17476 tree V2SI_type_node = build_vector_type_for_mode (intSI_type_node, V2SImode);
17477 tree V2SF_type_node = build_vector_type_for_mode (float_type_node, V2SFmode);
17478 tree V2DI_type_node
17479 = build_vector_type_for_mode (long_long_integer_type_node, V2DImode);
17480 tree V2DF_type_node = build_vector_type_for_mode (double_type_node, V2DFmode);
17481 tree V4SF_type_node = build_vector_type_for_mode (float_type_node, V4SFmode);
17482 tree V4SI_type_node = build_vector_type_for_mode (intSI_type_node, V4SImode);
17483 tree V4HI_type_node = build_vector_type_for_mode (intHI_type_node, V4HImode);
17484 tree V8QI_type_node = build_vector_type_for_mode (char_type_node, V8QImode);
17485 tree V8HI_type_node = build_vector_type_for_mode (intHI_type_node, V8HImode);
17486
17487 tree pchar_type_node = build_pointer_type (char_type_node);
17488 tree pcchar_type_node = build_pointer_type (
17489 build_type_variant (char_type_node, 1, 0));
17490 tree pfloat_type_node = build_pointer_type (float_type_node);
17491 tree pcfloat_type_node = build_pointer_type (
17492 build_type_variant (float_type_node, 1, 0));
17493 tree pv2si_type_node = build_pointer_type (V2SI_type_node);
17494 tree pv2di_type_node = build_pointer_type (V2DI_type_node);
17495 tree pdi_type_node = build_pointer_type (long_long_unsigned_type_node);
17496
17497 /* Comparisons. */
17498 tree int_ftype_v4sf_v4sf
17499 = build_function_type_list (integer_type_node,
17500 V4SF_type_node, V4SF_type_node, NULL_TREE);
17501 tree v4si_ftype_v4sf_v4sf
17502 = build_function_type_list (V4SI_type_node,
17503 V4SF_type_node, V4SF_type_node, NULL_TREE);
17504 /* MMX/SSE/integer conversions. */
17505 tree int_ftype_v4sf
17506 = build_function_type_list (integer_type_node,
17507 V4SF_type_node, NULL_TREE);
17508 tree int64_ftype_v4sf
17509 = build_function_type_list (long_long_integer_type_node,
17510 V4SF_type_node, NULL_TREE);
17511 tree int_ftype_v8qi
17512 = build_function_type_list (integer_type_node, V8QI_type_node, NULL_TREE);
17513 tree v4sf_ftype_v4sf_int
17514 = build_function_type_list (V4SF_type_node,
17515 V4SF_type_node, integer_type_node, NULL_TREE);
17516 tree v4sf_ftype_v4sf_int64
17517 = build_function_type_list (V4SF_type_node,
17518 V4SF_type_node, long_long_integer_type_node,
17519 NULL_TREE);
17520 tree v4sf_ftype_v4sf_v2si
17521 = build_function_type_list (V4SF_type_node,
17522 V4SF_type_node, V2SI_type_node, NULL_TREE);
17523
17524 /* Miscellaneous. */
17525 tree v8qi_ftype_v4hi_v4hi
17526 = build_function_type_list (V8QI_type_node,
17527 V4HI_type_node, V4HI_type_node, NULL_TREE);
17528 tree v4hi_ftype_v2si_v2si
17529 = build_function_type_list (V4HI_type_node,
17530 V2SI_type_node, V2SI_type_node, NULL_TREE);
17531 tree v4sf_ftype_v4sf_v4sf_int
17532 = build_function_type_list (V4SF_type_node,
17533 V4SF_type_node, V4SF_type_node,
17534 integer_type_node, NULL_TREE);
17535 tree v2si_ftype_v4hi_v4hi
17536 = build_function_type_list (V2SI_type_node,
17537 V4HI_type_node, V4HI_type_node, NULL_TREE);
17538 tree v4hi_ftype_v4hi_int
17539 = build_function_type_list (V4HI_type_node,
17540 V4HI_type_node, integer_type_node, NULL_TREE);
17541 tree v4hi_ftype_v4hi_di
17542 = build_function_type_list (V4HI_type_node,
17543 V4HI_type_node, long_long_unsigned_type_node,
17544 NULL_TREE);
17545 tree v2si_ftype_v2si_di
17546 = build_function_type_list (V2SI_type_node,
17547 V2SI_type_node, long_long_unsigned_type_node,
17548 NULL_TREE);
17549 tree void_ftype_void
17550 = build_function_type (void_type_node, void_list_node);
17551 tree void_ftype_unsigned
17552 = build_function_type_list (void_type_node, unsigned_type_node, NULL_TREE);
17553 tree void_ftype_unsigned_unsigned
17554 = build_function_type_list (void_type_node, unsigned_type_node,
17555 unsigned_type_node, NULL_TREE);
17556 tree void_ftype_pcvoid_unsigned_unsigned
17557 = build_function_type_list (void_type_node, const_ptr_type_node,
17558 unsigned_type_node, unsigned_type_node,
17559 NULL_TREE);
17560 tree unsigned_ftype_void
17561 = build_function_type (unsigned_type_node, void_list_node);
17562 tree v2si_ftype_v4sf
17563 = build_function_type_list (V2SI_type_node, V4SF_type_node, NULL_TREE);
17564 /* Loads/stores. */
17565 tree void_ftype_v8qi_v8qi_pchar
17566 = build_function_type_list (void_type_node,
17567 V8QI_type_node, V8QI_type_node,
17568 pchar_type_node, NULL_TREE);
17569 tree v4sf_ftype_pcfloat
17570 = build_function_type_list (V4SF_type_node, pcfloat_type_node, NULL_TREE);
17571   /* @@@ the type is bogus: the memory operand really holds two floats, not a V2SI.  */
17572 tree v4sf_ftype_v4sf_pv2si
17573 = build_function_type_list (V4SF_type_node,
17574 V4SF_type_node, pv2si_type_node, NULL_TREE);
17575 tree void_ftype_pv2si_v4sf
17576 = build_function_type_list (void_type_node,
17577 pv2si_type_node, V4SF_type_node, NULL_TREE);
17578 tree void_ftype_pfloat_v4sf
17579 = build_function_type_list (void_type_node,
17580 pfloat_type_node, V4SF_type_node, NULL_TREE);
17581 tree void_ftype_pdi_di
17582 = build_function_type_list (void_type_node,
17583 pdi_type_node, long_long_unsigned_type_node,
17584 NULL_TREE);
17585 tree void_ftype_pv2di_v2di
17586 = build_function_type_list (void_type_node,
17587 pv2di_type_node, V2DI_type_node, NULL_TREE);
17588 /* Normal vector unops. */
17589 tree v4sf_ftype_v4sf
17590 = build_function_type_list (V4SF_type_node, V4SF_type_node, NULL_TREE);
17591 tree v16qi_ftype_v16qi
17592 = build_function_type_list (V16QI_type_node, V16QI_type_node, NULL_TREE);
17593 tree v8hi_ftype_v8hi
17594 = build_function_type_list (V8HI_type_node, V8HI_type_node, NULL_TREE);
17595 tree v4si_ftype_v4si
17596 = build_function_type_list (V4SI_type_node, V4SI_type_node, NULL_TREE);
17597 tree v8qi_ftype_v8qi
17598 = build_function_type_list (V8QI_type_node, V8QI_type_node, NULL_TREE);
17599 tree v4hi_ftype_v4hi
17600 = build_function_type_list (V4HI_type_node, V4HI_type_node, NULL_TREE);
17601
17602 /* Normal vector binops. */
17603 tree v4sf_ftype_v4sf_v4sf
17604 = build_function_type_list (V4SF_type_node,
17605 V4SF_type_node, V4SF_type_node, NULL_TREE);
17606 tree v8qi_ftype_v8qi_v8qi
17607 = build_function_type_list (V8QI_type_node,
17608 V8QI_type_node, V8QI_type_node, NULL_TREE);
17609 tree v4hi_ftype_v4hi_v4hi
17610 = build_function_type_list (V4HI_type_node,
17611 V4HI_type_node, V4HI_type_node, NULL_TREE);
17612 tree v2si_ftype_v2si_v2si
17613 = build_function_type_list (V2SI_type_node,
17614 V2SI_type_node, V2SI_type_node, NULL_TREE);
17615 tree di_ftype_di_di
17616 = build_function_type_list (long_long_unsigned_type_node,
17617 long_long_unsigned_type_node,
17618 long_long_unsigned_type_node, NULL_TREE);
17619
17620 tree di_ftype_di_di_int
17621 = build_function_type_list (long_long_unsigned_type_node,
17622 long_long_unsigned_type_node,
17623 long_long_unsigned_type_node,
17624 integer_type_node, NULL_TREE);
17625
17626 tree v2si_ftype_v2sf
17627 = build_function_type_list (V2SI_type_node, V2SF_type_node, NULL_TREE);
17628 tree v2sf_ftype_v2si
17629 = build_function_type_list (V2SF_type_node, V2SI_type_node, NULL_TREE);
17630 tree v2si_ftype_v2si
17631 = build_function_type_list (V2SI_type_node, V2SI_type_node, NULL_TREE);
17632 tree v2sf_ftype_v2sf
17633 = build_function_type_list (V2SF_type_node, V2SF_type_node, NULL_TREE);
17634 tree v2sf_ftype_v2sf_v2sf
17635 = build_function_type_list (V2SF_type_node,
17636 V2SF_type_node, V2SF_type_node, NULL_TREE);
17637 tree v2si_ftype_v2sf_v2sf
17638 = build_function_type_list (V2SI_type_node,
17639 V2SF_type_node, V2SF_type_node, NULL_TREE);
17640 tree pint_type_node = build_pointer_type (integer_type_node);
17641 tree pdouble_type_node = build_pointer_type (double_type_node);
17642 tree pcdouble_type_node = build_pointer_type (
17643 build_type_variant (double_type_node, 1, 0));
17644 tree int_ftype_v2df_v2df
17645 = build_function_type_list (integer_type_node,
17646 V2DF_type_node, V2DF_type_node, NULL_TREE);
17647
17648 tree void_ftype_pcvoid
17649 = build_function_type_list (void_type_node, const_ptr_type_node, NULL_TREE);
17650 tree v4sf_ftype_v4si
17651 = build_function_type_list (V4SF_type_node, V4SI_type_node, NULL_TREE);
17652 tree v4si_ftype_v4sf
17653 = build_function_type_list (V4SI_type_node, V4SF_type_node, NULL_TREE);
17654 tree v2df_ftype_v4si
17655 = build_function_type_list (V2DF_type_node, V4SI_type_node, NULL_TREE);
17656 tree v4si_ftype_v2df
17657 = build_function_type_list (V4SI_type_node, V2DF_type_node, NULL_TREE);
17658 tree v4si_ftype_v2df_v2df
17659 = build_function_type_list (V4SI_type_node,
17660 V2DF_type_node, V2DF_type_node, NULL_TREE);
17661 tree v2si_ftype_v2df
17662 = build_function_type_list (V2SI_type_node, V2DF_type_node, NULL_TREE);
17663 tree v4sf_ftype_v2df
17664 = build_function_type_list (V4SF_type_node, V2DF_type_node, NULL_TREE);
17665 tree v2df_ftype_v2si
17666 = build_function_type_list (V2DF_type_node, V2SI_type_node, NULL_TREE);
17667 tree v2df_ftype_v4sf
17668 = build_function_type_list (V2DF_type_node, V4SF_type_node, NULL_TREE);
17669 tree int_ftype_v2df
17670 = build_function_type_list (integer_type_node, V2DF_type_node, NULL_TREE);
17671 tree int64_ftype_v2df
17672 = build_function_type_list (long_long_integer_type_node,
17673 V2DF_type_node, NULL_TREE);
17674 tree v2df_ftype_v2df_int
17675 = build_function_type_list (V2DF_type_node,
17676 V2DF_type_node, integer_type_node, NULL_TREE);
17677 tree v2df_ftype_v2df_int64
17678 = build_function_type_list (V2DF_type_node,
17679 V2DF_type_node, long_long_integer_type_node,
17680 NULL_TREE);
17681 tree v4sf_ftype_v4sf_v2df
17682 = build_function_type_list (V4SF_type_node,
17683 V4SF_type_node, V2DF_type_node, NULL_TREE);
17684 tree v2df_ftype_v2df_v4sf
17685 = build_function_type_list (V2DF_type_node,
17686 V2DF_type_node, V4SF_type_node, NULL_TREE);
17687 tree v2df_ftype_v2df_v2df_int
17688 = build_function_type_list (V2DF_type_node,
17689 V2DF_type_node, V2DF_type_node,
17690 integer_type_node,
17691 NULL_TREE);
17692 tree v2df_ftype_v2df_pcdouble
17693 = build_function_type_list (V2DF_type_node,
17694 V2DF_type_node, pcdouble_type_node, NULL_TREE);
17695 tree void_ftype_pdouble_v2df
17696 = build_function_type_list (void_type_node,
17697 pdouble_type_node, V2DF_type_node, NULL_TREE);
17698 tree void_ftype_pint_int
17699 = build_function_type_list (void_type_node,
17700 pint_type_node, integer_type_node, NULL_TREE);
17701 tree void_ftype_v16qi_v16qi_pchar
17702 = build_function_type_list (void_type_node,
17703 V16QI_type_node, V16QI_type_node,
17704 pchar_type_node, NULL_TREE);
17705 tree v2df_ftype_pcdouble
17706 = build_function_type_list (V2DF_type_node, pcdouble_type_node, NULL_TREE);
17707 tree v2df_ftype_v2df_v2df
17708 = build_function_type_list (V2DF_type_node,
17709 V2DF_type_node, V2DF_type_node, NULL_TREE);
17710 tree v16qi_ftype_v16qi_v16qi
17711 = build_function_type_list (V16QI_type_node,
17712 V16QI_type_node, V16QI_type_node, NULL_TREE);
17713 tree v8hi_ftype_v8hi_v8hi
17714 = build_function_type_list (V8HI_type_node,
17715 V8HI_type_node, V8HI_type_node, NULL_TREE);
17716 tree v4si_ftype_v4si_v4si
17717 = build_function_type_list (V4SI_type_node,
17718 V4SI_type_node, V4SI_type_node, NULL_TREE);
17719 tree v2di_ftype_v2di_v2di
17720 = build_function_type_list (V2DI_type_node,
17721 V2DI_type_node, V2DI_type_node, NULL_TREE);
17722 tree v2di_ftype_v2df_v2df
17723 = build_function_type_list (V2DI_type_node,
17724 V2DF_type_node, V2DF_type_node, NULL_TREE);
17725 tree v2df_ftype_v2df
17726 = build_function_type_list (V2DF_type_node, V2DF_type_node, NULL_TREE);
17727 tree v2di_ftype_v2di_int
17728 = build_function_type_list (V2DI_type_node,
17729 V2DI_type_node, integer_type_node, NULL_TREE);
17730 tree v2di_ftype_v2di_v2di_int
17731 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17732 V2DI_type_node, integer_type_node, NULL_TREE);
17733 tree v4si_ftype_v4si_int
17734 = build_function_type_list (V4SI_type_node,
17735 V4SI_type_node, integer_type_node, NULL_TREE);
17736 tree v8hi_ftype_v8hi_int
17737 = build_function_type_list (V8HI_type_node,
17738 V8HI_type_node, integer_type_node, NULL_TREE);
17739 tree v4si_ftype_v8hi_v8hi
17740 = build_function_type_list (V4SI_type_node,
17741 V8HI_type_node, V8HI_type_node, NULL_TREE);
17742 tree di_ftype_v8qi_v8qi
17743 = build_function_type_list (long_long_unsigned_type_node,
17744 V8QI_type_node, V8QI_type_node, NULL_TREE);
17745 tree di_ftype_v2si_v2si
17746 = build_function_type_list (long_long_unsigned_type_node,
17747 V2SI_type_node, V2SI_type_node, NULL_TREE);
17748 tree v2di_ftype_v16qi_v16qi
17749 = build_function_type_list (V2DI_type_node,
17750 V16QI_type_node, V16QI_type_node, NULL_TREE);
17751 tree v2di_ftype_v4si_v4si
17752 = build_function_type_list (V2DI_type_node,
17753 V4SI_type_node, V4SI_type_node, NULL_TREE);
17754 tree int_ftype_v16qi
17755 = build_function_type_list (integer_type_node, V16QI_type_node, NULL_TREE);
17756 tree v16qi_ftype_pcchar
17757 = build_function_type_list (V16QI_type_node, pcchar_type_node, NULL_TREE);
17758 tree void_ftype_pchar_v16qi
17759 = build_function_type_list (void_type_node,
17760 pchar_type_node, V16QI_type_node, NULL_TREE);
17761
17762 tree v2di_ftype_v2di_unsigned_unsigned
17763 = build_function_type_list (V2DI_type_node, V2DI_type_node,
17764 unsigned_type_node, unsigned_type_node,
17765 NULL_TREE);
17766 tree v2di_ftype_v2di_v2di_unsigned_unsigned
17767 = build_function_type_list (V2DI_type_node, V2DI_type_node, V2DI_type_node,
17768 unsigned_type_node, unsigned_type_node,
17769 NULL_TREE);
17770 tree v2di_ftype_v2di_v16qi
17771 = build_function_type_list (V2DI_type_node, V2DI_type_node, V16QI_type_node,
17772 NULL_TREE);
17773 tree v2df_ftype_v2df_v2df_v2df
17774 = build_function_type_list (V2DF_type_node,
17775 V2DF_type_node, V2DF_type_node,
17776 V2DF_type_node, NULL_TREE);
17777 tree v4sf_ftype_v4sf_v4sf_v4sf
17778 = build_function_type_list (V4SF_type_node,
17779 V4SF_type_node, V4SF_type_node,
17780 V4SF_type_node, NULL_TREE);
17781 tree v8hi_ftype_v16qi
17782 = build_function_type_list (V8HI_type_node, V16QI_type_node,
17783 NULL_TREE);
17784 tree v4si_ftype_v16qi
17785 = build_function_type_list (V4SI_type_node, V16QI_type_node,
17786 NULL_TREE);
17787 tree v2di_ftype_v16qi
17788 = build_function_type_list (V2DI_type_node, V16QI_type_node,
17789 NULL_TREE);
17790 tree v4si_ftype_v8hi
17791 = build_function_type_list (V4SI_type_node, V8HI_type_node,
17792 NULL_TREE);
17793 tree v2di_ftype_v8hi
17794 = build_function_type_list (V2DI_type_node, V8HI_type_node,
17795 NULL_TREE);
17796 tree v2di_ftype_v4si
17797 = build_function_type_list (V2DI_type_node, V4SI_type_node,
17798 NULL_TREE);
17799 tree v2di_ftype_pv2di
17800 = build_function_type_list (V2DI_type_node, pv2di_type_node,
17801 NULL_TREE);
17802 tree v16qi_ftype_v16qi_v16qi_int
17803 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17804 V16QI_type_node, integer_type_node,
17805 NULL_TREE);
17806 tree v16qi_ftype_v16qi_v16qi_v16qi
17807 = build_function_type_list (V16QI_type_node, V16QI_type_node,
17808 V16QI_type_node, V16QI_type_node,
17809 NULL_TREE);
17810 tree v8hi_ftype_v8hi_v8hi_int
17811 = build_function_type_list (V8HI_type_node, V8HI_type_node,
17812 V8HI_type_node, integer_type_node,
17813 NULL_TREE);
17814 tree v4si_ftype_v4si_v4si_int
17815 = build_function_type_list (V4SI_type_node, V4SI_type_node,
17816 V4SI_type_node, integer_type_node,
17817 NULL_TREE);
17818 tree int_ftype_v2di_v2di
17819 = build_function_type_list (integer_type_node,
17820 V2DI_type_node, V2DI_type_node,
17821 NULL_TREE);
17822 tree int_ftype_v16qi_int_v16qi_int_int
17823 = build_function_type_list (integer_type_node,
17824 V16QI_type_node,
17825 integer_type_node,
17826 V16QI_type_node,
17827 integer_type_node,
17828 integer_type_node,
17829 NULL_TREE);
17830 tree v16qi_ftype_v16qi_int_v16qi_int_int
17831 = build_function_type_list (V16QI_type_node,
17832 V16QI_type_node,
17833 integer_type_node,
17834 V16QI_type_node,
17835 integer_type_node,
17836 integer_type_node,
17837 NULL_TREE);
17838 tree int_ftype_v16qi_v16qi_int
17839 = build_function_type_list (integer_type_node,
17840 V16QI_type_node,
17841 V16QI_type_node,
17842 integer_type_node,
17843 NULL_TREE);
17844 tree ftype;
17845
17846 /* The __float80 type. */
17847 if (TYPE_MODE (long_double_type_node) == XFmode)
17848 (*lang_hooks.types.register_builtin_type) (long_double_type_node,
17849 "__float80");
17850 else
17851 {
17852       /* long double is not an 80-bit type here; create a distinct type for __float80.  */
17853 tree float80_type_node = make_node (REAL_TYPE);
17854
17855 TYPE_PRECISION (float80_type_node) = 80;
17856 layout_type (float80_type_node);
17857 (*lang_hooks.types.register_builtin_type) (float80_type_node,
17858 "__float80");
17859 }
17860
17861 if (TARGET_64BIT)
17862 {
17863 tree float128_type_node = make_node (REAL_TYPE);
17864
17865 TYPE_PRECISION (float128_type_node) = 128;
17866 layout_type (float128_type_node);
17867 (*lang_hooks.types.register_builtin_type) (float128_type_node,
17868 "__float128");
17869
17870 /* TFmode support builtins. */
17871 ftype = build_function_type (float128_type_node,
17872 void_list_node);
17873 def_builtin (OPTION_MASK_ISA_64BIT, "__builtin_infq", ftype, IX86_BUILTIN_INFQ);
17874
17875 ftype = build_function_type_list (float128_type_node,
17876 float128_type_node,
17877 NULL_TREE);
17878 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_fabsq", ftype, IX86_BUILTIN_FABSQ);
17879
17880 ftype = build_function_type_list (float128_type_node,
17881 float128_type_node,
17882 float128_type_node,
17883 NULL_TREE);
17884 def_builtin_const (OPTION_MASK_ISA_64BIT, "__builtin_copysignq", ftype, IX86_BUILTIN_COPYSIGNQ);
17885 }
17886
17887 /* Add all SSE builtins that are more or less simple operations on
17888 three operands. */
17889 for (i = 0, d = bdesc_sse_3arg;
17890 i < ARRAY_SIZE (bdesc_sse_3arg);
17891 i++, d++)
17892 {
17893 /* Use one of the operands; the target can have a different mode for
17894 mask-generating compares. */
17895 enum machine_mode mode;
17896 tree type;
17897
17898 if (d->name == 0)
17899 continue;
17900 mode = insn_data[d->icode].operand[1].mode;
17901
17902 switch (mode)
17903 {
17904 case V16QImode:
17905 type = v16qi_ftype_v16qi_v16qi_int;
17906 break;
17907 case V8HImode:
17908 type = v8hi_ftype_v8hi_v8hi_int;
17909 break;
17910 case V4SImode:
17911 type = v4si_ftype_v4si_v4si_int;
17912 break;
17913 case V2DImode:
17914 type = v2di_ftype_v2di_v2di_int;
17915 break;
17916 case V2DFmode:
17917 type = v2df_ftype_v2df_v2df_int;
17918 break;
17919 case V4SFmode:
17920 type = v4sf_ftype_v4sf_v4sf_int;
17921 break;
17922 default:
17923 gcc_unreachable ();
17924 }
17925
17926 /* Override for variable blends. */
17927 switch (d->icode)
17928 {
17929 case CODE_FOR_sse4_1_blendvpd:
17930 type = v2df_ftype_v2df_v2df_v2df;
17931 break;
17932 case CODE_FOR_sse4_1_blendvps:
17933 type = v4sf_ftype_v4sf_v4sf_v4sf;
17934 break;
17935 case CODE_FOR_sse4_1_pblendvb:
17936 type = v16qi_ftype_v16qi_v16qi_v16qi;
17937 break;
17938 default:
17939 break;
17940 }
17941
17942 def_builtin_const (d->mask, d->name, type, d->code);
17943 }
17944
17945 /* Add all builtins that are more or less simple operations on two
17946 operands. */
17947 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
17948 {
17949 /* Use one of the operands; the target can have a different mode for
17950 mask-generating compares. */
17951 enum machine_mode mode;
17952 tree type;
17953
17954 if (d->name == 0)
17955 continue;
17956 mode = insn_data[d->icode].operand[1].mode;
17957
17958 switch (mode)
17959 {
17960 case V16QImode:
17961 type = v16qi_ftype_v16qi_v16qi;
17962 break;
17963 case V8HImode:
17964 type = v8hi_ftype_v8hi_v8hi;
17965 break;
17966 case V4SImode:
17967 type = v4si_ftype_v4si_v4si;
17968 break;
17969 case V2DImode:
17970 type = v2di_ftype_v2di_v2di;
17971 break;
17972 case V2DFmode:
17973 type = v2df_ftype_v2df_v2df;
17974 break;
17975 case V4SFmode:
17976 type = v4sf_ftype_v4sf_v4sf;
17977 break;
17978 case V8QImode:
17979 type = v8qi_ftype_v8qi_v8qi;
17980 break;
17981 case V4HImode:
17982 type = v4hi_ftype_v4hi_v4hi;
17983 break;
17984 case V2SImode:
17985 type = v2si_ftype_v2si_v2si;
17986 break;
17987 case DImode:
17988 type = di_ftype_di_di;
17989 break;
17990
17991 default:
17992 gcc_unreachable ();
17993 }
17994
17995 /* Override for comparisons. */
17996 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
17997 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3)
17998 type = v4si_ftype_v4sf_v4sf;
17999
18000 if (d->icode == CODE_FOR_sse2_maskcmpv2df3
18001 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
18002 type = v2di_ftype_v2df_v2df;
18003
18004 if (d->icode == CODE_FOR_vec_pack_sfix_v2df)
18005 type = v4si_ftype_v2df_v2df;
18006
18007 def_builtin_const (d->mask, d->name, type, d->code);
18008 }
18009
18010   /* Add all builtins that are more or less simple operations on one operand.  */
18011 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
18012 {
18013 enum machine_mode mode;
18014 tree type;
18015
18016 if (d->name == 0)
18017 continue;
18018 mode = insn_data[d->icode].operand[1].mode;
18019
18020 switch (mode)
18021 {
18022 case V16QImode:
18023 type = v16qi_ftype_v16qi;
18024 break;
18025 case V8HImode:
18026 type = v8hi_ftype_v8hi;
18027 break;
18028 case V4SImode:
18029 type = v4si_ftype_v4si;
18030 break;
18031 case V2DFmode:
18032 type = v2df_ftype_v2df;
18033 break;
18034 case V4SFmode:
18035 type = v4sf_ftype_v4sf;
18036 break;
18037 case V8QImode:
18038 type = v8qi_ftype_v8qi;
18039 break;
18040 case V4HImode:
18041 type = v4hi_ftype_v4hi;
18042 break;
18043 case V2SImode:
18044 type = v2si_ftype_v2si;
18045 break;
18046
18047 default:
18048       gcc_unreachable ();
18049 }
18050
18051 def_builtin_const (d->mask, d->name, type, d->code);
18052 }
18053
18054 /* pcmpestr[im] insns. */
18055 for (i = 0, d = bdesc_pcmpestr;
18056 i < ARRAY_SIZE (bdesc_pcmpestr);
18057 i++, d++)
18058 {
18059 if (d->code == IX86_BUILTIN_PCMPESTRM128)
18060 ftype = v16qi_ftype_v16qi_int_v16qi_int_int;
18061 else
18062 ftype = int_ftype_v16qi_int_v16qi_int_int;
18063 def_builtin_const (d->mask, d->name, ftype, d->code);
18064 }
18065
18066 /* pcmpistr[im] insns. */
18067 for (i = 0, d = bdesc_pcmpistr;
18068 i < ARRAY_SIZE (bdesc_pcmpistr);
18069 i++, d++)
18070 {
18071 if (d->code == IX86_BUILTIN_PCMPISTRM128)
18072 ftype = v16qi_ftype_v16qi_v16qi_int;
18073 else
18074 ftype = int_ftype_v16qi_v16qi_int;
18075 def_builtin_const (d->mask, d->name, ftype, d->code);
18076 }
18077
18078 /* Add the remaining MMX insns with somewhat more complicated types. */
18079 def_builtin (OPTION_MASK_ISA_MMX, "__builtin_ia32_emms", void_ftype_void, IX86_BUILTIN_EMMS);
18080 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSLLW);
18081 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pslld", v2si_ftype_v2si_di, IX86_BUILTIN_PSLLD);
18082 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psllq", di_ftype_di_di, IX86_BUILTIN_PSLLQ);
18083
18084 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRLW);
18085 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrld", v2si_ftype_v2si_di, IX86_BUILTIN_PSRLD);
18086 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrlq", di_ftype_di_di, IX86_BUILTIN_PSRLQ);
18087
18088 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psraw", v4hi_ftype_v4hi_di, IX86_BUILTIN_PSRAW);
18089 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_psrad", v2si_ftype_v2si_di, IX86_BUILTIN_PSRAD);
18090
18091 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pshufw", v4hi_ftype_v4hi_int, IX86_BUILTIN_PSHUFW);
18092 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_pmaddwd", v2si_ftype_v4hi_v4hi, IX86_BUILTIN_PMADDWD);
18093
18094 /* comi/ucomi insns. */
18095 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
18096 if (d->mask == OPTION_MASK_ISA_SSE2)
18097 def_builtin_const (d->mask, d->name, int_ftype_v2df_v2df, d->code);
18098 else
18099 def_builtin_const (d->mask, d->name, int_ftype_v4sf_v4sf, d->code);
18100
18101 /* ptest insns. */
18102 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
18103 def_builtin_const (d->mask, d->name, int_ftype_v2di_v2di, d->code);
18104
18105 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packsswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKSSWB);
18106 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packssdw", v4hi_ftype_v2si_v2si, IX86_BUILTIN_PACKSSDW);
18107 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_packuswb", v8qi_ftype_v4hi_v4hi, IX86_BUILTIN_PACKUSWB);
18108
18109 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_ldmxcsr", void_ftype_unsigned, IX86_BUILTIN_LDMXCSR);
18110 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_stmxcsr", unsigned_ftype_void, IX86_BUILTIN_STMXCSR);
18111 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtpi2ps", v4sf_ftype_v4sf_v2si, IX86_BUILTIN_CVTPI2PS);
18112 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTPS2PI);
18113 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtsi2ss", v4sf_ftype_v4sf_int, IX86_BUILTIN_CVTSI2SS);
18114 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642ss", v4sf_ftype_v4sf_int64, IX86_BUILTIN_CVTSI642SS);
18115 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvtss2si", int_ftype_v4sf, IX86_BUILTIN_CVTSS2SI);
18116 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTSS2SI64);
18117 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttps2pi", v2si_ftype_v4sf, IX86_BUILTIN_CVTTPS2PI);
18118 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_cvttss2si", int_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI);
18119 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttss2si64", int64_ftype_v4sf, IX86_BUILTIN_CVTTSS2SI64);
18120
18121 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_maskmovq", void_ftype_v8qi_v8qi_pchar, IX86_BUILTIN_MASKMOVQ);
18122
18123 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadups", v4sf_ftype_pcfloat, IX86_BUILTIN_LOADUPS);
18124 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storeups", void_ftype_pfloat_v4sf, IX86_BUILTIN_STOREUPS);
18125
18126 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadhps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADHPS);
18127 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_loadlps", v4sf_ftype_v4sf_pv2si, IX86_BUILTIN_LOADLPS);
18128 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storehps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STOREHPS);
18129 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_storelps", void_ftype_pv2si_v4sf, IX86_BUILTIN_STORELPS);
18130
18131 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_movmskps", int_ftype_v4sf, IX86_BUILTIN_MOVMSKPS);
18132 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pmovmskb", int_ftype_v8qi, IX86_BUILTIN_PMOVMSKB);
18133 def_builtin (OPTION_MASK_ISA_SSE, "__builtin_ia32_movntps", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTPS);
18134 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_movntq", void_ftype_pdi_di, IX86_BUILTIN_MOVNTQ);
18135
18136 def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_sfence", void_ftype_void, IX86_BUILTIN_SFENCE);
18137
18138 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_psadbw", di_ftype_v8qi_v8qi, IX86_BUILTIN_PSADBW);
18139
18140 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpps", v4sf_ftype_v4sf, IX86_BUILTIN_RCPPS);
18141 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rcpss", v4sf_ftype_v4sf, IX86_BUILTIN_RCPSS);
18142 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTPS);
18143 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_RSQRTSS);
18144 ftype = build_function_type_list (float_type_node,
18145 float_type_node,
18146 NULL_TREE);
18147 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_rsqrtf", ftype, IX86_BUILTIN_RSQRTF);
18148 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtps", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTPS);
18149 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_sqrtss", v4sf_ftype_v4sf, IX86_BUILTIN_SQRTSS);
18150
18151 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_shufps", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_SHUFPS);
18152
18153 /* Original 3DNow! */
18154 def_builtin (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_femms", void_ftype_void, IX86_BUILTIN_FEMMS);
18155 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pavgusb", v8qi_ftype_v8qi_v8qi, IX86_BUILTIN_PAVGUSB);
18156 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pf2id", v2si_ftype_v2sf, IX86_BUILTIN_PF2ID);
18157 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFACC);
18158 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfadd", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFADD);
18159 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpeq", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPEQ);
18160 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpge", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGE);
18161 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfcmpgt", v2si_ftype_v2sf_v2sf, IX86_BUILTIN_PFCMPGT);
18162 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmax", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMAX);
18163 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmin", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMIN);
18164 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfmul", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFMUL);
18165 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcp", v2sf_ftype_v2sf, IX86_BUILTIN_PFRCP);
18166 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT1);
18167 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrcpit2", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRCPIT2);
18168 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqrt", v2sf_ftype_v2sf, IX86_BUILTIN_PFRSQRT);
18169 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfrsqit1", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFRSQIT1);
18170 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsub", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUB);
18171 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pfsubr", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFSUBR);
18172 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pi2fd", v2sf_ftype_v2si, IX86_BUILTIN_PI2FD);
18173 def_builtin_const (OPTION_MASK_ISA_3DNOW, "__builtin_ia32_pmulhrw", v4hi_ftype_v4hi_v4hi, IX86_BUILTIN_PMULHRW);
18174
18175 /* 3DNow! extension as used in the Athlon CPU. */
18176 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pf2iw", v2si_ftype_v2sf, IX86_BUILTIN_PF2IW);
18177 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFNACC);
18178 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pfpnacc", v2sf_ftype_v2sf_v2sf, IX86_BUILTIN_PFPNACC);
18179 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pi2fw", v2sf_ftype_v2si, IX86_BUILTIN_PI2FW);
18180 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsf", v2sf_ftype_v2sf, IX86_BUILTIN_PSWAPDSF);
18181 def_builtin_const (OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_pswapdsi", v2si_ftype_v2si, IX86_BUILTIN_PSWAPDSI);
18182
18183 /* SSE2 */
18184 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_maskmovdqu", void_ftype_v16qi_v16qi_pchar, IX86_BUILTIN_MASKMOVDQU);
18185
18186 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadupd", v2df_ftype_pcdouble, IX86_BUILTIN_LOADUPD);
18187 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storeupd", void_ftype_pdouble_v2df, IX86_BUILTIN_STOREUPD);
18188
18189 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadhpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADHPD);
18190 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loadlpd", v2df_ftype_v2df_pcdouble, IX86_BUILTIN_LOADLPD);
18191
18192 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movmskpd", int_ftype_v2df, IX86_BUILTIN_MOVMSKPD);
18193 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmovmskb128", int_ftype_v16qi, IX86_BUILTIN_PMOVMSKB128);
18194 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movnti", void_ftype_pint_int, IX86_BUILTIN_MOVNTI);
18195 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntpd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTPD);
18196 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_movntdq", void_ftype_pv2di_v2di, IX86_BUILTIN_MOVNTDQ);
18197
18198 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufd", v4si_ftype_v4si_int, IX86_BUILTIN_PSHUFD);
18199 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshuflw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFLW);
18200 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pshufhw", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSHUFHW);
18201 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psadbw128", v2di_ftype_v16qi_v16qi, IX86_BUILTIN_PSADBW128);
18202
18203 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtpd", v2df_ftype_v2df, IX86_BUILTIN_SQRTPD);
18204 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_sqrtsd", v2df_ftype_v2df, IX86_BUILTIN_SQRTSD);
18205
18206 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_shufpd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_SHUFPD);
18207
18208 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2pd", v2df_ftype_v4si, IX86_BUILTIN_CVTDQ2PD);
18209 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtdq2ps", v4sf_ftype_v4si, IX86_BUILTIN_CVTDQ2PS);
18210
18211 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTPD2DQ);
18212 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTPD2PI);
18213 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpd2ps", v4sf_ftype_v2df, IX86_BUILTIN_CVTPD2PS);
18214 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2dq", v4si_ftype_v2df, IX86_BUILTIN_CVTTPD2DQ);
18215 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttpd2pi", v2si_ftype_v2df, IX86_BUILTIN_CVTTPD2PI);
18216
18217 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtpi2pd", v2df_ftype_v2si, IX86_BUILTIN_CVTPI2PD);
18218
18219 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2si", int_ftype_v2df, IX86_BUILTIN_CVTSD2SI);
18220 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttsd2si", int_ftype_v2df, IX86_BUILTIN_CVTTSD2SI);
18221 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTSD2SI64);
18222 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvttsd2si64", int64_ftype_v2df, IX86_BUILTIN_CVTTSD2SI64);
18223
18224 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTPS2DQ);
18225 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtps2pd", v2df_ftype_v4sf, IX86_BUILTIN_CVTPS2PD);
18226 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvttps2dq", v4si_ftype_v4sf, IX86_BUILTIN_CVTTPS2DQ);
18227
18228 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsi2sd", v2df_ftype_v2df_int, IX86_BUILTIN_CVTSI2SD);
18229 def_builtin_const (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_cvtsi642sd", v2df_ftype_v2df_int64, IX86_BUILTIN_CVTSI642SD);
18230 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtsd2ss", v4sf_ftype_v4sf_v2df, IX86_BUILTIN_CVTSD2SS);
18231 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_cvtss2sd", v2df_ftype_v2df_v4sf, IX86_BUILTIN_CVTSS2SD);
18232
18233 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_clflush", void_ftype_pcvoid, IX86_BUILTIN_CLFLUSH);
18234 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_lfence", void_ftype_void, IX86_BUILTIN_LFENCE);
18235 x86_mfence = def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_mfence", void_ftype_void, IX86_BUILTIN_MFENCE);
18236
18237 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_loaddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LOADDQU);
18238 def_builtin (OPTION_MASK_ISA_SSE2, "__builtin_ia32_storedqu", void_ftype_pchar_v16qi, IX86_BUILTIN_STOREDQU);
18239
18240 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq", di_ftype_v2si_v2si, IX86_BUILTIN_PMULUDQ);
18241 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmuludq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULUDQ128);
18242
18243 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLDQI128);
18244 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSLLWI128);
18245 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSLLDI128);
18246 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSLLQI128);
18247 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSLLW128);
18248 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pslld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSLLD128);
18249 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psllq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSLLQ128);
18250
18251 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLDQI128);
18252 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlwi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRLWI128);
18253 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrldi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRLDI128);
18254 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlqi128", v2di_ftype_v2di_int, IX86_BUILTIN_PSRLQI128);
18255 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRLW128);
18256 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrld128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRLD128);
18257 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrlq128", v2di_ftype_v2di_v2di, IX86_BUILTIN_PSRLQ128);
18258
18259 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrawi128", v8hi_ftype_v8hi_int, IX86_BUILTIN_PSRAWI128);
18260 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psradi128", v4si_ftype_v4si_int, IX86_BUILTIN_PSRADI128);
18261 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psraw128", v8hi_ftype_v8hi_v8hi, IX86_BUILTIN_PSRAW128);
18262 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_psrad128", v4si_ftype_v4si_v4si, IX86_BUILTIN_PSRAD128);
18263
18264 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_pmaddwd128", v4si_ftype_v8hi_v8hi, IX86_BUILTIN_PMADDWD128);
18265
18266 /* Prescott New Instructions. */
18267 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_monitor", void_ftype_pcvoid_unsigned_unsigned, IX86_BUILTIN_MONITOR);
18268 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_mwait", void_ftype_unsigned_unsigned, IX86_BUILTIN_MWAIT);
18269 def_builtin (OPTION_MASK_ISA_SSE3, "__builtin_ia32_lddqu", v16qi_ftype_pcchar, IX86_BUILTIN_LDDQU);
18270
18271 /* SSSE3. */
18272 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr128", v2di_ftype_v2di_v2di_int, IX86_BUILTIN_PALIGNR128);
18273 def_builtin_const (OPTION_MASK_ISA_SSSE3, "__builtin_ia32_palignr", di_ftype_di_di_int, IX86_BUILTIN_PALIGNR);
18274
18275 /* SSE4.1. */
18276 def_builtin (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_movntdqa", v2di_ftype_pv2di, IX86_BUILTIN_MOVNTDQA);
18277 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVSXBW128);
18278 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVSXBD128);
18279 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVSXBQ128);
18280 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVSXWD128);
18281 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVSXWQ128);
18282 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovsxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVSXDQ128);
18283 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbw128", v8hi_ftype_v16qi, IX86_BUILTIN_PMOVZXBW128);
18284 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbd128", v4si_ftype_v16qi, IX86_BUILTIN_PMOVZXBD128);
18285 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxbq128", v2di_ftype_v16qi, IX86_BUILTIN_PMOVZXBQ128);
18286 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwd128", v4si_ftype_v8hi, IX86_BUILTIN_PMOVZXWD128);
18287 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxwq128", v2di_ftype_v8hi, IX86_BUILTIN_PMOVZXWQ128);
18288 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmovzxdq128", v2di_ftype_v4si, IX86_BUILTIN_PMOVZXDQ128);
18289 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_pmuldq128", v2di_ftype_v4si_v4si, IX86_BUILTIN_PMULDQ128);
18290 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundpd", v2df_ftype_v2df_int, IX86_BUILTIN_ROUNDPD);
18291 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundps", v4sf_ftype_v4sf_int, IX86_BUILTIN_ROUNDPS);
18292 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundsd", v2df_ftype_v2df_v2df_int, IX86_BUILTIN_ROUNDSD);
18293 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_roundss", v4sf_ftype_v4sf_v4sf_int, IX86_BUILTIN_ROUNDSS);
18294
18295 /* SSE4.2. */
18296 ftype = build_function_type_list (unsigned_type_node,
18297 unsigned_type_node,
18298 unsigned_char_type_node,
18299 NULL_TREE);
18300 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32qi", ftype, IX86_BUILTIN_CRC32QI);
18301 ftype = build_function_type_list (unsigned_type_node,
18302 unsigned_type_node,
18303 short_unsigned_type_node,
18304 NULL_TREE);
18305 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32hi", ftype, IX86_BUILTIN_CRC32HI);
18306 ftype = build_function_type_list (unsigned_type_node,
18307 unsigned_type_node,
18308 unsigned_type_node,
18309 NULL_TREE);
18310 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32si", ftype, IX86_BUILTIN_CRC32SI);
18311 ftype = build_function_type_list (long_long_unsigned_type_node,
18312 long_long_unsigned_type_node,
18313 long_long_unsigned_type_node,
18314 NULL_TREE);
18315 def_builtin_const (OPTION_MASK_ISA_SSE4_2, "__builtin_ia32_crc32di", ftype, IX86_BUILTIN_CRC32DI);
18316
18317   /* AMDFAM10 SSE4A new built-ins.  */
18318 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntsd", void_ftype_pdouble_v2df, IX86_BUILTIN_MOVNTSD);
18319 def_builtin (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_movntss", void_ftype_pfloat_v4sf, IX86_BUILTIN_MOVNTSS);
18320 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrqi", v2di_ftype_v2di_unsigned_unsigned, IX86_BUILTIN_EXTRQI);
18321 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_extrq", v2di_ftype_v2di_v16qi, IX86_BUILTIN_EXTRQ);
18322 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertqi", v2di_ftype_v2di_v2di_unsigned_unsigned, IX86_BUILTIN_INSERTQI);
18323 def_builtin_const (OPTION_MASK_ISA_SSE4A, "__builtin_ia32_insertq", v2di_ftype_v2di_v2di, IX86_BUILTIN_INSERTQ);
18324
18325 /* Access to the vec_init patterns. */
18326 ftype = build_function_type_list (V2SI_type_node, integer_type_node,
18327 integer_type_node, NULL_TREE);
18328 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v2si", ftype, IX86_BUILTIN_VEC_INIT_V2SI);
18329
18330 ftype = build_function_type_list (V4HI_type_node, short_integer_type_node,
18331 short_integer_type_node,
18332 short_integer_type_node,
18333 short_integer_type_node, NULL_TREE);
18334 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v4hi", ftype, IX86_BUILTIN_VEC_INIT_V4HI);
18335
18336 ftype = build_function_type_list (V8QI_type_node, char_type_node,
18337 char_type_node, char_type_node,
18338 char_type_node, char_type_node,
18339 char_type_node, char_type_node,
18340 char_type_node, NULL_TREE);
18341 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_init_v8qi", ftype, IX86_BUILTIN_VEC_INIT_V8QI);
18342
18343 /* Access to the vec_extract patterns. */
18344 ftype = build_function_type_list (double_type_node, V2DF_type_node,
18345 integer_type_node, NULL_TREE);
18346 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2df", ftype, IX86_BUILTIN_VEC_EXT_V2DF);
18347
18348 ftype = build_function_type_list (long_long_integer_type_node,
18349 V2DI_type_node, integer_type_node,
18350 NULL_TREE);
18351 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v2di", ftype, IX86_BUILTIN_VEC_EXT_V2DI);
18352
18353 ftype = build_function_type_list (float_type_node, V4SF_type_node,
18354 integer_type_node, NULL_TREE);
18355 def_builtin_const (OPTION_MASK_ISA_SSE, "__builtin_ia32_vec_ext_v4sf", ftype, IX86_BUILTIN_VEC_EXT_V4SF);
18356
18357 ftype = build_function_type_list (intSI_type_node, V4SI_type_node,
18358 integer_type_node, NULL_TREE);
18359 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v4si", ftype, IX86_BUILTIN_VEC_EXT_V4SI);
18360
18361 ftype = build_function_type_list (intHI_type_node, V8HI_type_node,
18362 integer_type_node, NULL_TREE);
18363 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi", ftype, IX86_BUILTIN_VEC_EXT_V8HI);
18364
18365 ftype = build_function_type_list (intHI_type_node, V4HI_type_node,
18366 integer_type_node, NULL_TREE);
18367 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_ext_v4hi", ftype, IX86_BUILTIN_VEC_EXT_V4HI);
18368
18369 ftype = build_function_type_list (intSI_type_node, V2SI_type_node,
18370 integer_type_node, NULL_TREE);
18371 def_builtin_const (OPTION_MASK_ISA_MMX, "__builtin_ia32_vec_ext_v2si", ftype, IX86_BUILTIN_VEC_EXT_V2SI);
18372
18373 ftype = build_function_type_list (intQI_type_node, V16QI_type_node,
18374 integer_type_node, NULL_TREE);
18375 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v16qi", ftype, IX86_BUILTIN_VEC_EXT_V16QI);
18376
18377 /* Access to the vec_set patterns. */
18378 ftype = build_function_type_list (V2DI_type_node, V2DI_type_node,
18379 intDI_type_node,
18380 integer_type_node, NULL_TREE);
18381 def_builtin_const (OPTION_MASK_ISA_SSE4_1 | OPTION_MASK_ISA_64BIT, "__builtin_ia32_vec_set_v2di", ftype, IX86_BUILTIN_VEC_SET_V2DI);
18382
18383 ftype = build_function_type_list (V4SF_type_node, V4SF_type_node,
18384 float_type_node,
18385 integer_type_node, NULL_TREE);
18386 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4sf", ftype, IX86_BUILTIN_VEC_SET_V4SF);
18387
18388 ftype = build_function_type_list (V4SI_type_node, V4SI_type_node,
18389 intSI_type_node,
18390 integer_type_node, NULL_TREE);
18391 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v4si", ftype, IX86_BUILTIN_VEC_SET_V4SI);
18392
18393 ftype = build_function_type_list (V8HI_type_node, V8HI_type_node,
18394 intHI_type_node,
18395 integer_type_node, NULL_TREE);
18396 def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi", ftype, IX86_BUILTIN_VEC_SET_V8HI);
18397
18398 ftype = build_function_type_list (V4HI_type_node, V4HI_type_node,
18399 intHI_type_node,
18400 integer_type_node, NULL_TREE);
18401 def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, "__builtin_ia32_vec_set_v4hi", ftype, IX86_BUILTIN_VEC_SET_V4HI);
18402
18403 ftype = build_function_type_list (V16QI_type_node, V16QI_type_node,
18404 intQI_type_node,
18405 integer_type_node, NULL_TREE);
18406 def_builtin_const (OPTION_MASK_ISA_SSE4_1, "__builtin_ia32_vec_set_v16qi", ftype, IX86_BUILTIN_VEC_SET_V16QI);
18407 }
18408
18409 static void
18410 ix86_init_builtins (void)
18411 {
18412 if (TARGET_MMX)
18413 ix86_init_mmx_sse_builtins ();
18414 }
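
/* For reference: user code normally reaches the builtins defined above
   through the intrinsic headers (mmintrin.h, xmmintrin.h, emmintrin.h,
   ...) rather than by calling them directly.  As a rough sketch (the
   exact wrapper lives in emmintrin.h), IX86_BUILTIN_SQRTPD is exposed
   along these lines:

     extern __inline __m128d __attribute__ ((__always_inline__))
     _mm_sqrt_pd (__m128d __A)
     {
       return (__m128d) __builtin_ia32_sqrtpd ((__v2df) __A);
     }
*/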
18415
18416 /* Errors in the source file can cause expand_expr to return const0_rtx
18417 where we expect a vector. To avoid crashing, use one of the vector
18418 clear instructions. */
18419 static rtx
18420 safe_vector_operand (rtx x, enum machine_mode mode)
18421 {
18422 if (x == const0_rtx)
18423 x = CONST0_RTX (mode);
18424 return x;
18425 }
18426
18427 /* Subroutine of ix86_expand_builtin to take care of SSE insns with
18428    4 operands.  The third argument must be an immediate that fits in
18429    8 bits (4 bits for roundsd/roundss), or xmm0 for the variable blends.  */
18430
18431 static rtx
18432 ix86_expand_sse_4_operands_builtin (enum insn_code icode, tree exp,
18433 rtx target)
18434 {
18435 rtx pat;
18436 tree arg0 = CALL_EXPR_ARG (exp, 0);
18437 tree arg1 = CALL_EXPR_ARG (exp, 1);
18438 tree arg2 = CALL_EXPR_ARG (exp, 2);
18439 rtx op0 = expand_normal (arg0);
18440 rtx op1 = expand_normal (arg1);
18441 rtx op2 = expand_normal (arg2);
18442 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18443 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18444 enum machine_mode mode2 = insn_data[icode].operand[2].mode;
18445 enum machine_mode mode3 = insn_data[icode].operand[3].mode;
18446
18447 if (VECTOR_MODE_P (mode1))
18448 op0 = safe_vector_operand (op0, mode1);
18449 if (VECTOR_MODE_P (mode2))
18450 op1 = safe_vector_operand (op1, mode2);
18451 if (VECTOR_MODE_P (mode3))
18452 op2 = safe_vector_operand (op2, mode3);
18453
18454 if (optimize
18455 || target == 0
18456 || GET_MODE (target) != tmode
18457 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18458 target = gen_reg_rtx (tmode);
18459
18460 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
18461 op0 = copy_to_mode_reg (mode1, op0);
18462 if ((optimize && !register_operand (op1, mode2))
18463 || !(*insn_data[icode].operand[2].predicate) (op1, mode2))
18464 op1 = copy_to_mode_reg (mode2, op1);
18465
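  /* op2 did not satisfy its predicate.  For the variable blends the
     third operand is a vector mask that must live in a register;
     everything else requires an immediate (4 bits wide for
     roundsd/roundss, 8 bits otherwise).  */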
18466 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
18467 switch (icode)
18468 {
18469 case CODE_FOR_sse4_1_blendvpd:
18470 case CODE_FOR_sse4_1_blendvps:
18471 case CODE_FOR_sse4_1_pblendvb:
18472 op2 = copy_to_mode_reg (mode3, op2);
18473 break;
18474
18475 case CODE_FOR_sse4_1_roundsd:
18476 case CODE_FOR_sse4_1_roundss:
18477 error ("the third argument must be a 4-bit immediate");
18478 return const0_rtx;
18479
18480 default:
18481 error ("the third argument must be an 8-bit immediate");
18482 return const0_rtx;
18483 }
18484
18485 pat = GEN_FCN (icode) (target, op0, op1, op2);
18486 if (! pat)
18487 return 0;
18488 emit_insn (pat);
18489 return target;
18490 }
18491
18492 /* Subroutine of ix86_expand_builtin to take care of crc32 insns. */
18493
18494 static rtx
18495 ix86_expand_crc32 (enum insn_code icode, tree exp, rtx target)
18496 {
18497 rtx pat;
18498 tree arg0 = CALL_EXPR_ARG (exp, 0);
18499 tree arg1 = CALL_EXPR_ARG (exp, 1);
18500 rtx op0 = expand_normal (arg0);
18501 rtx op1 = expand_normal (arg1);
18502 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18503 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18504 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18505
18506 if (optimize
18507 || !target
18508 || GET_MODE (target) != tmode
18509 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18510 target = gen_reg_rtx (tmode);
18511
18512 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18513 op0 = copy_to_mode_reg (mode0, op0);
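  /* The source value may not match the narrow mode the pattern wants
     (it differs between the crc32 variants); copy it into a register
     and reinterpret it in that mode via a subreg.  */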
18514 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18515 {
18516 op1 = copy_to_reg (op1);
18517 op1 = simplify_gen_subreg (mode1, op1, GET_MODE (op1), 0);
18518 }
18519
18520 pat = GEN_FCN (icode) (target, op0, op1);
18521 if (! pat)
18522 return 0;
18523 emit_insn (pat);
18524 return target;
18525 }
18526
18527 /* Subroutine of ix86_expand_builtin to take care of binop insns. */
18528
18529 static rtx
18530 ix86_expand_binop_builtin (enum insn_code icode, tree exp, rtx target)
18531 {
18532 rtx pat, xops[3];
18533 tree arg0 = CALL_EXPR_ARG (exp, 0);
18534 tree arg1 = CALL_EXPR_ARG (exp, 1);
18535 rtx op0 = expand_normal (arg0);
18536 rtx op1 = expand_normal (arg1);
18537 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18538 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18539 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18540
18541 if (VECTOR_MODE_P (mode0))
18542 op0 = safe_vector_operand (op0, mode0);
18543 if (VECTOR_MODE_P (mode1))
18544 op1 = safe_vector_operand (op1, mode1);
18545
18546 if (optimize || !target
18547 || GET_MODE (target) != tmode
18548 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18549 target = gen_reg_rtx (tmode);
18550
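  /* If the insn wants a TImode operand but we were handed a 32-bit
     value, load it into the low element of a V4SI register (zeroing
     the rest) and use its TImode lowpart.  */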
18551 if (GET_MODE (op1) == SImode && mode1 == TImode)
18552 {
18553 rtx x = gen_reg_rtx (V4SImode);
18554 emit_insn (gen_sse2_loadd (x, op1));
18555 op1 = gen_lowpart (TImode, x);
18556 }
18557
18558 if (!(*insn_data[icode].operand[1].predicate) (op0, mode0))
18559 op0 = copy_to_mode_reg (mode0, op0);
18560 if (!(*insn_data[icode].operand[2].predicate) (op1, mode1))
18561 op1 = copy_to_mode_reg (mode1, op1);
18562
18563 /* ??? Using ix86_fixup_binary_operands is problematic when
18564 we've got mismatched modes. Fake it. */
18565
18566 xops[0] = target;
18567 xops[1] = op0;
18568 xops[2] = op1;
18569
18570 if (tmode == mode0 && tmode == mode1)
18571 {
18572 target = ix86_fixup_binary_operands (UNKNOWN, tmode, xops);
18573 op0 = xops[1];
18574 op1 = xops[2];
18575 }
18576 else if (optimize || !ix86_binary_operator_ok (UNKNOWN, tmode, xops))
18577 {
18578 op0 = force_reg (mode0, op0);
18579 op1 = force_reg (mode1, op1);
18580 target = gen_reg_rtx (tmode);
18581 }
18582
18583 pat = GEN_FCN (icode) (target, op0, op1);
18584 if (! pat)
18585 return 0;
18586 emit_insn (pat);
18587 return target;
18588 }
18589
18590 /* Subroutine of ix86_expand_builtin to take care of stores. */
18591
18592 static rtx
18593 ix86_expand_store_builtin (enum insn_code icode, tree exp)
18594 {
18595 rtx pat;
18596 tree arg0 = CALL_EXPR_ARG (exp, 0);
18597 tree arg1 = CALL_EXPR_ARG (exp, 1);
18598 rtx op0 = expand_normal (arg0);
18599 rtx op1 = expand_normal (arg1);
18600 enum machine_mode mode0 = insn_data[icode].operand[0].mode;
18601 enum machine_mode mode1 = insn_data[icode].operand[1].mode;
18602
18603 if (VECTOR_MODE_P (mode1))
18604 op1 = safe_vector_operand (op1, mode1);
18605
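  /* The first argument is the destination address; wrap it in a MEM of
     the mode the store pattern expects.  The value being stored always
     goes through a register.  */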
18606 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18607 op1 = copy_to_mode_reg (mode1, op1);
18608
18609 pat = GEN_FCN (icode) (op0, op1);
18610 if (pat)
18611 emit_insn (pat);
18612 return 0;
18613 }
18614
18615 /* Subroutine of ix86_expand_builtin to take care of unop insns. */
18616
18617 static rtx
18618 ix86_expand_unop_builtin (enum insn_code icode, tree exp,
18619 rtx target, int do_load)
18620 {
18621 rtx pat;
18622 tree arg0 = CALL_EXPR_ARG (exp, 0);
18623 rtx op0 = expand_normal (arg0);
18624 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18625 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18626
18627 if (optimize || !target
18628 || GET_MODE (target) != tmode
18629 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18630 target = gen_reg_rtx (tmode);
18631 if (do_load)
18632 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
18633 else
18634 {
18635 if (VECTOR_MODE_P (mode0))
18636 op0 = safe_vector_operand (op0, mode0);
18637
18638 if ((optimize && !register_operand (op0, mode0))
18639 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18640 op0 = copy_to_mode_reg (mode0, op0);
18641 }
18642
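  /* roundpd and roundps are expanded through this unop path even
     though they carry a second, immediate argument (see the "fake"
     single-argument entries in bdesc_1arg); fetch and validate that
     immediate here.  */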
18643 switch (icode)
18644 {
18645 case CODE_FOR_sse4_1_roundpd:
18646 case CODE_FOR_sse4_1_roundps:
18647 {
18648 tree arg1 = CALL_EXPR_ARG (exp, 1);
18649 rtx op1 = expand_normal (arg1);
18650 enum machine_mode mode1 = insn_data[icode].operand[2].mode;
18651
18652 if (! (*insn_data[icode].operand[2].predicate) (op1, mode1))
18653 {
18654 error ("the second argument must be a 4-bit immediate");
18655 return const0_rtx;
18656 }
18657 pat = GEN_FCN (icode) (target, op0, op1);
18658 }
18659 break;
18660 default:
18661 pat = GEN_FCN (icode) (target, op0);
18662 break;
18663 }
18664
18665 if (! pat)
18666 return 0;
18667 emit_insn (pat);
18668 return target;
18669 }
18670
18671 /* Subroutine of ix86_expand_builtin to take care of three special unop insns:
18672 sqrtss, rsqrtss, rcpss. */
18673
18674 static rtx
18675 ix86_expand_unop1_builtin (enum insn_code icode, tree exp, rtx target)
18676 {
18677 rtx pat;
18678 tree arg0 = CALL_EXPR_ARG (exp, 0);
18679 rtx op1, op0 = expand_normal (arg0);
18680 enum machine_mode tmode = insn_data[icode].operand[0].mode;
18681 enum machine_mode mode0 = insn_data[icode].operand[1].mode;
18682
18683 if (optimize || !target
18684 || GET_MODE (target) != tmode
18685 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
18686 target = gen_reg_rtx (tmode);
18687
18688 if (VECTOR_MODE_P (mode0))
18689 op0 = safe_vector_operand (op0, mode0);
18690
18691 if ((optimize && !register_operand (op0, mode0))
18692 || ! (*insn_data[icode].operand[1].predicate) (op0, mode0))
18693 op0 = copy_to_mode_reg (mode0, op0);
18694
18695 op1 = op0;
18696 if (! (*insn_data[icode].operand[2].predicate) (op1, mode0))
18697 op1 = copy_to_mode_reg (mode0, op1);
18698
18699 pat = GEN_FCN (icode) (target, op0, op1);
18700 if (! pat)
18701 return 0;
18702 emit_insn (pat);
18703 return target;
18704 }
18705
18706 /* Subroutine of ix86_expand_builtin to take care of comparison insns. */
18707
18708 static rtx
18709 ix86_expand_sse_compare (const struct builtin_description *d, tree exp,
18710 rtx target)
18711 {
18712 rtx pat;
18713 tree arg0 = CALL_EXPR_ARG (exp, 0);
18714 tree arg1 = CALL_EXPR_ARG (exp, 1);
18715 rtx op0 = expand_normal (arg0);
18716 rtx op1 = expand_normal (arg1);
18717 rtx op2;
18718 enum machine_mode tmode = insn_data[d->icode].operand[0].mode;
18719 enum machine_mode mode0 = insn_data[d->icode].operand[1].mode;
18720 enum machine_mode mode1 = insn_data[d->icode].operand[2].mode;
18721 enum rtx_code comparison = d->comparison;
18722
18723 if (VECTOR_MODE_P (mode0))
18724 op0 = safe_vector_operand (op0, mode0);
18725 if (VECTOR_MODE_P (mode1))
18726 op1 = safe_vector_operand (op1, mode1);
18727
18728 /* Swap operands if we have a comparison that isn't available in
18729 hardware. */
18730 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18731 {
18732 rtx tmp = gen_reg_rtx (mode1);
18733 emit_move_insn (tmp, op1);
18734 op1 = op0;
18735 op0 = tmp;
18736 }
18737
18738 if (optimize || !target
18739 || GET_MODE (target) != tmode
18740 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode))
18741 target = gen_reg_rtx (tmode);
18742
18743 if ((optimize && !register_operand (op0, mode0))
18744 || ! (*insn_data[d->icode].operand[1].predicate) (op0, mode0))
18745 op0 = copy_to_mode_reg (mode0, op0);
18746 if ((optimize && !register_operand (op1, mode1))
18747 || ! (*insn_data[d->icode].operand[2].predicate) (op1, mode1))
18748 op1 = copy_to_mode_reg (mode1, op1);
18749
18750 op2 = gen_rtx_fmt_ee (comparison, mode0, op0, op1);
18751 pat = GEN_FCN (d->icode) (target, op0, op1, op2);
18752 if (! pat)
18753 return 0;
18754 emit_insn (pat);
18755 return target;
18756 }
18757
18758 /* Subroutine of ix86_expand_builtin to take care of comi insns. */
18759
18760 static rtx
18761 ix86_expand_sse_comi (const struct builtin_description *d, tree exp,
18762 rtx target)
18763 {
18764 rtx pat;
18765 tree arg0 = CALL_EXPR_ARG (exp, 0);
18766 tree arg1 = CALL_EXPR_ARG (exp, 1);
18767 rtx op0 = expand_normal (arg0);
18768 rtx op1 = expand_normal (arg1);
18769 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18770 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18771 enum rtx_code comparison = d->comparison;
18772
18773 if (VECTOR_MODE_P (mode0))
18774 op0 = safe_vector_operand (op0, mode0);
18775 if (VECTOR_MODE_P (mode1))
18776 op1 = safe_vector_operand (op1, mode1);
18777
18778 /* Swap operands if we have a comparison that isn't available in
18779 hardware. */
18780 if (d->flag & BUILTIN_DESC_SWAP_OPERANDS)
18781 {
18782 rtx tmp = op1;
18783 op1 = op0;
18784 op0 = tmp;
18785 }
18786
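/* The comi insn only sets the flags.  Materialize the boolean result by
   clearing an SImode pseudo and then setting its low byte, via
   STRICT_LOW_PART, from the requested flags comparison.  */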
18787 target = gen_reg_rtx (SImode);
18788 emit_move_insn (target, const0_rtx);
18789 target = gen_rtx_SUBREG (QImode, target, 0);
18790
18791 if ((optimize && !register_operand (op0, mode0))
18792 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18793 op0 = copy_to_mode_reg (mode0, op0);
18794 if ((optimize && !register_operand (op1, mode1))
18795 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18796 op1 = copy_to_mode_reg (mode1, op1);
18797
18798 pat = GEN_FCN (d->icode) (op0, op1);
18799 if (! pat)
18800 return 0;
18801 emit_insn (pat);
18802 emit_insn (gen_rtx_SET (VOIDmode,
18803 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18804 gen_rtx_fmt_ee (comparison, QImode,
18805 SET_DEST (pat),
18806 const0_rtx)));
18807
18808 return SUBREG_REG (target);
18809 }
18810
18811 /* Subroutine of ix86_expand_builtin to take care of ptest insns. */
18812
18813 static rtx
18814 ix86_expand_sse_ptest (const struct builtin_description *d, tree exp,
18815 rtx target)
18816 {
18817 rtx pat;
18818 tree arg0 = CALL_EXPR_ARG (exp, 0);
18819 tree arg1 = CALL_EXPR_ARG (exp, 1);
18820 rtx op0 = expand_normal (arg0);
18821 rtx op1 = expand_normal (arg1);
18822 enum machine_mode mode0 = insn_data[d->icode].operand[0].mode;
18823 enum machine_mode mode1 = insn_data[d->icode].operand[1].mode;
18824 enum rtx_code comparison = d->comparison;
18825
18826 if (VECTOR_MODE_P (mode0))
18827 op0 = safe_vector_operand (op0, mode0);
18828 if (VECTOR_MODE_P (mode1))
18829 op1 = safe_vector_operand (op1, mode1);
18830
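/* As with the comi patterns above, ptest only sets the flags; build the
   boolean result in the low byte of a zeroed SImode pseudo.  */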
18831 target = gen_reg_rtx (SImode);
18832 emit_move_insn (target, const0_rtx);
18833 target = gen_rtx_SUBREG (QImode, target, 0);
18834
18835 if ((optimize && !register_operand (op0, mode0))
18836 || !(*insn_data[d->icode].operand[0].predicate) (op0, mode0))
18837 op0 = copy_to_mode_reg (mode0, op0);
18838 if ((optimize && !register_operand (op1, mode1))
18839 || !(*insn_data[d->icode].operand[1].predicate) (op1, mode1))
18840 op1 = copy_to_mode_reg (mode1, op1);
18841
18842 pat = GEN_FCN (d->icode) (op0, op1);
18843 if (! pat)
18844 return 0;
18845 emit_insn (pat);
18846 emit_insn (gen_rtx_SET (VOIDmode,
18847 gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18848 gen_rtx_fmt_ee (comparison, QImode,
18849 SET_DEST (pat),
18850 const0_rtx)));
18851
18852 return SUBREG_REG (target);
18853 }
18854
18855 /* Subroutine of ix86_expand_builtin to take care of pcmpestr[im] insns. */
18856
18857 static rtx
18858 ix86_expand_sse_pcmpestr (const struct builtin_description *d,
18859 tree exp, rtx target)
18860 {
18861 rtx pat;
18862 tree arg0 = CALL_EXPR_ARG (exp, 0);
18863 tree arg1 = CALL_EXPR_ARG (exp, 1);
18864 tree arg2 = CALL_EXPR_ARG (exp, 2);
18865 tree arg3 = CALL_EXPR_ARG (exp, 3);
18866 tree arg4 = CALL_EXPR_ARG (exp, 4);
18867 rtx scratch0, scratch1;
18868 rtx op0 = expand_normal (arg0);
18869 rtx op1 = expand_normal (arg1);
18870 rtx op2 = expand_normal (arg2);
18871 rtx op3 = expand_normal (arg3);
18872 rtx op4 = expand_normal (arg4);
18873 enum machine_mode tmode0, tmode1, modev2, modei3, modev4, modei5, modeimm;
18874
18875 tmode0 = insn_data[d->icode].operand[0].mode;
18876 tmode1 = insn_data[d->icode].operand[1].mode;
18877 modev2 = insn_data[d->icode].operand[2].mode;
18878 modei3 = insn_data[d->icode].operand[3].mode;
18879 modev4 = insn_data[d->icode].operand[4].mode;
18880 modei5 = insn_data[d->icode].operand[5].mode;
18881 modeimm = insn_data[d->icode].operand[6].mode;
18882
18883 if (VECTOR_MODE_P (modev2))
18884 op0 = safe_vector_operand (op0, modev2);
18885 if (VECTOR_MODE_P (modev4))
18886 op2 = safe_vector_operand (op2, modev4);
18887
18888 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18889 op0 = copy_to_mode_reg (modev2, op0);
18890 if (! (*insn_data[d->icode].operand[3].predicate) (op1, modei3))
18891 op1 = copy_to_mode_reg (modei3, op1);
18892 if ((optimize && !register_operand (op2, modev4))
18893 || !(*insn_data[d->icode].operand[4].predicate) (op2, modev4))
18894 op2 = copy_to_mode_reg (modev4, op2);
18895 if (! (*insn_data[d->icode].operand[5].predicate) (op3, modei5))
18896 op3 = copy_to_mode_reg (modei5, op3);
18897
18898 if (! (*insn_data[d->icode].operand[6].predicate) (op4, modeimm))
18899 {
18900 error ("the fifth argument must be an 8-bit immediate");
18901 return const0_rtx;
18902 }
18903
18904 if (d->code == IX86_BUILTIN_PCMPESTRI128)
18905 {
18906 if (optimize || !target
18907 || GET_MODE (target) != tmode0
18908 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
18909 target = gen_reg_rtx (tmode0);
18910
18911 scratch1 = gen_reg_rtx (tmode1);
18912
18913 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2, op3, op4);
18914 }
18915 else if (d->code == IX86_BUILTIN_PCMPESTRM128)
18916 {
18917 if (optimize || !target
18918 || GET_MODE (target) != tmode1
18919 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
18920 target = gen_reg_rtx (tmode1);
18921
18922 scratch0 = gen_reg_rtx (tmode0);
18923
18924 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2, op3, op4);
18925 }
18926 else
18927 {
18928 gcc_assert (d->flag);
18929
18930 scratch0 = gen_reg_rtx (tmode0);
18931 scratch1 = gen_reg_rtx (tmode1);
18932
18933 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2, op3, op4);
18934 }
18935
18936 if (! pat)
18937 return 0;
18938
18939 emit_insn (pat);
18940
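/* For the flag-returning variants, d->flag holds the mode of the flags
   register to test; extract the flag by comparing FLAGS_REG against zero
   in that mode.  */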
18941 if (d->flag)
18942 {
18943 target = gen_reg_rtx (SImode);
18944 emit_move_insn (target, const0_rtx);
18945 target = gen_rtx_SUBREG (QImode, target, 0);
18946
18947 emit_insn
18948 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
18949 gen_rtx_fmt_ee (EQ, QImode,
18950 gen_rtx_REG ((enum machine_mode) d->flag,
18951 FLAGS_REG),
18952 const0_rtx)));
18953 return SUBREG_REG (target);
18954 }
18955 else
18956 return target;
18957 }
18958
18959
18960 /* Subroutine of ix86_expand_builtin to take care of pcmpistr[im] insns. */
18961
18962 static rtx
18963 ix86_expand_sse_pcmpistr (const struct builtin_description *d,
18964 tree exp, rtx target)
18965 {
18966 rtx pat;
18967 tree arg0 = CALL_EXPR_ARG (exp, 0);
18968 tree arg1 = CALL_EXPR_ARG (exp, 1);
18969 tree arg2 = CALL_EXPR_ARG (exp, 2);
18970 rtx scratch0, scratch1;
18971 rtx op0 = expand_normal (arg0);
18972 rtx op1 = expand_normal (arg1);
18973 rtx op2 = expand_normal (arg2);
18974 enum machine_mode tmode0, tmode1, modev2, modev3, modeimm;
18975
18976 tmode0 = insn_data[d->icode].operand[0].mode;
18977 tmode1 = insn_data[d->icode].operand[1].mode;
18978 modev2 = insn_data[d->icode].operand[2].mode;
18979 modev3 = insn_data[d->icode].operand[3].mode;
18980 modeimm = insn_data[d->icode].operand[4].mode;
18981
18982 if (VECTOR_MODE_P (modev2))
18983 op0 = safe_vector_operand (op0, modev2);
18984 if (VECTOR_MODE_P (modev3))
18985 op1 = safe_vector_operand (op1, modev3);
18986
18987 if (! (*insn_data[d->icode].operand[2].predicate) (op0, modev2))
18988 op0 = copy_to_mode_reg (modev2, op0);
18989 if ((optimize && !register_operand (op1, modev3))
18990 || !(*insn_data[d->icode].operand[3].predicate) (op1, modev3))
18991 op1 = copy_to_mode_reg (modev3, op1);
18992
18993 if (! (*insn_data[d->icode].operand[4].predicate) (op2, modeimm))
18994 {
18995 error ("the third argument must be an 8-bit immediate");
18996 return const0_rtx;
18997 }
18998
18999 if (d->code == IX86_BUILTIN_PCMPISTRI128)
19000 {
19001 if (optimize || !target
19002 || GET_MODE (target) != tmode0
19003 || ! (*insn_data[d->icode].operand[0].predicate) (target, tmode0))
19004 target = gen_reg_rtx (tmode0);
19005
19006 scratch1 = gen_reg_rtx (tmode1);
19007
19008 pat = GEN_FCN (d->icode) (target, scratch1, op0, op1, op2);
19009 }
19010 else if (d->code == IX86_BUILTIN_PCMPISTRM128)
19011 {
19012 if (optimize || !target
19013 || GET_MODE (target) != tmode1
19014 || ! (*insn_data[d->icode].operand[1].predicate) (target, tmode1))
19015 target = gen_reg_rtx (tmode1);
19016
19017 scratch0 = gen_reg_rtx (tmode0);
19018
19019 pat = GEN_FCN (d->icode) (scratch0, target, op0, op1, op2);
19020 }
19021 else
19022 {
19023 gcc_assert (d->flag);
19024
19025 scratch0 = gen_reg_rtx (tmode0);
19026 scratch1 = gen_reg_rtx (tmode1);
19027
19028 pat = GEN_FCN (d->icode) (scratch0, scratch1, op0, op1, op2);
19029 }
19030
19031 if (! pat)
19032 return 0;
19033
19034 emit_insn (pat);
19035
19036 if (d->flag)
19037 {
19038 target = gen_reg_rtx (SImode);
19039 emit_move_insn (target, const0_rtx);
19040 target = gen_rtx_SUBREG (QImode, target, 0);
19041
19042 emit_insn
19043 (gen_rtx_SET (VOIDmode, gen_rtx_STRICT_LOW_PART (VOIDmode, target),
19044 gen_rtx_fmt_ee (EQ, QImode,
19045 gen_rtx_REG ((enum machine_mode) d->flag,
19046 FLAGS_REG),
19047 const0_rtx)));
19048 return SUBREG_REG (target);
19049 }
19050 else
19051 return target;
19052 }
19053
19054 /* Return the integer constant in ARG. Constrain it to be in the range
19055 of the subparts of VEC_TYPE; issue an error if not. */
19056
19057 static int
19058 get_element_number (tree vec_type, tree arg)
19059 {
19060 unsigned HOST_WIDE_INT elt, max = TYPE_VECTOR_SUBPARTS (vec_type) - 1;
19061
19062 if (!host_integerp (arg, 1)
19063 || (elt = tree_low_cst (arg, 1), elt > max))
19064 {
19065 error ("selector must be an integer constant in the range 0..%wi", max);
19066 return 0;
19067 }
19068
19069 return elt;
19070 }
19071
19072 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19073 ix86_expand_vector_init. We DO have language-level syntax for this, in
19074 the form of (type){ init-list }. Except that since we can't place emms
19075 instructions from inside the compiler, we can't allow the use of MMX
19076 registers unless the user explicitly asks for it. So we do *not* define
19077 vec_set/vec_extract/vec_init patterns for MMX modes in mmx.md. Instead
19078 we have builtins invoked by mmintrin.h that give us license to emit
19079 these sorts of instructions. */
19080
19081 static rtx
19082 ix86_expand_vec_init_builtin (tree type, tree exp, rtx target)
19083 {
19084 enum machine_mode tmode = TYPE_MODE (type);
19085 enum machine_mode inner_mode = GET_MODE_INNER (tmode);
19086 int i, n_elt = GET_MODE_NUNITS (tmode);
19087 rtvec v = rtvec_alloc (n_elt);
19088
19089 gcc_assert (VECTOR_MODE_P (tmode));
19090 gcc_assert (call_expr_nargs (exp) == n_elt);
19091
19092 for (i = 0; i < n_elt; ++i)
19093 {
19094 rtx x = expand_normal (CALL_EXPR_ARG (exp, i));
19095 RTVEC_ELT (v, i) = gen_lowpart (inner_mode, x);
19096 }
19097
19098 if (!target || !register_operand (target, tmode))
19099 target = gen_reg_rtx (tmode);
19100
19101 ix86_expand_vector_init (true, target, gen_rtx_PARALLEL (tmode, v));
19102 return target;
19103 }
19104
19105 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19106 ix86_expand_vector_extract. They would be redundant (for non-MMX) if we
19107 had a language-level syntax for referencing vector elements. */
19108
19109 static rtx
19110 ix86_expand_vec_ext_builtin (tree exp, rtx target)
19111 {
19112 enum machine_mode tmode, mode0;
19113 tree arg0, arg1;
19114 int elt;
19115 rtx op0;
19116
19117 arg0 = CALL_EXPR_ARG (exp, 0);
19118 arg1 = CALL_EXPR_ARG (exp, 1);
19119
19120 op0 = expand_normal (arg0);
19121 elt = get_element_number (TREE_TYPE (arg0), arg1);
19122
19123 tmode = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19124 mode0 = TYPE_MODE (TREE_TYPE (arg0));
19125 gcc_assert (VECTOR_MODE_P (mode0));
19126
19127 op0 = force_reg (mode0, op0);
19128
19129 if (optimize || !target || !register_operand (target, tmode))
19130 target = gen_reg_rtx (tmode);
19131
19132 ix86_expand_vector_extract (true, target, op0, elt);
19133
19134 return target;
19135 }
19136
19137 /* A subroutine of ix86_expand_builtin. These builtins are a wrapper around
19138 ix86_expand_vector_set. They would be redundant (for non-MMX) if we had
19139 a language-level syntax for referencing vector elements. */
19140
19141 static rtx
19142 ix86_expand_vec_set_builtin (tree exp)
19143 {
19144 enum machine_mode tmode, mode1;
19145 tree arg0, arg1, arg2;
19146 int elt;
19147 rtx op0, op1, target;
19148
19149 arg0 = CALL_EXPR_ARG (exp, 0);
19150 arg1 = CALL_EXPR_ARG (exp, 1);
19151 arg2 = CALL_EXPR_ARG (exp, 2);
19152
19153 tmode = TYPE_MODE (TREE_TYPE (arg0));
19154 mode1 = TYPE_MODE (TREE_TYPE (TREE_TYPE (arg0)));
19155 gcc_assert (VECTOR_MODE_P (tmode));
19156
19157 op0 = expand_expr (arg0, NULL_RTX, tmode, EXPAND_NORMAL);
19158 op1 = expand_expr (arg1, NULL_RTX, mode1, EXPAND_NORMAL);
19159 elt = get_element_number (TREE_TYPE (arg0), arg2);
19160
19161 if (GET_MODE (op1) != mode1 && GET_MODE (op1) != VOIDmode)
19162 op1 = convert_modes (mode1, GET_MODE (op1), op1, true);
19163
19164 op0 = force_reg (tmode, op0);
19165 op1 = force_reg (mode1, op1);
19166
19167 /* OP0 is the source of these builtin functions and shouldn't be
19168 modified. Create a copy, use it and return it as target. */
19169 target = gen_reg_rtx (tmode);
19170 emit_move_insn (target, op0);
19171 ix86_expand_vector_set (true, target, op1, elt);
19172
19173 return target;
19174 }
19175
19176 /* Expand an expression EXP that calls a built-in function,
19177 with result going to TARGET if that's convenient
19178 (and in mode MODE if that's convenient).
19179 SUBTARGET may be used as the target for computing one of EXP's operands.
19180 IGNORE is nonzero if the value is to be ignored. */
19181
19182 static rtx
19183 ix86_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
19184 enum machine_mode mode ATTRIBUTE_UNUSED,
19185 int ignore ATTRIBUTE_UNUSED)
19186 {
19187 const struct builtin_description *d;
19188 size_t i;
19189 enum insn_code icode;
19190 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
19191 tree arg0, arg1, arg2, arg3;
19192 rtx op0, op1, op2, op3, pat;
19193 enum machine_mode tmode, mode0, mode1, mode2, mode3, mode4;
19194 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
19195
19196 switch (fcode)
19197 {
19198 case IX86_BUILTIN_EMMS:
19199 emit_insn (gen_mmx_emms ());
19200 return 0;
19201
19202 case IX86_BUILTIN_SFENCE:
19203 emit_insn (gen_sse_sfence ());
19204 return 0;
19205
19206 case IX86_BUILTIN_MASKMOVQ:
19207 case IX86_BUILTIN_MASKMOVDQU:
19208 icode = (fcode == IX86_BUILTIN_MASKMOVQ
19209 ? CODE_FOR_mmx_maskmovq
19210 : CODE_FOR_sse2_maskmovdqu);
19211 /* Note the arg order is different from the operand order. */
19212 arg1 = CALL_EXPR_ARG (exp, 0);
19213 arg2 = CALL_EXPR_ARG (exp, 1);
19214 arg0 = CALL_EXPR_ARG (exp, 2);
19215 op0 = expand_normal (arg0);
19216 op1 = expand_normal (arg1);
19217 op2 = expand_normal (arg2);
19218 mode0 = insn_data[icode].operand[0].mode;
19219 mode1 = insn_data[icode].operand[1].mode;
19220 mode2 = insn_data[icode].operand[2].mode;
19221
19222 op0 = force_reg (Pmode, op0);
19223 op0 = gen_rtx_MEM (mode1, op0);
19224
19225 if (! (*insn_data[icode].operand[0].predicate) (op0, mode0))
19226 op0 = copy_to_mode_reg (mode0, op0);
19227 if (! (*insn_data[icode].operand[1].predicate) (op1, mode1))
19228 op1 = copy_to_mode_reg (mode1, op1);
19229 if (! (*insn_data[icode].operand[2].predicate) (op2, mode2))
19230 op2 = copy_to_mode_reg (mode2, op2);
19231 pat = GEN_FCN (icode) (op0, op1, op2);
19232 if (! pat)
19233 return 0;
19234 emit_insn (pat);
19235 return 0;
19236
19237 case IX86_BUILTIN_RSQRTF:
19238 return ix86_expand_unop1_builtin (CODE_FOR_rsqrtsf2, exp, target);
19239
19240 case IX86_BUILTIN_SQRTSS:
19241 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmsqrtv4sf2, exp, target);
19242 case IX86_BUILTIN_RSQRTSS:
19243 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrsqrtv4sf2, exp, target);
19244 case IX86_BUILTIN_RCPSS:
19245 return ix86_expand_unop1_builtin (CODE_FOR_sse_vmrcpv4sf2, exp, target);
19246
19247 case IX86_BUILTIN_LOADUPS:
19248 return ix86_expand_unop_builtin (CODE_FOR_sse_movups, exp, target, 1);
19249
19250 case IX86_BUILTIN_STOREUPS:
19251 return ix86_expand_store_builtin (CODE_FOR_sse_movups, exp);
19252
19253 case IX86_BUILTIN_LOADHPS:
19254 case IX86_BUILTIN_LOADLPS:
19255 case IX86_BUILTIN_LOADHPD:
19256 case IX86_BUILTIN_LOADLPD:
19257 icode = (fcode == IX86_BUILTIN_LOADHPS ? CODE_FOR_sse_loadhps
19258 : fcode == IX86_BUILTIN_LOADLPS ? CODE_FOR_sse_loadlps
19259 : fcode == IX86_BUILTIN_LOADHPD ? CODE_FOR_sse2_loadhpd
19260 : CODE_FOR_sse2_loadlpd);
19261 arg0 = CALL_EXPR_ARG (exp, 0);
19262 arg1 = CALL_EXPR_ARG (exp, 1);
19263 op0 = expand_normal (arg0);
19264 op1 = expand_normal (arg1);
19265 tmode = insn_data[icode].operand[0].mode;
19266 mode0 = insn_data[icode].operand[1].mode;
19267 mode1 = insn_data[icode].operand[2].mode;
19268
19269 op0 = force_reg (mode0, op0);
19270 op1 = gen_rtx_MEM (mode1, copy_to_mode_reg (Pmode, op1));
19271 if (optimize || target == 0
19272 || GET_MODE (target) != tmode
19273 || !register_operand (target, tmode))
19274 target = gen_reg_rtx (tmode);
19275 pat = GEN_FCN (icode) (target, op0, op1);
19276 if (! pat)
19277 return 0;
19278 emit_insn (pat);
19279 return target;
19280
19281 case IX86_BUILTIN_STOREHPS:
19282 case IX86_BUILTIN_STORELPS:
19283 icode = (fcode == IX86_BUILTIN_STOREHPS ? CODE_FOR_sse_storehps
19284 : CODE_FOR_sse_storelps);
19285 arg0 = CALL_EXPR_ARG (exp, 0);
19286 arg1 = CALL_EXPR_ARG (exp, 1);
19287 op0 = expand_normal (arg0);
19288 op1 = expand_normal (arg1);
19289 mode0 = insn_data[icode].operand[0].mode;
19290 mode1 = insn_data[icode].operand[1].mode;
19291
19292 op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0));
19293 op1 = force_reg (mode1, op1);
19294
19295 pat = GEN_FCN (icode) (op0, op1);
19296 if (! pat)
19297 return 0;
19298 emit_insn (pat);
19299 return const0_rtx;
19300
19301 case IX86_BUILTIN_MOVNTPS:
19302 return ix86_expand_store_builtin (CODE_FOR_sse_movntv4sf, exp);
19303 case IX86_BUILTIN_MOVNTQ:
19304 return ix86_expand_store_builtin (CODE_FOR_sse_movntdi, exp);
19305
19306 case IX86_BUILTIN_LDMXCSR:
19307 op0 = expand_normal (CALL_EXPR_ARG (exp, 0));
19308 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19309 emit_move_insn (target, op0);
19310 emit_insn (gen_sse_ldmxcsr (target));
19311 return 0;
19312
19313 case IX86_BUILTIN_STMXCSR:
19314 target = assign_386_stack_local (SImode, SLOT_VIRTUAL);
19315 emit_insn (gen_sse_stmxcsr (target));
19316 return copy_to_mode_reg (SImode, target);
19317
19318 case IX86_BUILTIN_SHUFPS:
19319 case IX86_BUILTIN_SHUFPD:
19320 icode = (fcode == IX86_BUILTIN_SHUFPS
19321 ? CODE_FOR_sse_shufps
19322 : CODE_FOR_sse2_shufpd);
19323 arg0 = CALL_EXPR_ARG (exp, 0);
19324 arg1 = CALL_EXPR_ARG (exp, 1);
19325 arg2 = CALL_EXPR_ARG (exp, 2);
19326 op0 = expand_normal (arg0);
19327 op1 = expand_normal (arg1);
19328 op2 = expand_normal (arg2);
19329 tmode = insn_data[icode].operand[0].mode;
19330 mode0 = insn_data[icode].operand[1].mode;
19331 mode1 = insn_data[icode].operand[2].mode;
19332 mode2 = insn_data[icode].operand[3].mode;
19333
19334 if (! (*insn_data[icode].operand[1].predicate) (op0, mode0))
19335 op0 = copy_to_mode_reg (mode0, op0);
19336 if ((optimize && !register_operand (op1, mode1))
19337 || !(*insn_data[icode].operand[2].predicate) (op1, mode1))
19338 op1 = copy_to_mode_reg (mode1, op1);
19339 if (! (*insn_data[icode].operand[3].predicate) (op2, mode2))
19340 {
19341 /* @@@ better error message */
19342 error ("mask must be an immediate");
19343 return gen_reg_rtx (tmode);
19344 }
19345 if (optimize || target == 0
19346 || GET_MODE (target) != tmode
19347 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19348 target = gen_reg_rtx (tmode);
19349 pat = GEN_FCN (icode) (target, op0, op1, op2);
19350 if (! pat)
19351 return 0;
19352 emit_insn (pat);
19353 return target;
19354
19355 case IX86_BUILTIN_PSHUFW:
19356 case IX86_BUILTIN_PSHUFD:
19357 case IX86_BUILTIN_PSHUFHW:
19358 case IX86_BUILTIN_PSHUFLW:
19359 icode = ( fcode == IX86_BUILTIN_PSHUFHW ? CODE_FOR_sse2_pshufhw
19360 : fcode == IX86_BUILTIN_PSHUFLW ? CODE_FOR_sse2_pshuflw
19361 : fcode == IX86_BUILTIN_PSHUFD ? CODE_FOR_sse2_pshufd
19362 : CODE_FOR_mmx_pshufw);
19363 arg0 = CALL_EXPR_ARG (exp, 0);
19364 arg1 = CALL_EXPR_ARG (exp, 1);
19365 op0 = expand_normal (arg0);
19366 op1 = expand_normal (arg1);
19367 tmode = insn_data[icode].operand[0].mode;
19368 mode1 = insn_data[icode].operand[1].mode;
19369 mode2 = insn_data[icode].operand[2].mode;
19370
19371 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19372 op0 = copy_to_mode_reg (mode1, op0);
19373 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19374 {
19375 /* @@@ better error message */
19376 error ("mask must be an immediate");
19377 return const0_rtx;
19378 }
19379 if (target == 0
19380 || GET_MODE (target) != tmode
19381 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19382 target = gen_reg_rtx (tmode);
19383 pat = GEN_FCN (icode) (target, op0, op1);
19384 if (! pat)
19385 return 0;
19386 emit_insn (pat);
19387 return target;
19388
19389 case IX86_BUILTIN_PSLLWI128:
19390 icode = CODE_FOR_ashlv8hi3;
19391 goto do_pshifti;
19392 case IX86_BUILTIN_PSLLDI128:
19393 icode = CODE_FOR_ashlv4si3;
19394 goto do_pshifti;
19395 case IX86_BUILTIN_PSLLQI128:
19396 icode = CODE_FOR_ashlv2di3;
19397 goto do_pshifti;
19398 case IX86_BUILTIN_PSRAWI128:
19399 icode = CODE_FOR_ashrv8hi3;
19400 goto do_pshifti;
19401 case IX86_BUILTIN_PSRADI128:
19402 icode = CODE_FOR_ashrv4si3;
19403 goto do_pshifti;
19404 case IX86_BUILTIN_PSRLWI128:
19405 icode = CODE_FOR_lshrv8hi3;
19406 goto do_pshifti;
19407 case IX86_BUILTIN_PSRLDI128:
19408 icode = CODE_FOR_lshrv4si3;
19409 goto do_pshifti;
19410 case IX86_BUILTIN_PSRLQI128:
19411 icode = CODE_FOR_lshrv2di3;
19412 goto do_pshifti;
19413 do_pshifti:
19414 arg0 = CALL_EXPR_ARG (exp, 0);
19415 arg1 = CALL_EXPR_ARG (exp, 1);
19416 op0 = expand_normal (arg0);
19417 op1 = expand_normal (arg1);
19418
19419 if (!CONST_INT_P (op1))
19420 {
19421 error ("shift must be an immediate");
19422 return const0_rtx;
19423 }
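/* Shift counts outside 0..255 cannot be encoded; map them to 255, which,
   like any count of at least the element width, clears (or sign-fills)
   the result.  */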
19424 if (INTVAL (op1) < 0 || INTVAL (op1) > 255)
19425 op1 = GEN_INT (255);
19426
19427 tmode = insn_data[icode].operand[0].mode;
19428 mode1 = insn_data[icode].operand[1].mode;
19429 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19430 op0 = copy_to_reg (op0);
19431
19432 target = gen_reg_rtx (tmode);
19433 pat = GEN_FCN (icode) (target, op0, op1);
19434 if (!pat)
19435 return 0;
19436 emit_insn (pat);
19437 return target;
19438
19439 case IX86_BUILTIN_PSLLW128:
19440 icode = CODE_FOR_ashlv8hi3;
19441 goto do_pshift;
19442 case IX86_BUILTIN_PSLLD128:
19443 icode = CODE_FOR_ashlv4si3;
19444 goto do_pshift;
19445 case IX86_BUILTIN_PSLLQ128:
19446 icode = CODE_FOR_ashlv2di3;
19447 goto do_pshift;
19448 case IX86_BUILTIN_PSRAW128:
19449 icode = CODE_FOR_ashrv8hi3;
19450 goto do_pshift;
19451 case IX86_BUILTIN_PSRAD128:
19452 icode = CODE_FOR_ashrv4si3;
19453 goto do_pshift;
19454 case IX86_BUILTIN_PSRLW128:
19455 icode = CODE_FOR_lshrv8hi3;
19456 goto do_pshift;
19457 case IX86_BUILTIN_PSRLD128:
19458 icode = CODE_FOR_lshrv4si3;
19459 goto do_pshift;
19460 case IX86_BUILTIN_PSRLQ128:
19461 icode = CODE_FOR_lshrv2di3;
19462 goto do_pshift;
19463 do_pshift:
19464 arg0 = CALL_EXPR_ARG (exp, 0);
19465 arg1 = CALL_EXPR_ARG (exp, 1);
19466 op0 = expand_normal (arg0);
19467 op1 = expand_normal (arg1);
19468
19469 tmode = insn_data[icode].operand[0].mode;
19470 mode1 = insn_data[icode].operand[1].mode;
19471
19472 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19473 op0 = copy_to_reg (op0);
19474
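/* The shift patterns take their count operand in TImode; view the vector
   count as TImode before matching the predicate.  */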
19475 op1 = simplify_gen_subreg (TImode, op1, GET_MODE (op1), 0);
19476 if (! (*insn_data[icode].operand[2].predicate) (op1, TImode))
19477 op1 = copy_to_reg (op1);
19478
19479 target = gen_reg_rtx (tmode);
19480 pat = GEN_FCN (icode) (target, op0, op1);
19481 if (!pat)
19482 return 0;
19483 emit_insn (pat);
19484 return target;
19485
19486 case IX86_BUILTIN_PSLLDQI128:
19487 case IX86_BUILTIN_PSRLDQI128:
19488 icode = (fcode == IX86_BUILTIN_PSLLDQI128 ? CODE_FOR_sse2_ashlti3
19489 : CODE_FOR_sse2_lshrti3);
19490 arg0 = CALL_EXPR_ARG (exp, 0);
19491 arg1 = CALL_EXPR_ARG (exp, 1);
19492 op0 = expand_normal (arg0);
19493 op1 = expand_normal (arg1);
19494 tmode = insn_data[icode].operand[0].mode;
19495 mode1 = insn_data[icode].operand[1].mode;
19496 mode2 = insn_data[icode].operand[2].mode;
19497
19498 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19499 {
19500 op0 = copy_to_reg (op0);
19501 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19502 }
19503 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19504 {
19505 error ("shift must be an immediate");
19506 return const0_rtx;
19507 }
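/* The ashlti3/lshrti3 patterns work on TImode, while the builtin's result
   type is V2DImode; generate the insn into a TImode view of a V2DImode
   pseudo.  */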
19508 target = gen_reg_rtx (V2DImode);
19509 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, V2DImode, 0),
19510 op0, op1);
19511 if (! pat)
19512 return 0;
19513 emit_insn (pat);
19514 return target;
19515
19516 case IX86_BUILTIN_FEMMS:
19517 emit_insn (gen_mmx_femms ());
19518 return NULL_RTX;
19519
19520 case IX86_BUILTIN_PAVGUSB:
19521 return ix86_expand_binop_builtin (CODE_FOR_mmx_uavgv8qi3, exp, target);
19522
19523 case IX86_BUILTIN_PF2ID:
19524 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2id, exp, target, 0);
19525
19526 case IX86_BUILTIN_PFACC:
19527 return ix86_expand_binop_builtin (CODE_FOR_mmx_haddv2sf3, exp, target);
19528
19529 case IX86_BUILTIN_PFADD:
19530 return ix86_expand_binop_builtin (CODE_FOR_mmx_addv2sf3, exp, target);
19531
19532 case IX86_BUILTIN_PFCMPEQ:
19533 return ix86_expand_binop_builtin (CODE_FOR_mmx_eqv2sf3, exp, target);
19534
19535 case IX86_BUILTIN_PFCMPGE:
19536 return ix86_expand_binop_builtin (CODE_FOR_mmx_gev2sf3, exp, target);
19537
19538 case IX86_BUILTIN_PFCMPGT:
19539 return ix86_expand_binop_builtin (CODE_FOR_mmx_gtv2sf3, exp, target);
19540
19541 case IX86_BUILTIN_PFMAX:
19542 return ix86_expand_binop_builtin (CODE_FOR_mmx_smaxv2sf3, exp, target);
19543
19544 case IX86_BUILTIN_PFMIN:
19545 return ix86_expand_binop_builtin (CODE_FOR_mmx_sminv2sf3, exp, target);
19546
19547 case IX86_BUILTIN_PFMUL:
19548 return ix86_expand_binop_builtin (CODE_FOR_mmx_mulv2sf3, exp, target);
19549
19550 case IX86_BUILTIN_PFRCP:
19551 return ix86_expand_unop_builtin (CODE_FOR_mmx_rcpv2sf2, exp, target, 0);
19552
19553 case IX86_BUILTIN_PFRCPIT1:
19554 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit1v2sf3, exp, target);
19555
19556 case IX86_BUILTIN_PFRCPIT2:
19557 return ix86_expand_binop_builtin (CODE_FOR_mmx_rcpit2v2sf3, exp, target);
19558
19559 case IX86_BUILTIN_PFRSQIT1:
19560 return ix86_expand_binop_builtin (CODE_FOR_mmx_rsqit1v2sf3, exp, target);
19561
19562 case IX86_BUILTIN_PFRSQRT:
19563 return ix86_expand_unop_builtin (CODE_FOR_mmx_rsqrtv2sf2, exp, target, 0);
19564
19565 case IX86_BUILTIN_PFSUB:
19566 return ix86_expand_binop_builtin (CODE_FOR_mmx_subv2sf3, exp, target);
19567
19568 case IX86_BUILTIN_PFSUBR:
19569 return ix86_expand_binop_builtin (CODE_FOR_mmx_subrv2sf3, exp, target);
19570
19571 case IX86_BUILTIN_PI2FD:
19572 return ix86_expand_unop_builtin (CODE_FOR_mmx_floatv2si2, exp, target, 0);
19573
19574 case IX86_BUILTIN_PMULHRW:
19575 return ix86_expand_binop_builtin (CODE_FOR_mmx_pmulhrwv4hi3, exp, target);
19576
19577 case IX86_BUILTIN_PF2IW:
19578 return ix86_expand_unop_builtin (CODE_FOR_mmx_pf2iw, exp, target, 0);
19579
19580 case IX86_BUILTIN_PFNACC:
19581 return ix86_expand_binop_builtin (CODE_FOR_mmx_hsubv2sf3, exp, target);
19582
19583 case IX86_BUILTIN_PFPNACC:
19584 return ix86_expand_binop_builtin (CODE_FOR_mmx_addsubv2sf3, exp, target);
19585
19586 case IX86_BUILTIN_PI2FW:
19587 return ix86_expand_unop_builtin (CODE_FOR_mmx_pi2fw, exp, target, 0);
19588
19589 case IX86_BUILTIN_PSWAPDSI:
19590 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2si2, exp, target, 0);
19591
19592 case IX86_BUILTIN_PSWAPDSF:
19593 return ix86_expand_unop_builtin (CODE_FOR_mmx_pswapdv2sf2, exp, target, 0);
19594
19595 case IX86_BUILTIN_SQRTSD:
19596 return ix86_expand_unop1_builtin (CODE_FOR_sse2_vmsqrtv2df2, exp, target);
19597 case IX86_BUILTIN_LOADUPD:
19598 return ix86_expand_unop_builtin (CODE_FOR_sse2_movupd, exp, target, 1);
19599 case IX86_BUILTIN_STOREUPD:
19600 return ix86_expand_store_builtin (CODE_FOR_sse2_movupd, exp);
19601
19602 case IX86_BUILTIN_MFENCE:
19603 emit_insn (gen_sse2_mfence ());
19604 return 0;
19605 case IX86_BUILTIN_LFENCE:
19606 emit_insn (gen_sse2_lfence ());
19607 return 0;
19608
19609 case IX86_BUILTIN_CLFLUSH:
19610 arg0 = CALL_EXPR_ARG (exp, 0);
19611 op0 = expand_normal (arg0);
19612 icode = CODE_FOR_sse2_clflush;
19613 if (! (*insn_data[icode].operand[0].predicate) (op0, Pmode))
19614 op0 = copy_to_mode_reg (Pmode, op0);
19615
19616 emit_insn (gen_sse2_clflush (op0));
19617 return 0;
19618
19619 case IX86_BUILTIN_MOVNTPD:
19620 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2df, exp);
19621 case IX86_BUILTIN_MOVNTDQ:
19622 return ix86_expand_store_builtin (CODE_FOR_sse2_movntv2di, exp);
19623 case IX86_BUILTIN_MOVNTI:
19624 return ix86_expand_store_builtin (CODE_FOR_sse2_movntsi, exp);
19625
19626 case IX86_BUILTIN_LOADDQU:
19627 return ix86_expand_unop_builtin (CODE_FOR_sse2_movdqu, exp, target, 1);
19628 case IX86_BUILTIN_STOREDQU:
19629 return ix86_expand_store_builtin (CODE_FOR_sse2_movdqu, exp);
19630
19631 case IX86_BUILTIN_MONITOR:
19632 arg0 = CALL_EXPR_ARG (exp, 0);
19633 arg1 = CALL_EXPR_ARG (exp, 1);
19634 arg2 = CALL_EXPR_ARG (exp, 2);
19635 op0 = expand_normal (arg0);
19636 op1 = expand_normal (arg1);
19637 op2 = expand_normal (arg2);
19638 if (!REG_P (op0))
19639 op0 = copy_to_mode_reg (Pmode, op0);
19640 if (!REG_P (op1))
19641 op1 = copy_to_mode_reg (SImode, op1);
19642 if (!REG_P (op2))
19643 op2 = copy_to_mode_reg (SImode, op2);
19644 if (!TARGET_64BIT)
19645 emit_insn (gen_sse3_monitor (op0, op1, op2));
19646 else
19647 emit_insn (gen_sse3_monitor64 (op0, op1, op2));
19648 return 0;
19649
19650 case IX86_BUILTIN_MWAIT:
19651 arg0 = CALL_EXPR_ARG (exp, 0);
19652 arg1 = CALL_EXPR_ARG (exp, 1);
19653 op0 = expand_normal (arg0);
19654 op1 = expand_normal (arg1);
19655 if (!REG_P (op0))
19656 op0 = copy_to_mode_reg (SImode, op0);
19657 if (!REG_P (op1))
19658 op1 = copy_to_mode_reg (SImode, op1);
19659 emit_insn (gen_sse3_mwait (op0, op1));
19660 return 0;
19661
19662 case IX86_BUILTIN_LDDQU:
19663 return ix86_expand_unop_builtin (CODE_FOR_sse3_lddqu, exp,
19664 target, 1);
19665
19666 case IX86_BUILTIN_PALIGNR:
19667 case IX86_BUILTIN_PALIGNR128:
19668 if (fcode == IX86_BUILTIN_PALIGNR)
19669 {
19670 icode = CODE_FOR_ssse3_palignrdi;
19671 mode = DImode;
19672 }
19673 else
19674 {
19675 icode = CODE_FOR_ssse3_palignrti;
19676 mode = V2DImode;
19677 }
19678 arg0 = CALL_EXPR_ARG (exp, 0);
19679 arg1 = CALL_EXPR_ARG (exp, 1);
19680 arg2 = CALL_EXPR_ARG (exp, 2);
19681 op0 = expand_expr (arg0, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19682 op1 = expand_expr (arg1, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19683 op2 = expand_expr (arg2, NULL_RTX, VOIDmode, EXPAND_NORMAL);
19684 tmode = insn_data[icode].operand[0].mode;
19685 mode1 = insn_data[icode].operand[1].mode;
19686 mode2 = insn_data[icode].operand[2].mode;
19687 mode3 = insn_data[icode].operand[3].mode;
19688
19689 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19690 {
19691 op0 = copy_to_reg (op0);
19692 op0 = simplify_gen_subreg (mode1, op0, GET_MODE (op0), 0);
19693 }
19694 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19695 {
19696 op1 = copy_to_reg (op1);
19697 op1 = simplify_gen_subreg (mode2, op1, GET_MODE (op1), 0);
19698 }
19699 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19700 {
19701 error ("shift must be an immediate");
19702 return const0_rtx;
19703 }
19704 target = gen_reg_rtx (mode);
19705 pat = GEN_FCN (icode) (simplify_gen_subreg (tmode, target, mode, 0),
19706 op0, op1, op2);
19707 if (! pat)
19708 return 0;
19709 emit_insn (pat);
19710 return target;
19711
19712 case IX86_BUILTIN_MOVNTDQA:
19713 return ix86_expand_unop_builtin (CODE_FOR_sse4_1_movntdqa, exp,
19714 target, 1);
19715
19716 case IX86_BUILTIN_MOVNTSD:
19717 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv2df, exp);
19718
19719 case IX86_BUILTIN_MOVNTSS:
19720 return ix86_expand_store_builtin (CODE_FOR_sse4a_vmmovntv4sf, exp);
19721
19722 case IX86_BUILTIN_INSERTQ:
19723 case IX86_BUILTIN_EXTRQ:
19724 icode = (fcode == IX86_BUILTIN_EXTRQ
19725 ? CODE_FOR_sse4a_extrq
19726 : CODE_FOR_sse4a_insertq);
19727 arg0 = CALL_EXPR_ARG (exp, 0);
19728 arg1 = CALL_EXPR_ARG (exp, 1);
19729 op0 = expand_normal (arg0);
19730 op1 = expand_normal (arg1);
19731 tmode = insn_data[icode].operand[0].mode;
19732 mode1 = insn_data[icode].operand[1].mode;
19733 mode2 = insn_data[icode].operand[2].mode;
19734 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19735 op0 = copy_to_mode_reg (mode1, op0);
19736 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19737 op1 = copy_to_mode_reg (mode2, op1);
19738 if (optimize || target == 0
19739 || GET_MODE (target) != tmode
19740 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19741 target = gen_reg_rtx (tmode);
19742 pat = GEN_FCN (icode) (target, op0, op1);
19743 if (! pat)
19744 return NULL_RTX;
19745 emit_insn (pat);
19746 return target;
19747
19748 case IX86_BUILTIN_EXTRQI:
19749 icode = CODE_FOR_sse4a_extrqi;
19750 arg0 = CALL_EXPR_ARG (exp, 0);
19751 arg1 = CALL_EXPR_ARG (exp, 1);
19752 arg2 = CALL_EXPR_ARG (exp, 2);
19753 op0 = expand_normal (arg0);
19754 op1 = expand_normal (arg1);
19755 op2 = expand_normal (arg2);
19756 tmode = insn_data[icode].operand[0].mode;
19757 mode1 = insn_data[icode].operand[1].mode;
19758 mode2 = insn_data[icode].operand[2].mode;
19759 mode3 = insn_data[icode].operand[3].mode;
19760 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19761 op0 = copy_to_mode_reg (mode1, op0);
19762 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19763 {
19764 error ("index mask must be an immediate");
19765 return gen_reg_rtx (tmode);
19766 }
19767 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19768 {
19769 error ("length mask must be an immediate");
19770 return gen_reg_rtx (tmode);
19771 }
19772 if (optimize || target == 0
19773 || GET_MODE (target) != tmode
19774 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19775 target = gen_reg_rtx (tmode);
19776 pat = GEN_FCN (icode) (target, op0, op1, op2);
19777 if (! pat)
19778 return NULL_RTX;
19779 emit_insn (pat);
19780 return target;
19781
19782 case IX86_BUILTIN_INSERTQI:
19783 icode = CODE_FOR_sse4a_insertqi;
19784 arg0 = CALL_EXPR_ARG (exp, 0);
19785 arg1 = CALL_EXPR_ARG (exp, 1);
19786 arg2 = CALL_EXPR_ARG (exp, 2);
19787 arg3 = CALL_EXPR_ARG (exp, 3);
19788 op0 = expand_normal (arg0);
19789 op1 = expand_normal (arg1);
19790 op2 = expand_normal (arg2);
19791 op3 = expand_normal (arg3);
19792 tmode = insn_data[icode].operand[0].mode;
19793 mode1 = insn_data[icode].operand[1].mode;
19794 mode2 = insn_data[icode].operand[2].mode;
19795 mode3 = insn_data[icode].operand[3].mode;
19796 mode4 = insn_data[icode].operand[4].mode;
19797
19798 if (! (*insn_data[icode].operand[1].predicate) (op0, mode1))
19799 op0 = copy_to_mode_reg (mode1, op0);
19800
19801 if (! (*insn_data[icode].operand[2].predicate) (op1, mode2))
19802 op1 = copy_to_mode_reg (mode2, op1);
19803
19804 if (! (*insn_data[icode].operand[3].predicate) (op2, mode3))
19805 {
19806 error ("index mask must be an immediate");
19807 return gen_reg_rtx (tmode);
19808 }
19809 if (! (*insn_data[icode].operand[4].predicate) (op3, mode4))
19810 {
19811 error ("length mask must be an immediate");
19812 return gen_reg_rtx (tmode);
19813 }
19814 if (optimize || target == 0
19815 || GET_MODE (target) != tmode
19816 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
19817 target = gen_reg_rtx (tmode);
19818 pat = GEN_FCN (icode) (target, op0, op1, op2, op3);
19819 if (! pat)
19820 return NULL_RTX;
19821 emit_insn (pat);
19822 return target;
19823
19824 case IX86_BUILTIN_VEC_INIT_V2SI:
19825 case IX86_BUILTIN_VEC_INIT_V4HI:
19826 case IX86_BUILTIN_VEC_INIT_V8QI:
19827 return ix86_expand_vec_init_builtin (TREE_TYPE (exp), exp, target);
19828
19829 case IX86_BUILTIN_VEC_EXT_V2DF:
19830 case IX86_BUILTIN_VEC_EXT_V2DI:
19831 case IX86_BUILTIN_VEC_EXT_V4SF:
19832 case IX86_BUILTIN_VEC_EXT_V4SI:
19833 case IX86_BUILTIN_VEC_EXT_V8HI:
19834 case IX86_BUILTIN_VEC_EXT_V2SI:
19835 case IX86_BUILTIN_VEC_EXT_V4HI:
19836 case IX86_BUILTIN_VEC_EXT_V16QI:
19837 return ix86_expand_vec_ext_builtin (exp, target);
19838
19839 case IX86_BUILTIN_VEC_SET_V2DI:
19840 case IX86_BUILTIN_VEC_SET_V4SF:
19841 case IX86_BUILTIN_VEC_SET_V4SI:
19842 case IX86_BUILTIN_VEC_SET_V8HI:
19843 case IX86_BUILTIN_VEC_SET_V4HI:
19844 case IX86_BUILTIN_VEC_SET_V16QI:
19845 return ix86_expand_vec_set_builtin (exp);
19846
19847 case IX86_BUILTIN_INFQ:
19848 {
19849 REAL_VALUE_TYPE inf;
19850 rtx tmp;
19851
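/* Build an infinity in the result mode, force it into the constant
   pool, and load it from there into the target register.  */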
19852 real_inf (&inf);
19853 tmp = CONST_DOUBLE_FROM_REAL_VALUE (inf, mode);
19854
19855 tmp = validize_mem (force_const_mem (mode, tmp));
19856
19857 if (target == 0)
19858 target = gen_reg_rtx (mode);
19859
19860 emit_move_insn (target, tmp);
19861 return target;
19862 }
19863
19864 case IX86_BUILTIN_FABSQ:
19865 return ix86_expand_unop_builtin (CODE_FOR_abstf2, exp, target, 0);
19866
19867 case IX86_BUILTIN_COPYSIGNQ:
19868 return ix86_expand_binop_builtin (CODE_FOR_copysigntf3, exp, target);
19869
19870 default:
19871 break;
19872 }
19873
19874 for (i = 0, d = bdesc_sse_3arg;
19875 i < ARRAY_SIZE (bdesc_sse_3arg);
19876 i++, d++)
19877 if (d->code == fcode)
19878 return ix86_expand_sse_4_operands_builtin (d->icode, exp,
19879 target);
19880
19881 for (i = 0, d = bdesc_2arg; i < ARRAY_SIZE (bdesc_2arg); i++, d++)
19882 if (d->code == fcode)
19883 {
19884 /* Compares are treated specially. */
19885 if (d->icode == CODE_FOR_sse_maskcmpv4sf3
19886 || d->icode == CODE_FOR_sse_vmmaskcmpv4sf3
19887 || d->icode == CODE_FOR_sse2_maskcmpv2df3
19888 || d->icode == CODE_FOR_sse2_vmmaskcmpv2df3)
19889 return ix86_expand_sse_compare (d, exp, target);
19890
19891 return ix86_expand_binop_builtin (d->icode, exp, target);
19892 }
19893
19894 for (i = 0, d = bdesc_1arg; i < ARRAY_SIZE (bdesc_1arg); i++, d++)
19895 if (d->code == fcode)
19896 return ix86_expand_unop_builtin (d->icode, exp, target, 0);
19897
19898 for (i = 0, d = bdesc_comi; i < ARRAY_SIZE (bdesc_comi); i++, d++)
19899 if (d->code == fcode)
19900 return ix86_expand_sse_comi (d, exp, target);
19901
19902 for (i = 0, d = bdesc_ptest; i < ARRAY_SIZE (bdesc_ptest); i++, d++)
19903 if (d->code == fcode)
19904 return ix86_expand_sse_ptest (d, exp, target);
19905
19906 for (i = 0, d = bdesc_crc32; i < ARRAY_SIZE (bdesc_crc32); i++, d++)
19907 if (d->code == fcode)
19908 return ix86_expand_crc32 (d->icode, exp, target);
19909
19910 for (i = 0, d = bdesc_pcmpestr;
19911 i < ARRAY_SIZE (bdesc_pcmpestr);
19912 i++, d++)
19913 if (d->code == fcode)
19914 return ix86_expand_sse_pcmpestr (d, exp, target);
19915
19916 for (i = 0, d = bdesc_pcmpistr;
19917 i < ARRAY_SIZE (bdesc_pcmpistr);
19918 i++, d++)
19919 if (d->code == fcode)
19920 return ix86_expand_sse_pcmpistr (d, exp, target);
19921
19922 gcc_unreachable ();
19923 }
19924
19925 /* Returns a function decl for a vectorized version of the builtin function
19926 with builtin function code FN, result vector type TYPE_OUT and argument
19927 vector type TYPE_IN, or NULL_TREE if it is not available. */
19928
19929 static tree
19930 ix86_builtin_vectorized_function (unsigned int fn, tree type_out,
19931 tree type_in)
19932 {
19933 enum machine_mode in_mode, out_mode;
19934 int in_n, out_n;
19935
19936 if (TREE_CODE (type_out) != VECTOR_TYPE
19937 || TREE_CODE (type_in) != VECTOR_TYPE)
19938 return NULL_TREE;
19939
19940 out_mode = TYPE_MODE (TREE_TYPE (type_out));
19941 out_n = TYPE_VECTOR_SUBPARTS (type_out);
19942 in_mode = TYPE_MODE (TREE_TYPE (type_in));
19943 in_n = TYPE_VECTOR_SUBPARTS (type_in);
19944
19945 switch (fn)
19946 {
19947 case BUILT_IN_SQRT:
19948 if (out_mode == DFmode && out_n == 2
19949 && in_mode == DFmode && in_n == 2)
19950 return ix86_builtins[IX86_BUILTIN_SQRTPD];
19951 break;
19952
19953 case BUILT_IN_SQRTF:
19954 if (out_mode == SFmode && out_n == 4
19955 && in_mode == SFmode && in_n == 4)
19956 return ix86_builtins[IX86_BUILTIN_SQRTPS];
19957 break;
19958
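/* For lrint the vectorizer packs two V2DF inputs into a single V4SI
   result, hence the 2-element input and 4-element output counts.  */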
19959 case BUILT_IN_LRINT:
19960 if (out_mode == SImode && out_n == 4
19961 && in_mode == DFmode && in_n == 2)
19962 return ix86_builtins[IX86_BUILTIN_VEC_PACK_SFIX];
19963 break;
19964
19965 case BUILT_IN_LRINTF:
19966 if (out_mode == SImode && out_n == 4
19967 && in_mode == SFmode && in_n == 4)
19968 return ix86_builtins[IX86_BUILTIN_CVTPS2DQ];
19969 break;
19970
19971 default:
19972 ;
19973 }
19974
19975 /* Dispatch to a handler for a vectorization library. */
19976 if (ix86_veclib_handler)
19977 return (*ix86_veclib_handler)(fn, type_out, type_in);
19978
19979 return NULL_TREE;
19980 }
19981
19982 /* Handler for an ACML-style interface to a library with vectorized
19983 intrinsics. */
19984
19985 static tree
19986 ix86_veclibabi_acml (enum built_in_function fn, tree type_out, tree type_in)
19987 {
19988 char name[20] = "__vr.._";
19989 tree fntype, new_fndecl, args;
19990 unsigned arity;
19991 const char *bname;
19992 enum machine_mode el_mode, in_mode;
19993 int n, in_n;
19994
19995 /* The ACML is 64-bit only and suitable for unsafe math only, as
19996 it does not correctly support parts of IEEE with the required
19997 precision, such as denormals. */
19998 if (!TARGET_64BIT
19999 || !flag_unsafe_math_optimizations)
20000 return NULL_TREE;
20001
20002 el_mode = TYPE_MODE (TREE_TYPE (type_out));
20003 n = TYPE_VECTOR_SUBPARTS (type_out);
20004 in_mode = TYPE_MODE (TREE_TYPE (type_in));
20005 in_n = TYPE_VECTOR_SUBPARTS (type_in);
20006 if (el_mode != in_mode
20007 || n != in_n)
20008 return NULL_TREE;
20009
20010 switch (fn)
20011 {
20012 case BUILT_IN_SIN:
20013 case BUILT_IN_COS:
20014 case BUILT_IN_EXP:
20015 case BUILT_IN_LOG:
20016 case BUILT_IN_LOG2:
20017 case BUILT_IN_LOG10:
20018 name[4] = 'd';
20019 name[5] = '2';
20020 if (el_mode != DFmode
20021 || n != 2)
20022 return NULL_TREE;
20023 break;
20024
20025 case BUILT_IN_SINF:
20026 case BUILT_IN_COSF:
20027 case BUILT_IN_EXPF:
20028 case BUILT_IN_POWF:
20029 case BUILT_IN_LOGF:
20030 case BUILT_IN_LOG2F:
20031 case BUILT_IN_LOG10F:
20032 name[4] = 's';
20033 name[5] = '4';
20034 if (el_mode != SFmode
20035 || n != 4)
20036 return NULL_TREE;
20037 break;
20038
20039 default:
20040 return NULL_TREE;
20041 }
20042
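/* Skip the "__builtin_" prefix (10 characters) of the scalar function's
   name, producing e.g. "__vrd2_sin" or "__vrs4_sinf".  */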
20043 bname = IDENTIFIER_POINTER (DECL_NAME (implicit_built_in_decls[fn]));
20044 sprintf (name + 7, "%s", bname+10);
20045
20046 arity = 0;
20047 for (args = DECL_ARGUMENTS (implicit_built_in_decls[fn]); args;
20048 args = TREE_CHAIN (args))
20049 arity++;
20050
20051 if (arity == 1)
20052 fntype = build_function_type_list (type_out, type_in, NULL);
20053 else
20054 fntype = build_function_type_list (type_out, type_in, type_in, NULL);
20055
20056 /* Build a function declaration for the vectorized function. */
20057 new_fndecl = build_decl (FUNCTION_DECL, get_identifier (name), fntype);
20058 TREE_PUBLIC (new_fndecl) = 1;
20059 DECL_EXTERNAL (new_fndecl) = 1;
20060 DECL_IS_NOVOPS (new_fndecl) = 1;
20061 TREE_READONLY (new_fndecl) = 1;
20062
20063 return new_fndecl;
20064 }
20065
20066
20067 /* Returns a decl of a function that implements conversion of the
20068 input vector of type TYPE, or NULL_TREE if it is not available. */
20069
20070 static tree
20071 ix86_vectorize_builtin_conversion (unsigned int code, tree type)
20072 {
20073 if (TREE_CODE (type) != VECTOR_TYPE)
20074 return NULL_TREE;
20075
20076 switch (code)
20077 {
20078 case FLOAT_EXPR:
20079 switch (TYPE_MODE (type))
20080 {
20081 case V4SImode:
20082 return ix86_builtins[IX86_BUILTIN_CVTDQ2PS];
20083 default:
20084 return NULL_TREE;
20085 }
20086
20087 case FIX_TRUNC_EXPR:
20088 switch (TYPE_MODE (type))
20089 {
20090 case V4SFmode:
20091 return ix86_builtins[IX86_BUILTIN_CVTTPS2DQ];
20092 default:
20093 return NULL_TREE;
20094 }
20095 default:
20096 return NULL_TREE;
20097
20098 }
20099 }
20100
20101 /* Returns a decl of a target-specific builtin that implements the
20102 reciprocal of the function FN, or NULL_TREE if not available. */
20103
20104 static tree
20105 ix86_builtin_reciprocal (unsigned int fn, bool md_fn,
20106 bool sqrt ATTRIBUTE_UNUSED)
20107 {
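/* The reciprocal approximations lose precision, so only use them when the
   fast-math style flags tested below are in effect and we are not
   optimizing for size.  */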
20108 if (! (TARGET_SSE_MATH && TARGET_RECIP && !optimize_size
20109 && flag_finite_math_only && !flag_trapping_math
20110 && flag_unsafe_math_optimizations))
20111 return NULL_TREE;
20112
20113 if (md_fn)
20114 /* Machine dependent builtins. */
20115 switch (fn)
20116 {
20117 /* Vectorized version of sqrt to rsqrt conversion. */
20118 case IX86_BUILTIN_SQRTPS:
20119 return ix86_builtins[IX86_BUILTIN_RSQRTPS];
20120
20121 default:
20122 return NULL_TREE;
20123 }
20124 else
20125 /* Normal builtins. */
20126 switch (fn)
20127 {
20128 /* Sqrt to rsqrt conversion. */
20129 case BUILT_IN_SQRTF:
20130 return ix86_builtins[IX86_BUILTIN_RSQRTF];
20131
20132 default:
20133 return NULL_TREE;
20134 }
20135 }
20136
20137 /* Store OPERAND to memory after reload is completed. This means
20138 that we can't easily use assign_stack_local. */
20139 rtx
20140 ix86_force_to_memory (enum machine_mode mode, rtx operand)
20141 {
20142 rtx result;
20143
20144 gcc_assert (reload_completed);
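/* With a red zone we can store below the stack pointer without adjusting
   it; otherwise push the value and return a MEM at the new stack
   pointer.  */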
20145 if (TARGET_RED_ZONE)
20146 {
20147 result = gen_rtx_MEM (mode,
20148 gen_rtx_PLUS (Pmode,
20149 stack_pointer_rtx,
20150 GEN_INT (-RED_ZONE_SIZE)));
20151 emit_move_insn (result, operand);
20152 }
20153 else if (!TARGET_RED_ZONE && TARGET_64BIT)
20154 {
20155 switch (mode)
20156 {
20157 case HImode:
20158 case SImode:
20159 operand = gen_lowpart (DImode, operand);
20160 /* FALLTHRU */
20161 case DImode:
20162 emit_insn (
20163 gen_rtx_SET (VOIDmode,
20164 gen_rtx_MEM (DImode,
20165 gen_rtx_PRE_DEC (DImode,
20166 stack_pointer_rtx)),
20167 operand));
20168 break;
20169 default:
20170 gcc_unreachable ();
20171 }
20172 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20173 }
20174 else
20175 {
20176 switch (mode)
20177 {
20178 case DImode:
20179 {
20180 rtx operands[2];
20181 split_di (&operand, 1, operands, operands + 1);
20182 emit_insn (
20183 gen_rtx_SET (VOIDmode,
20184 gen_rtx_MEM (SImode,
20185 gen_rtx_PRE_DEC (Pmode,
20186 stack_pointer_rtx)),
20187 operands[1]));
20188 emit_insn (
20189 gen_rtx_SET (VOIDmode,
20190 gen_rtx_MEM (SImode,
20191 gen_rtx_PRE_DEC (Pmode,
20192 stack_pointer_rtx)),
20193 operands[0]));
20194 }
20195 break;
20196 case HImode:
20197 /* Store HImodes as SImodes. */
20198 operand = gen_lowpart (SImode, operand);
20199 /* FALLTHRU */
20200 case SImode:
20201 emit_insn (
20202 gen_rtx_SET (VOIDmode,
20203 gen_rtx_MEM (GET_MODE (operand),
20204 gen_rtx_PRE_DEC (SImode,
20205 stack_pointer_rtx)),
20206 operand));
20207 break;
20208 default:
20209 gcc_unreachable ();
20210 }
20211 result = gen_rtx_MEM (mode, stack_pointer_rtx);
20212 }
20213 return result;
20214 }
20215
20216 /* Free the operand previously stored to memory by ix86_force_to_memory. */
20217 void
20218 ix86_free_from_memory (enum machine_mode mode)
20219 {
20220 if (!TARGET_RED_ZONE)
20221 {
20222 int size;
20223
20224 if (mode == DImode || TARGET_64BIT)
20225 size = 8;
20226 else
20227 size = 4;
20228 /* Use LEA to deallocate stack space. In peephole2 it will be converted
20229 to a pop or add instruction if registers are available. */
20230 emit_insn (gen_rtx_SET (VOIDmode, stack_pointer_rtx,
20231 gen_rtx_PLUS (Pmode, stack_pointer_rtx,
20232 GEN_INT (size))));
20233 }
20234 }
20235
20236 /* Put float CONST_DOUBLE in the constant pool instead of fp regs.
20237 QImode must go into class Q_REGS.
20238 Narrow ALL_REGS to GENERAL_REGS. This allows movsf and
20239 movdf to do mem-to-mem moves through integer regs. */
20240 enum reg_class
20241 ix86_preferred_reload_class (rtx x, enum reg_class regclass)
20242 {
20243 enum machine_mode mode = GET_MODE (x);
20244
20245 /* We're only allowed to return a subclass of REGCLASS. Many of the
20246 following checks fail for NO_REGS, so eliminate that early. */
20247 if (regclass == NO_REGS)
20248 return NO_REGS;
20249
20250 /* All classes can load zeros. */
20251 if (x == CONST0_RTX (mode))
20252 return regclass;
20253
20254 /* Force constants into memory if we are loading a (nonzero) constant into
20255 an MMX or SSE register. This is because there are no MMX/SSE instructions
20256 to load from a constant. */
20257 if (CONSTANT_P (x)
20258 && (MAYBE_MMX_CLASS_P (regclass) || MAYBE_SSE_CLASS_P (regclass)))
20259 return NO_REGS;
20260
20261 /* Prefer SSE regs only, if we can use them for math. */
20262 if (TARGET_SSE_MATH && !TARGET_MIX_SSE_I387 && SSE_FLOAT_MODE_P (mode))
20263 return SSE_CLASS_P (regclass) ? regclass : NO_REGS;
20264
20265 /* Floating-point constants need more complex checks. */
20266 if (GET_CODE (x) == CONST_DOUBLE && GET_MODE (x) != VOIDmode)
20267 {
20268 /* General regs can load everything. */
20269 if (reg_class_subset_p (regclass, GENERAL_REGS))
20270 return regclass;
20271
20272 /* Floats can load 0 and 1 plus some others. Note that we eliminated
20273 zero above. We only want to wind up preferring 80387 registers if
20274 we plan on doing computation with them. */
20275 if (TARGET_80387
20276 && standard_80387_constant_p (x))
20277 {
20278 /* Limit class to non-sse. */
20279 if (regclass == FLOAT_SSE_REGS)
20280 return FLOAT_REGS;
20281 if (regclass == FP_TOP_SSE_REGS)
20282 return FP_TOP_REG;
20283 if (regclass == FP_SECOND_SSE_REGS)
20284 return FP_SECOND_REG;
20285 if (regclass == FLOAT_INT_REGS || regclass == FLOAT_REGS)
20286 return regclass;
20287 }
20288
20289 return NO_REGS;
20290 }
20291
20292 /* Generally when we see PLUS here, it's the function invariant
20293 (plus soft-fp const_int), which can only be computed into general
20294 regs. */
20295 if (GET_CODE (x) == PLUS)
20296 return reg_class_subset_p (regclass, GENERAL_REGS) ? regclass : NO_REGS;
20297
20298 /* QImode constants are easy to load, but non-constant QImode data
20299 must go into Q_REGS. */
20300 if (GET_MODE (x) == QImode && !CONSTANT_P (x))
20301 {
20302 if (reg_class_subset_p (regclass, Q_REGS))
20303 return regclass;
20304 if (reg_class_subset_p (Q_REGS, regclass))
20305 return Q_REGS;
20306 return NO_REGS;
20307 }
20308
20309 return regclass;
20310 }
20311
20312 /* Discourage putting floating-point values in SSE registers unless
20313 SSE math is being used, and likewise for the 387 registers. */
20314 enum reg_class
20315 ix86_preferred_output_reload_class (rtx x, enum reg_class regclass)
20316 {
20317 enum machine_mode mode = GET_MODE (x);
20318
20319 /* Restrict the output reload class to the register bank that we are doing
20320 math on. If we would like not to return a subset of CLASS, reject this
20321 alternative: if reload cannot do this, it will still use its choice. */
20323 if (TARGET_SSE_MATH && SSE_FLOAT_MODE_P (mode))
20324 return MAYBE_SSE_CLASS_P (regclass) ? SSE_REGS : NO_REGS;
20325
20326 if (X87_FLOAT_MODE_P (mode))
20327 {
20328 if (regclass == FP_TOP_SSE_REGS)
20329 return FP_TOP_REG;
20330 else if (regclass == FP_SECOND_SSE_REGS)
20331 return FP_SECOND_REG;
20332 else
20333 return FLOAT_CLASS_P (regclass) ? regclass : NO_REGS;
20334 }
20335
20336 return regclass;
20337 }
20338
20339 /* If we are copying between general and FP registers, we need a memory
20340 location. The same is true for SSE and MMX registers.
20341
20342 To optimize register_move_cost performance, allow an inline variant.
20343
20344 The macro can't work reliably when one of the CLASSES is a class containing
20345 registers from multiple units (SSE, MMX, integer). We avoid this by never
20346 combining those units in a single alternative in the machine description.
20347 Ensure that this constraint holds to avoid unexpected surprises.
20348
20349 When STRICT is false, we are being called from REGISTER_MOVE_COST, so do not
20350 enforce these sanity checks. */
20351
20352 static inline int
20353 inline_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20354 enum machine_mode mode, int strict)
20355 {
20356 if (MAYBE_FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class1)
20357 || MAYBE_FLOAT_CLASS_P (class2) != FLOAT_CLASS_P (class2)
20358 || MAYBE_SSE_CLASS_P (class1) != SSE_CLASS_P (class1)
20359 || MAYBE_SSE_CLASS_P (class2) != SSE_CLASS_P (class2)
20360 || MAYBE_MMX_CLASS_P (class1) != MMX_CLASS_P (class1)
20361 || MAYBE_MMX_CLASS_P (class2) != MMX_CLASS_P (class2))
20362 {
20363 gcc_assert (!strict);
20364 return true;
20365 }
20366
20367 if (FLOAT_CLASS_P (class1) != FLOAT_CLASS_P (class2))
20368 return true;
20369
20370 /* ??? This is a lie. We do have moves between mmx/general, and for
20371 mmx/sse2. But by saying we need secondary memory we discourage the
20372 register allocator from using the mmx registers unless needed. */
20373 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2))
20374 return true;
20375
20376 if (SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20377 {
20378 /* SSE1 doesn't have any direct moves from other classes. */
20379 if (!TARGET_SSE2)
20380 return true;
20381
20382 /* If the target says that inter-unit moves are more expensive
20383 than moving through memory, then don't generate them. */
20384 if (!TARGET_INTER_UNIT_MOVES)
20385 return true;
20386
20387 /* Between SSE and general, we have moves no larger than word size. */
20388 if (GET_MODE_SIZE (mode) > UNITS_PER_WORD)
20389 return true;
20390 }
20391
20392 return false;
20393 }
20394
20395 int
20396 ix86_secondary_memory_needed (enum reg_class class1, enum reg_class class2,
20397 enum machine_mode mode, int strict)
20398 {
20399 return inline_secondary_memory_needed (class1, class2, mode, strict);
20400 }
20401
20402 /* Return true if the registers in CLASS cannot represent the change from
20403 mode FROM to mode TO. */
20404
20405 bool
20406 ix86_cannot_change_mode_class (enum machine_mode from, enum machine_mode to,
20407 enum reg_class regclass)
20408 {
20409 if (from == to)
20410 return false;
20411
20412 /* x87 registers can't do subreg at all, as all values are reformatted
20413 to extended precision. */
20414 if (MAYBE_FLOAT_CLASS_P (regclass))
20415 return true;
20416
20417 if (MAYBE_SSE_CLASS_P (regclass) || MAYBE_MMX_CLASS_P (regclass))
20418 {
20419 /* Vector registers do not support QI or HImode loads. If we don't
20420 disallow a change to these modes, reload will assume it's ok to
20421 drop the subreg from (subreg:SI (reg:HI 100) 0). This affects
20422 the vec_dupv4hi pattern. */
20423 if (GET_MODE_SIZE (from) < 4)
20424 return true;
20425
20426 /* Vector registers do not support subreg with nonzero offsets, which
20427 are otherwise valid for integer registers. Since we can't see
20428 whether we have a nonzero offset from here, prohibit all
20429 nonparadoxical subregs changing size. */
20430 if (GET_MODE_SIZE (to) < GET_MODE_SIZE (from))
20431 return true;
20432 }
20433
20434 return false;
20435 }
20436
20437 /* Return the cost of moving data of mode M between a
20438 register and memory. A value of 2 is the default; this cost is
20439 relative to those in `REGISTER_MOVE_COST'.
20440
20441 This function is used extensively by register_move_cost, which is used to
20442 build tables at startup. Make it inline in this case.
20443 When IN is 2, return the maximum of the in and out move costs.
20444
20445 If moving between registers and memory is more expensive than
20446 between two registers, you should define this macro to express the
20447 relative cost.
20448
20449 Also model the increased cost of moving QImode registers in non-Q_REGS
20450 classes.
20451 */
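/* For example, with IN == 2 a DFmode value kept in FLOAT_REGS is costed as
   MAX (ix86_cost->fp_load[1], ix86_cost->fp_store[1]), i.e. the more
   expensive of the two directions.  */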
20452 static inline int
20453 inline_memory_move_cost (enum machine_mode mode, enum reg_class regclass,
20454 int in)
20455 {
20456 int cost;
20457 if (FLOAT_CLASS_P (regclass))
20458 {
20459 int index;
20460 switch (mode)
20461 {
20462 case SFmode:
20463 index = 0;
20464 break;
20465 case DFmode:
20466 index = 1;
20467 break;
20468 case XFmode:
20469 index = 2;
20470 break;
20471 default:
20472 return 100;
20473 }
20474 if (in == 2)
20475 return MAX (ix86_cost->fp_load [index], ix86_cost->fp_store [index]);
20476 return in ? ix86_cost->fp_load [index] : ix86_cost->fp_store [index];
20477 }
20478 if (SSE_CLASS_P (regclass))
20479 {
20480 int index;
20481 switch (GET_MODE_SIZE (mode))
20482 {
20483 case 4:
20484 index = 0;
20485 break;
20486 case 8:
20487 index = 1;
20488 break;
20489 case 16:
20490 index = 2;
20491 break;
20492 default:
20493 return 100;
20494 }
20495 if (in == 2)
20496 return MAX (ix86_cost->sse_load [index], ix86_cost->sse_store [index]);
20497 return in ? ix86_cost->sse_load [index] : ix86_cost->sse_store [index];
20498 }
20499 if (MMX_CLASS_P (regclass))
20500 {
20501 int index;
20502 switch (GET_MODE_SIZE (mode))
20503 {
20504 case 4:
20505 index = 0;
20506 break;
20507 case 8:
20508 index = 1;
20509 break;
20510 default:
20511 return 100;
20512 }
20513 if (in == 2)
20514 return MAX (ix86_cost->mmx_load [index], ix86_cost->mmx_store [index]);
20515 return in ? ix86_cost->mmx_load [index] : ix86_cost->mmx_store [index];
20516 }
20517 switch (GET_MODE_SIZE (mode))
20518 {
20519 case 1:
20520 if (Q_CLASS_P (regclass) || TARGET_64BIT)
20521 {
20522 if (!in)
20523 return ix86_cost->int_store[0];
20524 if (TARGET_PARTIAL_REG_DEPENDENCY && !optimize_size)
20525 cost = ix86_cost->movzbl_load;
20526 else
20527 cost = ix86_cost->int_load[0];
20528 if (in == 2)
20529 return MAX (cost, ix86_cost->int_store[0]);
20530 return cost;
20531 }
20532 else
20533 {
20534 if (in == 2)
20535 return MAX (ix86_cost->movzbl_load, ix86_cost->int_store[0] + 4);
20536 if (in)
20537 return ix86_cost->movzbl_load;
20538 else
20539 return ix86_cost->int_store[0] + 4;
20540 }
20541 break;
20542 case 2:
20543 if (in == 2)
20544 return MAX (ix86_cost->int_load[1], ix86_cost->int_store[1]);
20545 return in ? ix86_cost->int_load[1] : ix86_cost->int_store[1];
20546 default:
20547 /* Compute number of 32bit moves needed. TFmode is moved as XFmode. */
20548 if (mode == TFmode)
20549 mode = XFmode;
20550 if (in == 2)
20551 cost = MAX (ix86_cost->int_load[2] , ix86_cost->int_store[2]);
20552 else if (in)
20553 cost = ix86_cost->int_load[2];
20554 else
20555 cost = ix86_cost->int_store[2];
20556 return (cost * (((int) GET_MODE_SIZE (mode)
20557 + UNITS_PER_WORD - 1) / UNITS_PER_WORD));
20558 }
20559 }
20560
20561 int
20562 ix86_memory_move_cost (enum machine_mode mode, enum reg_class regclass, int in)
20563 {
20564 return inline_memory_move_cost (mode, regclass, in);
20565 }
20566
20567
20568 /* Return the cost of moving data from a register in class CLASS1 to
20569 one in class CLASS2.
20570
20571 It is not required that the cost always equal 2 when FROM is the same as TO;
20572 on some machines it is expensive to move between registers if they are not
20573 general registers. */
20574
20575 int
20576 ix86_register_move_cost (enum machine_mode mode, enum reg_class class1,
20577 enum reg_class class2)
20578 {
20579 /* In case we require secondary memory, compute cost of the store followed
20580 by load. In order to avoid bad register allocation choices, we need
20581 for this to be *at least* as high as the symmetric MEMORY_MOVE_COST. */
20582
20583 if (inline_secondary_memory_needed (class1, class2, mode, 0))
20584 {
20585 int cost = 1;
20586
20587 cost += inline_memory_move_cost (mode, class1, 2);
20588 cost += inline_memory_move_cost (mode, class2, 2);
20589
20590 /* When copying from a general purpose register we may emit multiple
20591 stores followed by a single load, causing a memory size mismatch stall.
20592 Count this as an arbitrarily high cost of 20. */
20593 if (CLASS_MAX_NREGS (class1, mode) > CLASS_MAX_NREGS (class2, mode))
20594 cost += 20;
20595
20596 /* In the case of FP/MMX moves, the registers actually overlap, and we
20597 have to switch modes in order to treat them differently. */
20598 if ((MMX_CLASS_P (class1) && MAYBE_FLOAT_CLASS_P (class2))
20599 || (MMX_CLASS_P (class2) && MAYBE_FLOAT_CLASS_P (class1)))
20600 cost += 20;
20601
20602 return cost;
20603 }
20604
20605 /* Moves between SSE/MMX and integer unit are expensive. */
20606 if (MMX_CLASS_P (class1) != MMX_CLASS_P (class2)
20607 || SSE_CLASS_P (class1) != SSE_CLASS_P (class2))
20608
20609 /* ??? By keeping returned value relatively high, we limit the number
20610 of moves between integer and MMX/SSE registers for all targets.
20611 Additionally, the high value prevents a problem with x86_modes_tieable_p(),
20612 where integer modes in MMX/SSE registers are not tieable
20613 because of missing QImode and HImode moves to, from or between
20614 MMX/SSE registers. */
20615 return MAX (ix86_cost->mmxsse_to_integer, 8);
20616
20617 if (MAYBE_FLOAT_CLASS_P (class1))
20618 return ix86_cost->fp_move;
20619 if (MAYBE_SSE_CLASS_P (class1))
20620 return ix86_cost->sse_move;
20621 if (MAYBE_MMX_CLASS_P (class1))
20622 return ix86_cost->mmx_move;
20623 return 2;
20624 }
20625
20626 /* Return 1 if hard register REGNO can hold a value of machine-mode MODE. */
20627
20628 bool
20629 ix86_hard_regno_mode_ok (int regno, enum machine_mode mode)
20630 {
20631 /* Flags, and only flags, can hold CCmode values. */
20632 if (CC_REGNO_P (regno))
20633 return GET_MODE_CLASS (mode) == MODE_CC;
20634 if (GET_MODE_CLASS (mode) == MODE_CC
20635 || GET_MODE_CLASS (mode) == MODE_RANDOM
20636 || GET_MODE_CLASS (mode) == MODE_PARTIAL_INT)
20637 return 0;
20638 if (FP_REGNO_P (regno))
20639 return VALID_FP_MODE_P (mode);
20640 if (SSE_REGNO_P (regno))
20641 {
20642 /* We implement the move patterns for all vector modes into and
20643 out of SSE registers, even when no operation instructions
20644 are available. */
20645 return (VALID_SSE_REG_MODE (mode)
20646 || VALID_SSE2_REG_MODE (mode)
20647 || VALID_MMX_REG_MODE (mode)
20648 || VALID_MMX_REG_MODE_3DNOW (mode));
20649 }
20650 if (MMX_REGNO_P (regno))
20651 {
20652 /* We implement the move patterns for 3DNOW modes even in MMX mode,
20653 so if the register is available at all, then we can move data of
20654 the given mode into or out of it. */
20655 return (VALID_MMX_REG_MODE (mode)
20656 || VALID_MMX_REG_MODE_3DNOW (mode));
20657 }
20658
20659 if (mode == QImode)
20660 {
20661 /* Take care with QImode values - they can be in non-QI regs,
20662 but then they cause partial register stalls.
20663 if (regno < 4 || TARGET_64BIT)
20664 return 1;
20665 if (!TARGET_PARTIAL_REG_STALL)
20666 return 1;
20667 return reload_in_progress || reload_completed;
20668 }
20669 /* We handle both integers and floats in the general purpose registers. */
20670 else if (VALID_INT_MODE_P (mode))
20671 return 1;
20672 else if (VALID_FP_MODE_P (mode))
20673 return 1;
20674 else if (VALID_DFP_MODE_P (mode))
20675 return 1;
20676 /* Lots of MMX code casts 8 byte vector modes to DImode. If we then go
20677 on to use that value in smaller contexts, this can easily force a
20678 pseudo to be allocated to GENERAL_REGS. Since this is no worse than
20679 supporting DImode, allow it. */
20680 else if (VALID_MMX_REG_MODE_3DNOW (mode) || VALID_MMX_REG_MODE (mode))
20681 return 1;
20682
20683 return 0;
20684 }
20685
20686 /* A subroutine of ix86_modes_tieable_p. Return true if MODE is a
20687 tieable integer mode. */
20688
20689 static bool
20690 ix86_tieable_integer_mode_p (enum machine_mode mode)
20691 {
20692 switch (mode)
20693 {
20694 case HImode:
20695 case SImode:
20696 return true;
20697
20698 case QImode:
20699 return TARGET_64BIT || !TARGET_PARTIAL_REG_STALL;
20700
20701 case DImode:
20702 return TARGET_64BIT;
20703
20704 default:
20705 return false;
20706 }
20707 }
20708
20709 /* Return true if MODE1 is accessible in a register that can hold MODE2
20710 without copying. That is, all register classes that can hold MODE2
20711 can also hold MODE1. */
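/* For example, SImode and HImode always tie; QImode ties with them unless
   TARGET_PARTIAL_REG_STALL is set on a 32-bit target; DImode ties only in
   64-bit mode.  See ix86_tieable_integer_mode_p above.  */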
20712
20713 bool
20714 ix86_modes_tieable_p (enum machine_mode mode1, enum machine_mode mode2)
20715 {
20716 if (mode1 == mode2)
20717 return true;
20718
20719 if (ix86_tieable_integer_mode_p (mode1)
20720 && ix86_tieable_integer_mode_p (mode2))
20721 return true;
20722
20723 /* MODE2 being XFmode implies fp stack or general regs, which means we
20724 can tie any smaller floating point modes to it. Note that we do not
20725 tie this with TFmode. */
20726 if (mode2 == XFmode)
20727 return mode1 == SFmode || mode1 == DFmode;
20728
20729 /* MODE2 being DFmode implies fp stack, general or sse regs, which means
20730 that we can tie it with SFmode. */
20731 if (mode2 == DFmode)
20732 return mode1 == SFmode;
20733
20734 /* If MODE2 is only appropriate for an SSE register, then tie with
20735 any other mode acceptable to SSE registers. */
20736 if (GET_MODE_SIZE (mode2) == 16
20737 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode2))
20738 return (GET_MODE_SIZE (mode1) == 16
20739 && ix86_hard_regno_mode_ok (FIRST_SSE_REG, mode1));
20740
20741 /* If MODE2 is appropriate for an MMX register, then tie
20742 with any other mode acceptable to MMX registers. */
20743 if (GET_MODE_SIZE (mode2) == 8
20744 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode2))
20745 return (GET_MODE_SIZE (mode1) == 8
20746 && ix86_hard_regno_mode_ok (FIRST_MMX_REG, mode1));
20747
20748 return false;
20749 }
20750
20751 /* Compute a (partial) cost for rtx X. Return true if the complete
20752 cost has been computed, and false if subexpressions should be
20753 scanned. In either case, *TOTAL contains the cost result. */
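/* For example, an address-style expression such as
   (plus (mult (reg) (const_int 4)) (reg)) is costed in the PLUS case below
   as a single lea rather than as a separate multiply and add.  */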
20754
20755 static bool
20756 ix86_rtx_costs (rtx x, int code, int outer_code_i, int *total)
20757 {
20758 enum rtx_code outer_code = (enum rtx_code) outer_code_i;
20759 enum machine_mode mode = GET_MODE (x);
20760
20761 switch (code)
20762 {
20763 case CONST_INT:
20764 case CONST:
20765 case LABEL_REF:
20766 case SYMBOL_REF:
20767 if (TARGET_64BIT && !x86_64_immediate_operand (x, VOIDmode))
20768 *total = 3;
20769 else if (TARGET_64BIT && !x86_64_zext_immediate_operand (x, VOIDmode))
20770 *total = 2;
20771 else if (flag_pic && SYMBOLIC_CONST (x)
20772 && (!TARGET_64BIT
20773 || (GET_CODE (x) != LABEL_REF
20774 && (GET_CODE (x) != SYMBOL_REF
20775 || !SYMBOL_REF_LOCAL_P (x)))))
20776 *total = 1;
20777 else
20778 *total = 0;
20779 return true;
20780
20781 case CONST_DOUBLE:
20782 if (mode == VOIDmode)
20783 *total = 0;
20784 else
20785 switch (standard_80387_constant_p (x))
20786 {
20787 case 1: /* 0.0 */
20788 *total = 1;
20789 break;
20790 default: /* Other constants */
20791 *total = 2;
20792 break;
20793 case 0:
20794 case -1:
20795 /* Start with (MEM (SYMBOL_REF)), since that's where
20796 it'll probably end up. Add a penalty for size. */
20797 *total = (COSTS_N_INSNS (1)
20798 + (flag_pic != 0 && !TARGET_64BIT)
20799 + (mode == SFmode ? 0 : mode == DFmode ? 1 : 2));
20800 break;
20801 }
20802 return true;
20803
20804 case ZERO_EXTEND:
20805 /* The zero extension is often completely free on x86_64, so make
20806 it as cheap as possible. */
20807 if (TARGET_64BIT && mode == DImode
20808 && GET_MODE (XEXP (x, 0)) == SImode)
20809 *total = 1;
20810 else if (TARGET_ZERO_EXTEND_WITH_AND)
20811 *total = ix86_cost->add;
20812 else
20813 *total = ix86_cost->movzx;
20814 return false;
20815
20816 case SIGN_EXTEND:
20817 *total = ix86_cost->movsx;
20818 return false;
20819
20820 case ASHIFT:
20821 if (CONST_INT_P (XEXP (x, 1))
20822 && (GET_MODE (XEXP (x, 0)) != DImode || TARGET_64BIT))
20823 {
20824 HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20825 if (value == 1)
20826 {
20827 *total = ix86_cost->add;
20828 return false;
20829 }
20830 if ((value == 2 || value == 3)
20831 && ix86_cost->lea <= ix86_cost->shift_const)
20832 {
20833 *total = ix86_cost->lea;
20834 return false;
20835 }
20836 }
20837 /* FALLTHRU */
20838
20839 case ROTATE:
20840 case ASHIFTRT:
20841 case LSHIFTRT:
20842 case ROTATERT:
20843 if (!TARGET_64BIT && GET_MODE (XEXP (x, 0)) == DImode)
20844 {
20845 if (CONST_INT_P (XEXP (x, 1)))
20846 {
20847 if (INTVAL (XEXP (x, 1)) > 32)
20848 *total = ix86_cost->shift_const + COSTS_N_INSNS (2);
20849 else
20850 *total = ix86_cost->shift_const * 2;
20851 }
20852 else
20853 {
20854 if (GET_CODE (XEXP (x, 1)) == AND)
20855 *total = ix86_cost->shift_var * 2;
20856 else
20857 *total = ix86_cost->shift_var * 6 + COSTS_N_INSNS (2);
20858 }
20859 }
20860 else
20861 {
20862 if (CONST_INT_P (XEXP (x, 1)))
20863 *total = ix86_cost->shift_const;
20864 else
20865 *total = ix86_cost->shift_var;
20866 }
20867 return false;
20868
20869 case MULT:
20870 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20871 {
20872 /* ??? SSE scalar cost should be used here. */
20873 *total = ix86_cost->fmul;
20874 return false;
20875 }
20876 else if (X87_FLOAT_MODE_P (mode))
20877 {
20878 *total = ix86_cost->fmul;
20879 return false;
20880 }
20881 else if (FLOAT_MODE_P (mode))
20882 {
20883 /* ??? SSE vector cost should be used here. */
20884 *total = ix86_cost->fmul;
20885 return false;
20886 }
20887 else
20888 {
20889 rtx op0 = XEXP (x, 0);
20890 rtx op1 = XEXP (x, 1);
20891 int nbits;
20892 if (CONST_INT_P (XEXP (x, 1)))
20893 {
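/* Count the bits set in the constant multiplier; value &= value - 1
   clears the lowest set bit on each iteration, so NBITS ends up as the
   population count, which scales the per-bit multiply cost below.  */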
20894 unsigned HOST_WIDE_INT value = INTVAL (XEXP (x, 1));
20895 for (nbits = 0; value != 0; value &= value - 1)
20896 nbits++;
20897 }
20898 else
20899 /* This is arbitrary. */
20900 nbits = 7;
20901
20902 /* Compute costs correctly for widening multiplication. */
20903 if ((GET_CODE (op0) == SIGN_EXTEND || GET_CODE (op0) == ZERO_EXTEND)
20904 && GET_MODE_SIZE (GET_MODE (XEXP (op0, 0))) * 2
20905 == GET_MODE_SIZE (mode))
20906 {
20907 int is_mulwiden = 0;
20908 enum machine_mode inner_mode = GET_MODE (op0);
20909
20910 if (GET_CODE (op0) == GET_CODE (op1))
20911 is_mulwiden = 1, op1 = XEXP (op1, 0);
20912 else if (CONST_INT_P (op1))
20913 {
20914 if (GET_CODE (op0) == SIGN_EXTEND)
20915 is_mulwiden = trunc_int_for_mode (INTVAL (op1), inner_mode)
20916 == INTVAL (op1);
20917 else
20918 is_mulwiden = !(INTVAL (op1) & ~GET_MODE_MASK (inner_mode));
20919 }
20920
20921 if (is_mulwiden)
20922 op0 = XEXP (op0, 0), mode = GET_MODE (op0);
20923 }
20924
20925 *total = (ix86_cost->mult_init[MODE_INDEX (mode)]
20926 + nbits * ix86_cost->mult_bit
20927 + rtx_cost (op0, outer_code) + rtx_cost (op1, outer_code));
20928
20929 return true;
20930 }
20931
20932 case DIV:
20933 case UDIV:
20934 case MOD:
20935 case UMOD:
20936 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20937 /* ??? SSE cost should be used here. */
20938 *total = ix86_cost->fdiv;
20939 else if (X87_FLOAT_MODE_P (mode))
20940 *total = ix86_cost->fdiv;
20941 else if (FLOAT_MODE_P (mode))
20942 /* ??? SSE vector cost should be used here. */
20943 *total = ix86_cost->fdiv;
20944 else
20945 *total = ix86_cost->divide[MODE_INDEX (mode)];
20946 return false;
20947
20948 case PLUS:
20949 if (GET_MODE_CLASS (mode) == MODE_INT
20950 && GET_MODE_BITSIZE (mode) <= GET_MODE_BITSIZE (Pmode))
20951 {
20952 if (GET_CODE (XEXP (x, 0)) == PLUS
20953 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
20954 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1))
20955 && CONSTANT_P (XEXP (x, 1)))
20956 {
20957 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (XEXP (x, 0), 0), 1));
20958 if (val == 2 || val == 4 || val == 8)
20959 {
20960 *total = ix86_cost->lea;
20961 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20962 *total += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0),
20963 outer_code);
20964 *total += rtx_cost (XEXP (x, 1), outer_code);
20965 return true;
20966 }
20967 }
20968 else if (GET_CODE (XEXP (x, 0)) == MULT
20969 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
20970 {
20971 HOST_WIDE_INT val = INTVAL (XEXP (XEXP (x, 0), 1));
20972 if (val == 2 || val == 4 || val == 8)
20973 {
20974 *total = ix86_cost->lea;
20975 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20976 *total += rtx_cost (XEXP (x, 1), outer_code);
20977 return true;
20978 }
20979 }
20980 else if (GET_CODE (XEXP (x, 0)) == PLUS)
20981 {
20982 *total = ix86_cost->lea;
20983 *total += rtx_cost (XEXP (XEXP (x, 0), 0), outer_code);
20984 *total += rtx_cost (XEXP (XEXP (x, 0), 1), outer_code);
20985 *total += rtx_cost (XEXP (x, 1), outer_code);
20986 return true;
20987 }
20988 }
20989 /* FALLTHRU */
20990
20991 case MINUS:
20992 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
20993 {
20994 /* ??? SSE cost should be used here. */
20995 *total = ix86_cost->fadd;
20996 return false;
20997 }
20998 else if (X87_FLOAT_MODE_P (mode))
20999 {
21000 *total = ix86_cost->fadd;
21001 return false;
21002 }
21003 else if (FLOAT_MODE_P (mode))
21004 {
21005 /* ??? SSE vector cost should be used here. */
21006 *total = ix86_cost->fadd;
21007 return false;
21008 }
21009 /* FALLTHRU */
21010
21011 case AND:
21012 case IOR:
21013 case XOR:
21014 if (!TARGET_64BIT && mode == DImode)
21015 {
21016 *total = (ix86_cost->add * 2
21017 + (rtx_cost (XEXP (x, 0), outer_code)
21018 << (GET_MODE (XEXP (x, 0)) != DImode))
21019 + (rtx_cost (XEXP (x, 1), outer_code)
21020 << (GET_MODE (XEXP (x, 1)) != DImode)));
21021 return true;
21022 }
21023 /* FALLTHRU */
21024
21025 case NEG:
21026 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21027 {
21028 /* ??? SSE cost should be used here. */
21029 *total = ix86_cost->fchs;
21030 return false;
21031 }
21032 else if (X87_FLOAT_MODE_P (mode))
21033 {
21034 *total = ix86_cost->fchs;
21035 return false;
21036 }
21037 else if (FLOAT_MODE_P (mode))
21038 {
21039 /* ??? SSE vector cost should be used here. */
21040 *total = ix86_cost->fchs;
21041 return false;
21042 }
21043 /* FALLTHRU */
21044
21045 case NOT:
21046 if (!TARGET_64BIT && mode == DImode)
21047 *total = ix86_cost->add * 2;
21048 else
21049 *total = ix86_cost->add;
21050 return false;
21051
21052 case COMPARE:
21053 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT
21054 && XEXP (XEXP (x, 0), 1) == const1_rtx
21055 && CONST_INT_P (XEXP (XEXP (x, 0), 2))
21056 && XEXP (x, 1) == const0_rtx)
21057 {
21058 /* This kind of construct is implemented using test[bwl].
21059 Treat it as if we had an AND. */
21060 *total = (ix86_cost->add
21061 + rtx_cost (XEXP (XEXP (x, 0), 0), outer_code)
21062 + rtx_cost (const1_rtx, outer_code));
21063 return true;
21064 }
21065 return false;
21066
21067 case FLOAT_EXTEND:
21068 if (!(SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH))
21069 *total = 0;
21070 return false;
21071
21072 case ABS:
21073 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21074 /* ??? SSE cost should be used here. */
21075 *total = ix86_cost->fabs;
21076 else if (X87_FLOAT_MODE_P (mode))
21077 *total = ix86_cost->fabs;
21078 else if (FLOAT_MODE_P (mode))
21079 /* ??? SSE vector cost should be used here. */
21080 *total = ix86_cost->fabs;
21081 return false;
21082
21083 case SQRT:
21084 if (SSE_FLOAT_MODE_P (mode) && TARGET_SSE_MATH)
21085 /* ??? SSE cost should be used here. */
21086 *total = ix86_cost->fsqrt;
21087 else if (X87_FLOAT_MODE_P (mode))
21088 *total = ix86_cost->fsqrt;
21089 else if (FLOAT_MODE_P (mode))
21090 /* ??? SSE vector cost should be used here. */
21091 *total = ix86_cost->fsqrt;
21092 return false;
21093
21094 case UNSPEC:
21095 if (XINT (x, 1) == UNSPEC_TP)
21096 *total = 0;
21097 return false;
21098
21099 default:
21100 return false;
21101 }
21102 }
21103
21104 #if TARGET_MACHO
21105
21106 static int current_machopic_label_num;
21107
21108 /* Given a symbol name and its associated stub, write out the
21109 definition of the stub. */
21110
21111 void
21112 machopic_output_stub (FILE *file, const char *symb, const char *stub)
21113 {
21114 unsigned int length;
21115 char *binder_name, *symbol_name, lazy_ptr_name[32];
21116 int label = ++current_machopic_label_num;
21117
21118 /* For 64-bit we shouldn't get here. */
21119 gcc_assert (!TARGET_64BIT);
21120
21121 /* Lose our funky encoding stuff so it doesn't contaminate the stub. */
21122 symb = (*targetm.strip_name_encoding) (symb);
21123
21124 length = strlen (stub);
21125 binder_name = alloca (length + 32);
21126 GEN_BINDER_NAME_FOR_STUB (binder_name, stub, length);
21127
21128 length = strlen (symb);
21129 symbol_name = alloca (length + 32);
21130 GEN_SYMBOL_NAME_FOR_SYMBOL (symbol_name, symb, length);
21131
21132 sprintf (lazy_ptr_name, "L%d$lz", label);
21133
21134 if (MACHOPIC_PURE)
21135 switch_to_section (darwin_sections[machopic_picsymbol_stub_section]);
21136 else
21137 switch_to_section (darwin_sections[machopic_symbol_stub_section]);
21138
21139 fprintf (file, "%s:\n", stub);
21140 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21141
21142 if (MACHOPIC_PURE)
21143 {
21144 fprintf (file, "\tcall\tLPC$%d\nLPC$%d:\tpopl\t%%eax\n", label, label);
21145 fprintf (file, "\tmovl\t%s-LPC$%d(%%eax),%%edx\n", lazy_ptr_name, label);
21146 fprintf (file, "\tjmp\t*%%edx\n");
21147 }
21148 else
21149 fprintf (file, "\tjmp\t*%s\n", lazy_ptr_name);
21150
21151 fprintf (file, "%s:\n", binder_name);
21152
21153 if (MACHOPIC_PURE)
21154 {
21155 fprintf (file, "\tlea\t%s-LPC$%d(%%eax),%%eax\n", lazy_ptr_name, label);
21156 fprintf (file, "\tpushl\t%%eax\n");
21157 }
21158 else
21159 fprintf (file, "\tpushl\t$%s\n", lazy_ptr_name);
21160
21161 fprintf (file, "\tjmp\tdyld_stub_binding_helper\n");
21162
21163 switch_to_section (darwin_sections[machopic_lazy_symbol_ptr_section]);
21164 fprintf (file, "%s:\n", lazy_ptr_name);
21165 fprintf (file, "\t.indirect_symbol %s\n", symbol_name);
21166 fprintf (file, "\t.long %s\n", binder_name);
21167 }
21168
21169 void
21170 darwin_x86_file_end (void)
21171 {
21172 darwin_file_end ();
21173 ix86_file_end ();
21174 }
21175 #endif /* TARGET_MACHO */
21176
21177 /* Order the registers for register allocator. */
21178
21179 void
21180 x86_order_regs_for_local_alloc (void)
21181 {
21182 int pos = 0;
21183 int i;
21184
21185 /* First allocate the local general purpose registers. */
21186 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21187 if (GENERAL_REGNO_P (i) && call_used_regs[i])
21188 reg_alloc_order [pos++] = i;
21189
21190 /* Global general purpose registers. */
21191 for (i = 0; i < FIRST_PSEUDO_REGISTER; i++)
21192 if (GENERAL_REGNO_P (i) && !call_used_regs[i])
21193 reg_alloc_order [pos++] = i;
21194
21195 /* x87 registers come first in case we are doing FP math
21196 using them. */
21197 if (!TARGET_SSE_MATH)
21198 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21199 reg_alloc_order [pos++] = i;
21200
21201 /* SSE registers. */
21202 for (i = FIRST_SSE_REG; i <= LAST_SSE_REG; i++)
21203 reg_alloc_order [pos++] = i;
21204 for (i = FIRST_REX_SSE_REG; i <= LAST_REX_SSE_REG; i++)
21205 reg_alloc_order [pos++] = i;
21206
21207 /* x87 registers. */
21208 if (TARGET_SSE_MATH)
21209 for (i = FIRST_STACK_REG; i <= LAST_STACK_REG; i++)
21210 reg_alloc_order [pos++] = i;
21211
21212 for (i = FIRST_MMX_REG; i <= LAST_MMX_REG; i++)
21213 reg_alloc_order [pos++] = i;
21214
21215 /* Initialize the rest of the array, as we do not allocate some registers
21216 at all. */
21217 while (pos < FIRST_PSEUDO_REGISTER)
21218 reg_alloc_order [pos++] = 0;
21219 }
21220
21221 /* Handle a "ms_struct" or "gcc_struct" attribute; arguments as in
21222 struct attribute_spec.handler. */
21223 static tree
21224 ix86_handle_struct_attribute (tree *node, tree name,
21225 tree args ATTRIBUTE_UNUSED,
21226 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
21227 {
21228 tree *type = NULL;
21229 if (DECL_P (*node))
21230 {
21231 if (TREE_CODE (*node) == TYPE_DECL)
21232 type = &TREE_TYPE (*node);
21233 }
21234 else
21235 type = node;
21236
21237 if (!(type && (TREE_CODE (*type) == RECORD_TYPE
21238 || TREE_CODE (*type) == UNION_TYPE)))
21239 {
21240 warning (OPT_Wattributes, "%qs attribute ignored",
21241 IDENTIFIER_POINTER (name));
21242 *no_add_attrs = true;
21243 }
21244
21245 else if ((is_attribute_p ("ms_struct", name)
21246 && lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (*type)))
21247 || ((is_attribute_p ("gcc_struct", name)
21248 && lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (*type)))))
21249 {
21250 warning (OPT_Wattributes, "%qs incompatible attribute ignored",
21251 IDENTIFIER_POINTER (name));
21252 *no_add_attrs = true;
21253 }
21254
21255 return NULL_TREE;
21256 }
21257
21258 static bool
21259 ix86_ms_bitfield_layout_p (const_tree record_type)
21260 {
21261 return (TARGET_MS_BITFIELD_LAYOUT &&
21262 !lookup_attribute ("gcc_struct", TYPE_ATTRIBUTES (record_type)))
21263 || lookup_attribute ("ms_struct", TYPE_ATTRIBUTES (record_type));
21264 }
21265
21266 /* Returns an expression indicating where the this parameter is
21267 located on entry to the FUNCTION. */
21268
21269 static rtx
21270 x86_this_parameter (tree function)
21271 {
21272 tree type = TREE_TYPE (function);
21273 bool aggr = aggregate_value_p (TREE_TYPE (type), type) != 0;
21274
21275 if (TARGET_64BIT)
21276 {
21277 const int *parm_regs;
21278
21279 if (TARGET_64BIT_MS_ABI)
21280 parm_regs = x86_64_ms_abi_int_parameter_registers;
21281 else
21282 parm_regs = x86_64_int_parameter_registers;
21283 return gen_rtx_REG (DImode, parm_regs[aggr]);
21284 }
21285
21286 if (ix86_function_regparm (type, function) > 0
21287 && !type_has_variadic_args_p (type))
21288 {
21289 int regno = 0;
21290 if (lookup_attribute ("fastcall", TYPE_ATTRIBUTES (type)))
21291 regno = 2;
21292 return gen_rtx_REG (SImode, regno);
21293 }
21294
21295 return gen_rtx_MEM (SImode, plus_constant (stack_pointer_rtx, aggr ? 8 : 4));
21296 }
21297
21298 /* Determine whether x86_output_mi_thunk can succeed. */
21299
21300 static bool
21301 x86_can_output_mi_thunk (const_tree thunk ATTRIBUTE_UNUSED,
21302 HOST_WIDE_INT delta ATTRIBUTE_UNUSED,
21303 HOST_WIDE_INT vcall_offset, const_tree function)
21304 {
21305 /* 64-bit can handle anything. */
21306 if (TARGET_64BIT)
21307 return true;
21308
21309 /* For 32-bit, everything's fine if we have one free register. */
21310 if (ix86_function_regparm (TREE_TYPE (function), function) < 3)
21311 return true;
21312
21313 /* Need a free register for vcall_offset. */
21314 if (vcall_offset)
21315 return false;
21316
21317 /* Need a free register for GOT references. */
21318 if (flag_pic && !(*targetm.binds_local_p) (function))
21319 return false;
21320
21321 /* Otherwise ok. */
21322 return true;
21323 }
21324
21325 /* Output the assembler code for a thunk function. THUNK_DECL is the
21326 declaration for the thunk function itself, FUNCTION is the decl for
21327 the target function. DELTA is an immediate constant offset to be
21328 added to THIS. If VCALL_OFFSET is nonzero, the word at
21329 *(*this + vcall_offset) should be added to THIS. */
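/* For instance, on ia32 with THIS passed on the stack, DELTA == 4,
   VCALL_OFFSET == 0, a non-PIC locally bound target and no aggregate return
   value, the emitted thunk reduces to "addl $4, 4(%esp)" followed by a
   direct jump to FUNCTION.  */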
21330
21331 static void
21332 x86_output_mi_thunk (FILE *file ATTRIBUTE_UNUSED,
21333 tree thunk ATTRIBUTE_UNUSED, HOST_WIDE_INT delta,
21334 HOST_WIDE_INT vcall_offset, tree function)
21335 {
21336 rtx xops[3];
21337 rtx this_param = x86_this_parameter (function);
21338 rtx this_reg, tmp;
21339
21340 /* If VCALL_OFFSET, we'll need THIS in a register. Might as well
21341 pull it in now and let DELTA benefit. */
21342 if (REG_P (this_param))
21343 this_reg = this_param;
21344 else if (vcall_offset)
21345 {
21346 /* Put the this parameter into %eax. */
21347 xops[0] = this_param;
21348 xops[1] = this_reg = gen_rtx_REG (Pmode, 0);
21349 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21350 }
21351 else
21352 this_reg = NULL_RTX;
21353
21354 /* Adjust the this parameter by a fixed constant. */
21355 if (delta)
21356 {
21357 xops[0] = GEN_INT (delta);
21358 xops[1] = this_reg ? this_reg : this_param;
21359 if (TARGET_64BIT)
21360 {
21361 if (!x86_64_general_operand (xops[0], DImode))
21362 {
21363 tmp = gen_rtx_REG (DImode, R10_REG);
21364 xops[1] = tmp;
21365 output_asm_insn ("mov{q}\t{%1, %0|%0, %1}", xops);
21366 xops[0] = tmp;
21367 xops[1] = this_param;
21368 }
21369 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21370 }
21371 else
21372 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21373 }
21374
21375 /* Adjust the this parameter by a value stored in the vtable. */
21376 if (vcall_offset)
21377 {
21378 if (TARGET_64BIT)
21379 tmp = gen_rtx_REG (DImode, R10_REG);
21380 else
21381 {
21382 int tmp_regno = 2 /* ECX */;
21383 if (lookup_attribute ("fastcall",
21384 TYPE_ATTRIBUTES (TREE_TYPE (function))))
21385 tmp_regno = 0 /* EAX */;
21386 tmp = gen_rtx_REG (SImode, tmp_regno);
21387 }
21388
21389 xops[0] = gen_rtx_MEM (Pmode, this_reg);
21390 xops[1] = tmp;
21391 if (TARGET_64BIT)
21392 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21393 else
21394 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21395
21396 /* Adjust the this parameter. */
21397 xops[0] = gen_rtx_MEM (Pmode, plus_constant (tmp, vcall_offset));
21398 if (TARGET_64BIT && !memory_operand (xops[0], Pmode))
21399 {
21400 rtx tmp2 = gen_rtx_REG (DImode, R11_REG);
21401 xops[0] = GEN_INT (vcall_offset);
21402 xops[1] = tmp2;
21403 output_asm_insn ("mov{q}\t{%0, %1|%1, %0}", xops);
21404 xops[0] = gen_rtx_MEM (Pmode, gen_rtx_PLUS (Pmode, tmp, tmp2));
21405 }
21406 xops[1] = this_reg;
21407 if (TARGET_64BIT)
21408 output_asm_insn ("add{q}\t{%0, %1|%1, %0}", xops);
21409 else
21410 output_asm_insn ("add{l}\t{%0, %1|%1, %0}", xops);
21411 }
21412
21413 /* If necessary, drop THIS back to its stack slot. */
21414 if (this_reg && this_reg != this_param)
21415 {
21416 xops[0] = this_reg;
21417 xops[1] = this_param;
21418 output_asm_insn ("mov{l}\t{%0, %1|%1, %0}", xops);
21419 }
21420
21421 xops[0] = XEXP (DECL_RTL (function), 0);
21422 if (TARGET_64BIT)
21423 {
21424 if (!flag_pic || (*targetm.binds_local_p) (function))
21425 output_asm_insn ("jmp\t%P0", xops);
21426 /* All thunks should be in the same object as their target,
21427 and thus binds_local_p should be true. */
21428 else if (TARGET_64BIT_MS_ABI)
21429 gcc_unreachable ();
21430 else
21431 {
21432 tmp = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, xops[0]), UNSPEC_GOTPCREL);
21433 tmp = gen_rtx_CONST (Pmode, tmp);
21434 tmp = gen_rtx_MEM (QImode, tmp);
21435 xops[0] = tmp;
21436 output_asm_insn ("jmp\t%A0", xops);
21437 }
21438 }
21439 else
21440 {
21441 if (!flag_pic || (*targetm.binds_local_p) (function))
21442 output_asm_insn ("jmp\t%P0", xops);
21443 else
21444 #if TARGET_MACHO
21445 if (TARGET_MACHO)
21446 {
21447 rtx sym_ref = XEXP (DECL_RTL (function), 0);
21448 tmp = (gen_rtx_SYMBOL_REF
21449 (Pmode,
21450 machopic_indirection_name (sym_ref, /*stub_p=*/true)));
21451 tmp = gen_rtx_MEM (QImode, tmp);
21452 xops[0] = tmp;
21453 output_asm_insn ("jmp\t%0", xops);
21454 }
21455 else
21456 #endif /* TARGET_MACHO */
21457 {
21458 tmp = gen_rtx_REG (SImode, 2 /* ECX */);
21459 output_set_got (tmp, NULL_RTX);
21460
21461 xops[1] = tmp;
21462 output_asm_insn ("mov{l}\t{%0@GOT(%1), %1|%1, %0@GOT[%1]}", xops);
21463 output_asm_insn ("jmp\t{*}%1", xops);
21464 }
21465 }
21466 }
21467
21468 static void
21469 x86_file_start (void)
21470 {
21471 default_file_start ();
21472 #if TARGET_MACHO
21473 darwin_file_start ();
21474 #endif
21475 if (X86_FILE_START_VERSION_DIRECTIVE)
21476 fputs ("\t.version\t\"01.01\"\n", asm_out_file);
21477 if (X86_FILE_START_FLTUSED)
21478 fputs ("\t.global\t__fltused\n", asm_out_file);
21479 if (ix86_asm_dialect == ASM_INTEL)
21480 fputs ("\t.intel_syntax\n", asm_out_file);
21481 }
21482
21483 int
21484 x86_field_alignment (tree field, int computed)
21485 {
21486 enum machine_mode mode;
21487 tree type = TREE_TYPE (field);
21488
21489 if (TARGET_64BIT || TARGET_ALIGN_DOUBLE)
21490 return computed;
21491 mode = TYPE_MODE (TREE_CODE (type) == ARRAY_TYPE
21492 ? get_inner_array_type (type) : type);
21493 if (mode == DFmode || mode == DCmode
21494 || GET_MODE_CLASS (mode) == MODE_INT
21495 || GET_MODE_CLASS (mode) == MODE_COMPLEX_INT)
21496 return MIN (32, computed);
21497 return computed;
21498 }
21499
21500 /* Output assembler code to FILE to increment profiler label # LABELNO
21501 for profiling a function entry. */
21502 void
21503 x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
21504 {
21505 if (TARGET_64BIT)
21506 {
21507 #ifndef NO_PROFILE_COUNTERS
21508 fprintf (file, "\tleaq\t%sP%d@(%%rip),%%r11\n", LPREFIX, labelno);
21509 #endif
21510
21511 if (!TARGET_64BIT_MS_ABI && flag_pic)
21512 fprintf (file, "\tcall\t*%s@GOTPCREL(%%rip)\n", MCOUNT_NAME);
21513 else
21514 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21515 }
21516 else if (flag_pic)
21517 {
21518 #ifndef NO_PROFILE_COUNTERS
21519 fprintf (file, "\tleal\t%sP%d@GOTOFF(%%ebx),%%%s\n",
21520 LPREFIX, labelno, PROFILE_COUNT_REGISTER);
21521 #endif
21522 fprintf (file, "\tcall\t*%s@GOT(%%ebx)\n", MCOUNT_NAME);
21523 }
21524 else
21525 {
21526 #ifndef NO_PROFILE_COUNTERS
21527 fprintf (file, "\tmovl\t$%sP%d,%%%s\n", LPREFIX, labelno,
21528 PROFILE_COUNT_REGISTER);
21529 #endif
21530 fprintf (file, "\tcall\t%s\n", MCOUNT_NAME);
21531 }
21532 }
21533
21534 /* We don't have exact information about the insn sizes, but we may assume
21535 quite safely that we are informed about all 1 byte insns and memory
21536 address sizes. This is enough to eliminate unnecessary padding in
21537 99% of cases. */
21538
21539 static int
21540 min_insn_size (rtx insn)
21541 {
21542 int l = 0;
21543
21544 if (!INSN_P (insn) || !active_insn_p (insn))
21545 return 0;
21546
21547 /* Discard the alignments we have emitted, and jump instructions. */
21548 if (GET_CODE (PATTERN (insn)) == UNSPEC_VOLATILE
21549 && XINT (PATTERN (insn), 1) == UNSPECV_ALIGN)
21550 return 0;
21551 if (JUMP_P (insn)
21552 && (GET_CODE (PATTERN (insn)) == ADDR_VEC
21553 || GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC))
21554 return 0;
21555
21556 /* Important case - calls are always 5 bytes.
21557 It is common to have many calls in a row. */
21558 if (CALL_P (insn)
21559 && symbolic_reference_mentioned_p (PATTERN (insn))
21560 && !SIBLING_CALL_P (insn))
21561 return 5;
21562 if (get_attr_length (insn) <= 1)
21563 return 1;
21564
21565 /* For normal instructions we may rely on the sizes of addresses
21566 and the presence of a symbol to require 4 bytes of encoding.
21567 This is not the case for jumps, whose references are PC relative. */
21568 if (!JUMP_P (insn))
21569 {
21570 l = get_attr_length_address (insn);
21571 if (l < 4 && symbolic_reference_mentioned_p (PATTERN (insn)))
21572 l = 4;
21573 }
21574 if (l)
21575 return 1+l;
21576 else
21577 return 2;
21578 }
21579
21580 /* The AMD K8 core mispredicts jumps when there are more than 3 jumps in a
21581 16-byte window. */
21582
21583 static void
21584 ix86_avoid_jump_misspredicts (void)
21585 {
21586 rtx insn, start = get_insns ();
21587 int nbytes = 0, njumps = 0;
21588 int isjump = 0;
21589
21590 /* Look for all minimal intervals of instructions containing 4 jumps.
21591 The intervals are bounded by START and INSN. NBYTES is the total
21592 size of instructions in the interval including INSN and not including
21593 START. When NBYTES is smaller than 16 bytes, it is possible
21594 that the ends of START and INSN land in the same 16-byte window.
21595
21596 The smallest offset at which INSN can start in the window is when START
21597 ends at offset 0; the offset of INSN is then NBYTES - sizeof (INSN).
21598 We add a p2align to the 16-byte window with maxskip 17 - NBYTES + sizeof (INSN).
21599 */
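/* In other words: slide a window over the insn stream, dropping insns from
   START while it holds more than three jumps or calls; whenever the current
   insn would become a fourth jump within the same 16-byte stretch, emit
   padding before it so at most three jumps share one fetch window.  */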
21600 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
21601 {
21602
21603 nbytes += min_insn_size (insn);
21604 if (dump_file)
21605 fprintf(dump_file, "Insn %i estimated to %i bytes\n",
21606 INSN_UID (insn), min_insn_size (insn));
21607 if ((JUMP_P (insn)
21608 && GET_CODE (PATTERN (insn)) != ADDR_VEC
21609 && GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
21610 || CALL_P (insn))
21611 njumps++;
21612 else
21613 continue;
21614
21615 while (njumps > 3)
21616 {
21617 start = NEXT_INSN (start);
21618 if ((JUMP_P (start)
21619 && GET_CODE (PATTERN (start)) != ADDR_VEC
21620 && GET_CODE (PATTERN (start)) != ADDR_DIFF_VEC)
21621 || CALL_P (start))
21622 njumps--, isjump = 1;
21623 else
21624 isjump = 0;
21625 nbytes -= min_insn_size (start);
21626 }
21627 gcc_assert (njumps >= 0);
21628 if (dump_file)
21629 fprintf (dump_file, "Interval %i to %i has %i bytes\n",
21630 INSN_UID (start), INSN_UID (insn), nbytes);
21631
21632 if (njumps == 3 && isjump && nbytes < 16)
21633 {
21634 int padsize = 15 - nbytes + min_insn_size (insn);
21635
21636 if (dump_file)
21637 fprintf (dump_file, "Padding insn %i by %i bytes!\n",
21638 INSN_UID (insn), padsize);
21639 emit_insn_before (gen_align (GEN_INT (padsize)), insn);
21640 }
21641 }
21642 }
21643
21644 /* The AMD Athlon works faster
21645 when RET is not the destination of a conditional jump or directly preceded
21646 by another jump instruction. We avoid the penalty by inserting a NOP just
21647 before the RET instruction in such cases. */
21648 static void
21649 ix86_pad_returns (void)
21650 {
21651 edge e;
21652 edge_iterator ei;
21653
21654 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR->preds)
21655 {
21656 basic_block bb = e->src;
21657 rtx ret = BB_END (bb);
21658 rtx prev;
21659 bool replace = false;
21660
21661 if (!JUMP_P (ret) || GET_CODE (PATTERN (ret)) != RETURN
21662 || !maybe_hot_bb_p (bb))
21663 continue;
21664 for (prev = PREV_INSN (ret); prev; prev = PREV_INSN (prev))
21665 if (active_insn_p (prev) || LABEL_P (prev))
21666 break;
21667 if (prev && LABEL_P (prev))
21668 {
21669 edge e;
21670 edge_iterator ei;
21671
21672 FOR_EACH_EDGE (e, ei, bb->preds)
21673 if (EDGE_FREQUENCY (e) && e->src->index >= 0
21674 && !(e->flags & EDGE_FALLTHRU))
21675 replace = true;
21676 }
21677 if (!replace)
21678 {
21679 prev = prev_active_insn (ret);
21680 if (prev
21681 && ((JUMP_P (prev) && any_condjump_p (prev))
21682 || CALL_P (prev)))
21683 replace = true;
21684 /* Empty functions get a branch mispredict even when the jump destination
21685 is not visible to us. */
21686 if (!prev && cfun->function_frequency > FUNCTION_FREQUENCY_UNLIKELY_EXECUTED)
21687 replace = true;
21688 }
21689 if (replace)
21690 {
21691 emit_insn_before (gen_return_internal_long (), ret);
21692 delete_insn (ret);
21693 }
21694 }
21695 }
21696
21697 /* Implement machine specific optimizations. We implement padding of returns
21698 for K8 CPUs and a pass to avoid 4 jumps in a single 16-byte window. */
21699 static void
21700 ix86_reorg (void)
21701 {
21702 if (TARGET_PAD_RETURNS && optimize && !optimize_size)
21703 ix86_pad_returns ();
21704 if (TARGET_FOUR_JUMP_LIMIT && optimize && !optimize_size)
21705 ix86_avoid_jump_misspredicts ();
21706 }
21707
21708 /* Return nonzero when a QImode register that must be represented via a REX
21709 prefix is used. */
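/* In 64-bit mode, the low byte of %rsi, %rdi, %rbp, %rsp and of %r8-%r15 can
   only be addressed with a REX prefix, while %al, %dl, %cl and %bl (hard
   registers 0-3) need none; hence the REGNO >= 4 test below.  */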
21710 bool
21711 x86_extended_QIreg_mentioned_p (rtx insn)
21712 {
21713 int i;
21714 extract_insn_cached (insn);
21715 for (i = 0; i < recog_data.n_operands; i++)
21716 if (REG_P (recog_data.operand[i])
21717 && REGNO (recog_data.operand[i]) >= 4)
21718 return true;
21719 return false;
21720 }
21721
21722 /* Return nonzero when P points to a register encoded via a REX prefix.
21723 Called via for_each_rtx. */
21724 static int
21725 extended_reg_mentioned_1 (rtx *p, void *data ATTRIBUTE_UNUSED)
21726 {
21727 unsigned int regno;
21728 if (!REG_P (*p))
21729 return 0;
21730 regno = REGNO (*p);
21731 return REX_INT_REGNO_P (regno) || REX_SSE_REGNO_P (regno);
21732 }
21733
21734 /* Return true when INSN mentions register that must be encoded using REX
21735 prefix. */
21736 bool
21737 x86_extended_reg_mentioned_p (rtx insn)
21738 {
21739 return for_each_rtx (&PATTERN (insn), extended_reg_mentioned_1, NULL);
21740 }
21741
21742 /* Generate an unsigned DImode/SImode to FP conversion. This is the same code
21743 optabs would emit if we didn't have TFmode patterns. */
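/* For inputs with the sign bit set, the code below shifts the value right by
   one, ORs the lost low bit back in, converts that halved value to FP and
   then doubles the result; keeping the low bit preserves correct rounding.  */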
21744
21745 void
21746 x86_emit_floatuns (rtx operands[2])
21747 {
21748 rtx neglab, donelab, i0, i1, f0, in, out;
21749 enum machine_mode mode, inmode;
21750
21751 inmode = GET_MODE (operands[1]);
21752 gcc_assert (inmode == SImode || inmode == DImode);
21753
21754 out = operands[0];
21755 in = force_reg (inmode, operands[1]);
21756 mode = GET_MODE (out);
21757 neglab = gen_label_rtx ();
21758 donelab = gen_label_rtx ();
21759 f0 = gen_reg_rtx (mode);
21760
21761 emit_cmp_and_jump_insns (in, const0_rtx, LT, const0_rtx, inmode, 0, neglab);
21762
21763 expand_float (out, in, 0);
21764
21765 emit_jump_insn (gen_jump (donelab));
21766 emit_barrier ();
21767
21768 emit_label (neglab);
21769
21770 i0 = expand_simple_binop (inmode, LSHIFTRT, in, const1_rtx, NULL,
21771 1, OPTAB_DIRECT);
21772 i1 = expand_simple_binop (inmode, AND, in, const1_rtx, NULL,
21773 1, OPTAB_DIRECT);
21774 i0 = expand_simple_binop (inmode, IOR, i0, i1, i0, 1, OPTAB_DIRECT);
21775
21776 expand_float (f0, i0, 0);
21777
21778 emit_insn (gen_rtx_SET (VOIDmode, out, gen_rtx_PLUS (mode, f0, f0)));
21779
21780 emit_label (donelab);
21781 }
21782 \f
21783 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21784 with all elements equal to VAR. Return true if successful. */
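/* For the word-sized element cases (V4SF, V4SI, V2DF, V2DI and the
   corresponding MMX modes) this is a single (set target (vec_duplicate val));
   the narrower element modes either use dedicated SSE2 unpack/shuffle
   sequences or replicate the scalar into a wider word and recurse.  */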
21785
21786 static bool
21787 ix86_expand_vector_init_duplicate (bool mmx_ok, enum machine_mode mode,
21788 rtx target, rtx val)
21789 {
21790 enum machine_mode smode, wsmode, wvmode;
21791 rtx x;
21792
21793 switch (mode)
21794 {
21795 case V2SImode:
21796 case V2SFmode:
21797 if (!mmx_ok)
21798 return false;
21799 /* FALLTHRU */
21800
21801 case V2DFmode:
21802 case V2DImode:
21803 case V4SFmode:
21804 case V4SImode:
21805 val = force_reg (GET_MODE_INNER (mode), val);
21806 x = gen_rtx_VEC_DUPLICATE (mode, val);
21807 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21808 return true;
21809
21810 case V4HImode:
21811 if (!mmx_ok)
21812 return false;
21813 if (TARGET_SSE || TARGET_3DNOW_A)
21814 {
21815 val = gen_lowpart (SImode, val);
21816 x = gen_rtx_TRUNCATE (HImode, val);
21817 x = gen_rtx_VEC_DUPLICATE (mode, x);
21818 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21819 return true;
21820 }
21821 else
21822 {
21823 smode = HImode;
21824 wsmode = SImode;
21825 wvmode = V2SImode;
21826 goto widen;
21827 }
21828
21829 case V8QImode:
21830 if (!mmx_ok)
21831 return false;
21832 smode = QImode;
21833 wsmode = HImode;
21834 wvmode = V4HImode;
21835 goto widen;
21836 case V8HImode:
21837 if (TARGET_SSE2)
21838 {
21839 rtx tmp1, tmp2;
21840 /* Extend HImode to SImode using a paradoxical SUBREG. */
21841 tmp1 = gen_reg_rtx (SImode);
21842 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21843 /* Insert the SImode value as low element of V4SImode vector. */
21844 tmp2 = gen_reg_rtx (V4SImode);
21845 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21846 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21847 CONST0_RTX (V4SImode),
21848 const1_rtx);
21849 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21850 /* Cast the V4SImode vector back to a V8HImode vector. */
21851 tmp1 = gen_reg_rtx (V8HImode);
21852 emit_move_insn (tmp1, gen_lowpart (V8HImode, tmp2));
21853 /* Duplicate the low short through the whole low SImode word. */
21854 emit_insn (gen_sse2_punpcklwd (tmp1, tmp1, tmp1));
21855 /* Cast the V8HImode vector back to a V4SImode vector. */
21856 tmp2 = gen_reg_rtx (V4SImode);
21857 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21858 /* Replicate the low element of the V4SImode vector. */
21859 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21860 /* Cast the V4SImode vector back to V8HImode, and store in target. */
21861 emit_move_insn (target, gen_lowpart (V8HImode, tmp2));
21862 return true;
21863 }
21864 smode = HImode;
21865 wsmode = SImode;
21866 wvmode = V4SImode;
21867 goto widen;
21868 case V16QImode:
21869 if (TARGET_SSE2)
21870 {
21871 rtx tmp1, tmp2;
21872 /* Extend QImode to SImode using a paradoxical SUBREG. */
21873 tmp1 = gen_reg_rtx (SImode);
21874 emit_move_insn (tmp1, gen_lowpart (SImode, val));
21875 /* Insert the SImode value as low element of V4SImode vector. */
21876 tmp2 = gen_reg_rtx (V4SImode);
21877 tmp1 = gen_rtx_VEC_MERGE (V4SImode,
21878 gen_rtx_VEC_DUPLICATE (V4SImode, tmp1),
21879 CONST0_RTX (V4SImode),
21880 const1_rtx);
21881 emit_insn (gen_rtx_SET (VOIDmode, tmp2, tmp1));
21882 /* Cast the V4SImode vector back to a V16QImode vector. */
21883 tmp1 = gen_reg_rtx (V16QImode);
21884 emit_move_insn (tmp1, gen_lowpart (V16QImode, tmp2));
21885 /* Duplicate the low byte through the whole low SImode word. */
21886 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21887 emit_insn (gen_sse2_punpcklbw (tmp1, tmp1, tmp1));
21888 /* Cast the V16QImode vector back to a V4SImode vector. */
21889 tmp2 = gen_reg_rtx (V4SImode);
21890 emit_move_insn (tmp2, gen_lowpart (V4SImode, tmp1));
21891 /* Replicate the low element of the V4SImode vector. */
21892 emit_insn (gen_sse2_pshufd (tmp2, tmp2, const0_rtx));
21893 /* Cast the V4SImode vector back to V16QImode, and store in target. */
21894 emit_move_insn (target, gen_lowpart (V16QImode, tmp2));
21895 return true;
21896 }
21897 smode = QImode;
21898 wsmode = HImode;
21899 wvmode = V8HImode;
21900 goto widen;
21901 widen:
21902 /* Replicate the value once into the next wider mode and recurse. */
21903 val = convert_modes (wsmode, smode, val, true);
21904 x = expand_simple_binop (wsmode, ASHIFT, val,
21905 GEN_INT (GET_MODE_BITSIZE (smode)),
21906 NULL_RTX, 1, OPTAB_LIB_WIDEN);
21907 val = expand_simple_binop (wsmode, IOR, val, x, x, 1, OPTAB_LIB_WIDEN);
21908
21909 x = gen_reg_rtx (wvmode);
21910 if (!ix86_expand_vector_init_duplicate (mmx_ok, wvmode, x, val))
21911 gcc_unreachable ();
21912 emit_move_insn (target, gen_lowpart (mode, x));
21913 return true;
21914
21915 default:
21916 return false;
21917 }
21918 }
21919
21920 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
21921 whose ONE_VAR element is VAR, and other elements are zero. Return true
21922 if successful. */
21923
21924 static bool
21925 ix86_expand_vector_init_one_nonzero (bool mmx_ok, enum machine_mode mode,
21926 rtx target, rtx var, int one_var)
21927 {
21928 enum machine_mode vsimode;
21929 rtx new_target;
21930 rtx x, tmp;
21931
21932 switch (mode)
21933 {
21934 case V2SFmode:
21935 case V2SImode:
21936 if (!mmx_ok)
21937 return false;
21938 /* FALLTHRU */
21939
21940 case V2DFmode:
21941 case V2DImode:
21942 if (one_var != 0)
21943 return false;
21944 var = force_reg (GET_MODE_INNER (mode), var);
21945 x = gen_rtx_VEC_CONCAT (mode, var, CONST0_RTX (GET_MODE_INNER (mode)));
21946 emit_insn (gen_rtx_SET (VOIDmode, target, x));
21947 return true;
21948
21949 case V4SFmode:
21950 case V4SImode:
21951 if (!REG_P (target) || REGNO (target) < FIRST_PSEUDO_REGISTER)
21952 new_target = gen_reg_rtx (mode);
21953 else
21954 new_target = target;
21955 var = force_reg (GET_MODE_INNER (mode), var);
21956 x = gen_rtx_VEC_DUPLICATE (mode, var);
21957 x = gen_rtx_VEC_MERGE (mode, x, CONST0_RTX (mode), const1_rtx);
21958 emit_insn (gen_rtx_SET (VOIDmode, new_target, x));
21959 if (one_var != 0)
21960 {
21961 /* We need to shuffle the value to the correct position, so
21962 create a new pseudo to store the intermediate result. */
21963
21964 /* With SSE2, we can use the integer shuffle insns. */
21965 if (mode != V4SFmode && TARGET_SSE2)
21966 {
21967 emit_insn (gen_sse2_pshufd_1 (new_target, new_target,
21968 GEN_INT (1),
21969 GEN_INT (one_var == 1 ? 0 : 1),
21970 GEN_INT (one_var == 2 ? 0 : 1),
21971 GEN_INT (one_var == 3 ? 0 : 1)));
21972 if (target != new_target)
21973 emit_move_insn (target, new_target);
21974 return true;
21975 }
21976
21977 /* Otherwise convert the intermediate result to V4SFmode and
21978 use the SSE1 shuffle instructions. */
21979 if (mode != V4SFmode)
21980 {
21981 tmp = gen_reg_rtx (V4SFmode);
21982 emit_move_insn (tmp, gen_lowpart (V4SFmode, new_target));
21983 }
21984 else
21985 tmp = new_target;
21986
21987 emit_insn (gen_sse_shufps_1 (tmp, tmp, tmp,
21988 GEN_INT (1),
21989 GEN_INT (one_var == 1 ? 0 : 1),
21990 GEN_INT (one_var == 2 ? 0+4 : 1+4),
21991 GEN_INT (one_var == 3 ? 0+4 : 1+4)));
21992
21993 if (mode != V4SFmode)
21994 emit_move_insn (target, gen_lowpart (V4SImode, tmp));
21995 else if (tmp != target)
21996 emit_move_insn (target, tmp);
21997 }
21998 else if (target != new_target)
21999 emit_move_insn (target, new_target);
22000 return true;
22001
22002 case V8HImode:
22003 case V16QImode:
22004 vsimode = V4SImode;
22005 goto widen;
22006 case V4HImode:
22007 case V8QImode:
22008 if (!mmx_ok)
22009 return false;
22010 vsimode = V2SImode;
22011 goto widen;
22012 widen:
22013 if (one_var != 0)
22014 return false;
22015
22016 /* Zero extend the variable element to SImode and recurse. */
22017 var = convert_modes (SImode, GET_MODE_INNER (mode), var, true);
22018
22019 x = gen_reg_rtx (vsimode);
22020 if (!ix86_expand_vector_init_one_nonzero (mmx_ok, vsimode, x,
22021 var, one_var))
22022 gcc_unreachable ();
22023
22024 emit_move_insn (target, gen_lowpart (mode, x));
22025 return true;
22026
22027 default:
22028 return false;
22029 }
22030 }
22031
22032 /* A subroutine of ix86_expand_vector_init. Store into TARGET a vector
22033 consisting of the values in VALS. It is known that all elements
22034 except ONE_VAR are constants. Return true if successful. */
22035
22036 static bool
22037 ix86_expand_vector_init_one_var (bool mmx_ok, enum machine_mode mode,
22038 rtx target, rtx vals, int one_var)
22039 {
22040 rtx var = XVECEXP (vals, 0, one_var);
22041 enum machine_mode wmode;
22042 rtx const_vec, x;
22043
22044 const_vec = copy_rtx (vals);
22045 XVECEXP (const_vec, 0, one_var) = CONST0_RTX (GET_MODE_INNER (mode));
22046 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (const_vec, 0));
22047
22048 switch (mode)
22049 {
22050 case V2DFmode:
22051 case V2DImode:
22052 case V2SFmode:
22053 case V2SImode:
22054 /* For the two element vectors, it's just as easy to use
22055 the general case. */
22056 return false;
22057
22058 case V4SFmode:
22059 case V4SImode:
22060 case V8HImode:
22061 case V4HImode:
22062 break;
22063
22064 case V16QImode:
22065 wmode = V8HImode;
22066 goto widen;
22067 case V8QImode:
22068 wmode = V4HImode;
22069 goto widen;
22070 widen:
22071 /* There's no way to set one QImode entry easily. Combine
22072 the variable value with its adjacent constant value, and
22073 promote to an HImode set. */
22074 x = XVECEXP (vals, 0, one_var ^ 1);
22075 if (one_var & 1)
22076 {
22077 var = convert_modes (HImode, QImode, var, true);
22078 var = expand_simple_binop (HImode, ASHIFT, var, GEN_INT (8),
22079 NULL_RTX, 1, OPTAB_LIB_WIDEN);
22080 x = GEN_INT (INTVAL (x) & 0xff);
22081 }
22082 else
22083 {
22084 var = convert_modes (HImode, QImode, var, true);
22085 x = gen_int_mode (INTVAL (x) << 8, HImode);
22086 }
22087 if (x != const0_rtx)
22088 var = expand_simple_binop (HImode, IOR, var, x, var,
22089 1, OPTAB_LIB_WIDEN);
22090
22091 x = gen_reg_rtx (wmode);
22092 emit_move_insn (x, gen_lowpart (wmode, const_vec));
22093 ix86_expand_vector_set (mmx_ok, x, var, one_var >> 1);
22094
22095 emit_move_insn (target, gen_lowpart (mode, x));
22096 return true;
22097
22098 default:
22099 return false;
22100 }
22101
22102 emit_move_insn (target, const_vec);
22103 ix86_expand_vector_set (mmx_ok, target, var, one_var);
22104 return true;
22105 }
22106
22107 /* A subroutine of ix86_expand_vector_init. Handle the most general case:
22108 all values variable, and none identical. */
22109
22110 static void
22111 ix86_expand_vector_init_general (bool mmx_ok, enum machine_mode mode,
22112 rtx target, rtx vals)
22113 {
22114 enum machine_mode half_mode = GET_MODE_INNER (mode);
22115 rtx op0 = NULL, op1 = NULL;
22116 bool use_vec_concat = false;
22117
22118 switch (mode)
22119 {
22120 case V2SFmode:
22121 case V2SImode:
22122 if (!mmx_ok && !TARGET_SSE)
22123 break;
22124 /* FALLTHRU */
22125
22126 case V2DFmode:
22127 case V2DImode:
22128 /* For the two element vectors, we always implement VEC_CONCAT. */
22129 op0 = XVECEXP (vals, 0, 0);
22130 op1 = XVECEXP (vals, 0, 1);
22131 use_vec_concat = true;
22132 break;
22133
22134 case V4SFmode:
22135 half_mode = V2SFmode;
22136 goto half;
22137 case V4SImode:
22138 half_mode = V2SImode;
22139 goto half;
22140 half:
22141 {
22142 rtvec v;
22143
22144 /* For V4SF and V4SI, we implement a concat of two V2 vectors.
22145 Recurse to load the two halves. */
22146
22147 op0 = gen_reg_rtx (half_mode);
22148 v = gen_rtvec (2, XVECEXP (vals, 0, 0), XVECEXP (vals, 0, 1));
22149 ix86_expand_vector_init (false, op0, gen_rtx_PARALLEL (half_mode, v));
22150
22151 op1 = gen_reg_rtx (half_mode);
22152 v = gen_rtvec (2, XVECEXP (vals, 0, 2), XVECEXP (vals, 0, 3));
22153 ix86_expand_vector_init (false, op1, gen_rtx_PARALLEL (half_mode, v));
22154
22155 use_vec_concat = true;
22156 }
22157 break;
22158
22159 case V8HImode:
22160 case V16QImode:
22161 case V4HImode:
22162 case V8QImode:
22163 break;
22164
22165 default:
22166 gcc_unreachable ();
22167 }
22168
22169 if (use_vec_concat)
22170 {
22171 if (!register_operand (op0, half_mode))
22172 op0 = force_reg (half_mode, op0);
22173 if (!register_operand (op1, half_mode))
22174 op1 = force_reg (half_mode, op1);
22175
22176 emit_insn (gen_rtx_SET (VOIDmode, target,
22177 gen_rtx_VEC_CONCAT (mode, op0, op1)));
22178 }
22179 else
22180 {
22181 int i, j, n_elts, n_words, n_elt_per_word;
22182 enum machine_mode inner_mode;
22183 rtx words[4], shift;
22184
22185 inner_mode = GET_MODE_INNER (mode);
22186 n_elts = GET_MODE_NUNITS (mode);
22187 n_words = GET_MODE_SIZE (mode) / UNITS_PER_WORD;
22188 n_elt_per_word = n_elts / n_words;
22189 shift = GEN_INT (GET_MODE_BITSIZE (inner_mode));
22190
22191 for (i = 0; i < n_words; ++i)
22192 {
22193 rtx word = NULL_RTX;
22194
22195 for (j = 0; j < n_elt_per_word; ++j)
22196 {
22197 rtx elt = XVECEXP (vals, 0, (i+1)*n_elt_per_word - j - 1);
22198 elt = convert_modes (word_mode, inner_mode, elt, true);
22199
22200 if (j == 0)
22201 word = elt;
22202 else
22203 {
22204 word = expand_simple_binop (word_mode, ASHIFT, word, shift,
22205 word, 1, OPTAB_LIB_WIDEN);
22206 word = expand_simple_binop (word_mode, IOR, word, elt,
22207 word, 1, OPTAB_LIB_WIDEN);
22208 }
22209 }
22210
22211 words[i] = word;
22212 }
22213
22214 if (n_words == 1)
22215 emit_move_insn (target, gen_lowpart (mode, words[0]));
22216 else if (n_words == 2)
22217 {
22218 rtx tmp = gen_reg_rtx (mode);
22219 emit_insn (gen_rtx_CLOBBER (VOIDmode, tmp));
22220 emit_move_insn (gen_lowpart (word_mode, tmp), words[0]);
22221 emit_move_insn (gen_highpart (word_mode, tmp), words[1]);
22222 emit_move_insn (target, tmp);
22223 }
22224 else if (n_words == 4)
22225 {
22226 rtx tmp = gen_reg_rtx (V4SImode);
22227 vals = gen_rtx_PARALLEL (V4SImode, gen_rtvec_v (4, words));
22228 ix86_expand_vector_init_general (false, V4SImode, tmp, vals);
22229 emit_move_insn (target, gen_lowpart (mode, tmp));
22230 }
22231 else
22232 gcc_unreachable ();
22233 }
22234 }
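
/* Editorial sketch, not part of GCC: a scalar model of the word-packing
   loop above, assuming a 32-bit word and two HImode elements per word
   (e.g. V8HImode on a 32-bit target).  Each element is zero-extended
   (convert_modes with unsignedp == 1), the word is shifted left by the
   element width, and the next lower element is OR-ed in, so the
   lowest-index element ends up in the least significant bits, matching
   the little-endian layout of the vector in memory.  The function name
   is illustrative only.  */
#if 0
#include <stdint.h>

static uint32_t
pack_two_hi (uint16_t elt0, uint16_t elt1)
{
  uint32_t word = elt1;                /* highest-index element first */
  word = (word << 16) | elt0;          /* ASHIFT, then IOR */
  return word;                         /* elt0 in bits 0-15, elt1 in 16-31 */
}
#endif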
22235
22236 /* Initialize vector TARGET via VALS. Suppress the use of MMX
22237 instructions unless MMX_OK is true. */
22238
22239 void
22240 ix86_expand_vector_init (bool mmx_ok, rtx target, rtx vals)
22241 {
22242 enum machine_mode mode = GET_MODE (target);
22243 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22244 int n_elts = GET_MODE_NUNITS (mode);
22245 int n_var = 0, one_var = -1;
22246 bool all_same = true, all_const_zero = true;
22247 int i;
22248 rtx x;
22249
22250 for (i = 0; i < n_elts; ++i)
22251 {
22252 x = XVECEXP (vals, 0, i);
22253 if (!CONSTANT_P (x))
22254 n_var++, one_var = i;
22255 else if (x != CONST0_RTX (inner_mode))
22256 all_const_zero = false;
22257 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
22258 all_same = false;
22259 }
22260
22261 /* Constants are best loaded from the constant pool. */
22262 if (n_var == 0)
22263 {
22264 emit_move_insn (target, gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0)));
22265 return;
22266 }
22267
22268 /* If all values are identical, broadcast the value. */
22269 if (all_same
22270 && ix86_expand_vector_init_duplicate (mmx_ok, mode, target,
22271 XVECEXP (vals, 0, 0)))
22272 return;
22273
22274 /* Values where only one field is non-constant are best loaded from
22275 the pool and overwritten via move later. */
22276 if (n_var == 1)
22277 {
22278 if (all_const_zero
22279 && ix86_expand_vector_init_one_nonzero (mmx_ok, mode, target,
22280 XVECEXP (vals, 0, one_var),
22281 one_var))
22282 return;
22283
22284 if (ix86_expand_vector_init_one_var (mmx_ok, mode, target, vals, one_var))
22285 return;
22286 }
22287
22288 ix86_expand_vector_init_general (mmx_ok, mode, target, vals);
22289 }
22290
22291 void
22292 ix86_expand_vector_set (bool mmx_ok, rtx target, rtx val, int elt)
22293 {
22294 enum machine_mode mode = GET_MODE (target);
22295 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22296 bool use_vec_merge = false;
22297 rtx tmp;
22298
22299 switch (mode)
22300 {
22301 case V2SFmode:
22302 case V2SImode:
22303 if (mmx_ok)
22304 {
22305 tmp = gen_reg_rtx (GET_MODE_INNER (mode));
22306 ix86_expand_vector_extract (true, tmp, target, 1 - elt);
22307 if (elt == 0)
22308 tmp = gen_rtx_VEC_CONCAT (mode, tmp, val);
22309 else
22310 tmp = gen_rtx_VEC_CONCAT (mode, val, tmp);
22311 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22312 return;
22313 }
22314 break;
22315
22316 case V2DImode:
22317 use_vec_merge = TARGET_SSE4_1;
22318 if (use_vec_merge)
22319 break;
22320
22321 case V2DFmode:
22322 {
22323 rtx op0, op1;
22324
22325 /* For the two element vectors, we implement a VEC_CONCAT with
22326 the extraction of the other element. */
22327
22328 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (1 - elt)));
22329 tmp = gen_rtx_VEC_SELECT (inner_mode, target, tmp);
22330
22331 if (elt == 0)
22332 op0 = val, op1 = tmp;
22333 else
22334 op0 = tmp, op1 = val;
22335
22336 tmp = gen_rtx_VEC_CONCAT (mode, op0, op1);
22337 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22338 }
22339 return;
22340
22341 case V4SFmode:
22342 use_vec_merge = TARGET_SSE4_1;
22343 if (use_vec_merge)
22344 break;
22345
22346 switch (elt)
22347 {
22348 case 0:
22349 use_vec_merge = true;
22350 break;
22351
22352 case 1:
22353 /* tmp = target = A B C D */
22354 tmp = copy_to_reg (target);
22355 /* target = A A B B */
22356 emit_insn (gen_sse_unpcklps (target, target, target));
22357 /* target = X A B B */
22358 ix86_expand_vector_set (false, target, val, 0);
22359 /* target = A X C D */
22360 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22361 GEN_INT (1), GEN_INT (0),
22362 GEN_INT (2+4), GEN_INT (3+4)));
22363 return;
22364
22365 case 2:
22366 /* tmp = target = A B C D */
22367 tmp = copy_to_reg (target);
22368 /* tmp = X B C D */
22369 ix86_expand_vector_set (false, tmp, val, 0);
22370 /* target = A B X D */
22371 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22372 GEN_INT (0), GEN_INT (1),
22373 GEN_INT (0+4), GEN_INT (3+4)));
22374 return;
22375
22376 case 3:
22377 /* tmp = target = A B C D */
22378 tmp = copy_to_reg (target);
22379 /* tmp = X B C D */
22380 ix86_expand_vector_set (false, tmp, val, 0);
22381 /* target = A B C X */
22382 emit_insn (gen_sse_shufps_1 (target, target, tmp,
22383 GEN_INT (0), GEN_INT (1),
22384 GEN_INT (2+4), GEN_INT (0+4)));
22385 return;
22386
22387 default:
22388 gcc_unreachable ();
22389 }
22390 break;
22391
22392 case V4SImode:
22393 use_vec_merge = TARGET_SSE4_1;
22394 if (use_vec_merge)
22395 break;
22396
22397 /* Element 0 handled by vec_merge below. */
22398 if (elt == 0)
22399 {
22400 use_vec_merge = true;
22401 break;
22402 }
22403
22404 if (TARGET_SSE2)
22405 {
22406 /* With SSE2, use integer shuffles to swap element 0 and ELT,
22407 store into element 0, then shuffle them back. */
22408
22409 rtx order[4];
22410
22411 order[0] = GEN_INT (elt);
22412 order[1] = const1_rtx;
22413 order[2] = const2_rtx;
22414 order[3] = GEN_INT (3);
22415 order[elt] = const0_rtx;
22416
22417 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22418 order[1], order[2], order[3]));
22419
22420 ix86_expand_vector_set (false, target, val, 0);
22421
22422 emit_insn (gen_sse2_pshufd_1 (target, target, order[0],
22423 order[1], order[2], order[3]));
22424 }
22425 else
22426 {
22427 /* For SSE1, we have to reuse the V4SF code. */
22428 ix86_expand_vector_set (false, gen_lowpart (V4SFmode, target),
22429 gen_lowpart (SFmode, val), elt);
22430 }
22431 return;
22432
22433 case V8HImode:
22434 use_vec_merge = TARGET_SSE2;
22435 break;
22436 case V4HImode:
22437 use_vec_merge = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22438 break;
22439
22440 case V16QImode:
22441 use_vec_merge = TARGET_SSE4_1;
22442 break;
22443
22444 case V8QImode:
22445 default:
22446 break;
22447 }
22448
22449 if (use_vec_merge)
22450 {
22451 tmp = gen_rtx_VEC_DUPLICATE (mode, val);
22452 tmp = gen_rtx_VEC_MERGE (mode, tmp, target, GEN_INT (1 << elt));
22453 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22454 }
22455 else
22456 {
22457 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22458
22459 emit_move_insn (mem, target);
22460
22461 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22462 emit_move_insn (tmp, val);
22463
22464 emit_move_insn (target, mem);
22465 }
22466 }
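
/* Editorial sketch, not part of GCC: a scalar model of the vec_merge
   path above (used when use_vec_merge is set).  VAL is duplicated into
   every lane, and VEC_MERGE then takes the duplicated lane where the
   corresponding bit of the (1 << ELT) mask is set, keeping the old
   TARGET lane elsewhere, so exactly one element changes.  The function
   name is illustrative only.  */
#if 0
static void
set_elt_model (float target[4], float val, int elt)
{
  float dup[4];
  unsigned int mask = 1u << elt;
  int i;

  for (i = 0; i < 4; i++)
    dup[i] = val;                    /* VEC_DUPLICATE (mode, val) */
  for (i = 0; i < 4; i++)
    if (mask & (1u << i))
      target[i] = dup[i];            /* VEC_MERGE with mask 1 << elt */
}
#endif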
22467
22468 void
22469 ix86_expand_vector_extract (bool mmx_ok, rtx target, rtx vec, int elt)
22470 {
22471 enum machine_mode mode = GET_MODE (vec);
22472 enum machine_mode inner_mode = GET_MODE_INNER (mode);
22473 bool use_vec_extr = false;
22474 rtx tmp;
22475
22476 switch (mode)
22477 {
22478 case V2SImode:
22479 case V2SFmode:
22480 if (!mmx_ok)
22481 break;
22482 /* FALLTHRU */
22483
22484 case V2DFmode:
22485 case V2DImode:
22486 use_vec_extr = true;
22487 break;
22488
22489 case V4SFmode:
22490 use_vec_extr = TARGET_SSE4_1;
22491 if (use_vec_extr)
22492 break;
22493
22494 switch (elt)
22495 {
22496 case 0:
22497 tmp = vec;
22498 break;
22499
22500 case 1:
22501 case 3:
22502 tmp = gen_reg_rtx (mode);
22503 emit_insn (gen_sse_shufps_1 (tmp, vec, vec,
22504 GEN_INT (elt), GEN_INT (elt),
22505 GEN_INT (elt+4), GEN_INT (elt+4)));
22506 break;
22507
22508 case 2:
22509 tmp = gen_reg_rtx (mode);
22510 emit_insn (gen_sse_unpckhps (tmp, vec, vec));
22511 break;
22512
22513 default:
22514 gcc_unreachable ();
22515 }
22516 vec = tmp;
22517 use_vec_extr = true;
22518 elt = 0;
22519 break;
22520
22521 case V4SImode:
22522 use_vec_extr = TARGET_SSE4_1;
22523 if (use_vec_extr)
22524 break;
22525
22526 if (TARGET_SSE2)
22527 {
22528 switch (elt)
22529 {
22530 case 0:
22531 tmp = vec;
22532 break;
22533
22534 case 1:
22535 case 3:
22536 tmp = gen_reg_rtx (mode);
22537 emit_insn (gen_sse2_pshufd_1 (tmp, vec,
22538 GEN_INT (elt), GEN_INT (elt),
22539 GEN_INT (elt), GEN_INT (elt)));
22540 break;
22541
22542 case 2:
22543 tmp = gen_reg_rtx (mode);
22544 emit_insn (gen_sse2_punpckhdq (tmp, vec, vec));
22545 break;
22546
22547 default:
22548 gcc_unreachable ();
22549 }
22550 vec = tmp;
22551 use_vec_extr = true;
22552 elt = 0;
22553 }
22554 else
22555 {
22556 /* For SSE1, we have to reuse the V4SF code. */
22557 ix86_expand_vector_extract (false, gen_lowpart (SFmode, target),
22558 gen_lowpart (V4SFmode, vec), elt);
22559 return;
22560 }
22561 break;
22562
22563 case V8HImode:
22564 use_vec_extr = TARGET_SSE2;
22565 break;
22566 case V4HImode:
22567 use_vec_extr = mmx_ok && (TARGET_SSE || TARGET_3DNOW_A);
22568 break;
22569
22570 case V16QImode:
22571 use_vec_extr = TARGET_SSE4_1;
22572 break;
22573
22574 case V8QImode:
22575 /* ??? Could extract the appropriate HImode element and shift. */
22576 default:
22577 break;
22578 }
22579
22580 if (use_vec_extr)
22581 {
22582 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, GEN_INT (elt)));
22583 tmp = gen_rtx_VEC_SELECT (inner_mode, vec, tmp);
22584
22585 /* Let the rtl optimizers know about the zero extension performed. */
22586 if (inner_mode == QImode || inner_mode == HImode)
22587 {
22588 tmp = gen_rtx_ZERO_EXTEND (SImode, tmp);
22589 target = gen_lowpart (SImode, target);
22590 }
22591
22592 emit_insn (gen_rtx_SET (VOIDmode, target, tmp));
22593 }
22594 else
22595 {
22596 rtx mem = assign_stack_temp (mode, GET_MODE_SIZE (mode), false);
22597
22598 emit_move_insn (mem, vec);
22599
22600 tmp = adjust_address (mem, inner_mode, elt*GET_MODE_SIZE (inner_mode));
22601 emit_move_insn (target, tmp);
22602 }
22603 }
22604
22605 /* Expand a vector reduction on V4SFmode for SSE1. FN is the binary
22606 pattern to reduce; DEST is the destination; IN is the input vector. */
22607
22608 void
22609 ix86_expand_reduc_v4sf (rtx (*fn) (rtx, rtx, rtx), rtx dest, rtx in)
22610 {
22611 rtx tmp1, tmp2, tmp3;
22612
22613 tmp1 = gen_reg_rtx (V4SFmode);
22614 tmp2 = gen_reg_rtx (V4SFmode);
22615 tmp3 = gen_reg_rtx (V4SFmode);
22616
22617 emit_insn (gen_sse_movhlps (tmp1, in, in));
22618 emit_insn (fn (tmp2, tmp1, in));
22619
22620 emit_insn (gen_sse_shufps_1 (tmp3, tmp2, tmp2,
22621 GEN_INT (1), GEN_INT (1),
22622 GEN_INT (1+4), GEN_INT (1+4)));
22623 emit_insn (fn (dest, tmp2, tmp3));
22624 }
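
/* Editorial sketch, not part of GCC: a scalar model of the reduction
   above with max as the binary operation FN.  movhlps brings elements
   2 and 3 down into the low half, the first FN combines lanes (0,2)
   and (1,3), the shufps broadcasts lane 1 of that partial result, and
   the final FN leaves the full reduction in lane 0 of DEST.  */
#if 0
static float
reduc_max_v4sf (const float in[4])
{
  float p0 = in[0] > in[2] ? in[0] : in[2];   /* fn (in[0], in[2]) */
  float p1 = in[1] > in[3] ? in[1] : in[3];   /* fn (in[1], in[3]) */
  return p0 > p1 ? p0 : p1;                   /* fn (p0, p1) -> lane 0 */
}
#endif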
22625 \f
22626 /* Target hook for scalar_mode_supported_p. */
22627 static bool
22628 ix86_scalar_mode_supported_p (enum machine_mode mode)
22629 {
22630 if (DECIMAL_FLOAT_MODE_P (mode))
22631 return true;
22632 else if (mode == TFmode)
22633 return TARGET_64BIT;
22634 else
22635 return default_scalar_mode_supported_p (mode);
22636 }
22637
22638 /* Implements target hook vector_mode_supported_p. */
22639 static bool
22640 ix86_vector_mode_supported_p (enum machine_mode mode)
22641 {
22642 if (TARGET_SSE && VALID_SSE_REG_MODE (mode))
22643 return true;
22644 if (TARGET_SSE2 && VALID_SSE2_REG_MODE (mode))
22645 return true;
22646 if (TARGET_MMX && VALID_MMX_REG_MODE (mode))
22647 return true;
22648 if (TARGET_3DNOW && VALID_MMX_REG_MODE_3DNOW (mode))
22649 return true;
22650 return false;
22651 }
22652
22653 /* Target hook for c_mode_for_suffix. */
22654 static enum machine_mode
22655 ix86_c_mode_for_suffix (char suffix)
22656 {
22657 if (TARGET_64BIT && suffix == 'q')
22658 return TFmode;
22659 if (TARGET_MMX && suffix == 'w')
22660 return XFmode;
22661
22662 return VOIDmode;
22663 }
22664
22665 /* Worker function for TARGET_MD_ASM_CLOBBERS.
22666
22667 We clobber the flags and fpsr registers here to maintain source
22668 compatibility with the old cc0-based compiler. */
22669
22670 static tree
22671 ix86_md_asm_clobbers (tree outputs ATTRIBUTE_UNUSED,
22672 tree inputs ATTRIBUTE_UNUSED,
22673 tree clobbers)
22674 {
22675 clobbers = tree_cons (NULL_TREE, build_string (5, "flags"),
22676 clobbers);
22677 clobbers = tree_cons (NULL_TREE, build_string (4, "fpsr"),
22678 clobbers);
22679 return clobbers;
22680 }
22681
22682 /* Implements the target hook targetm.asm.encode_section_info. This
22683 is not used by NetWare. */
22684
22685 static void ATTRIBUTE_UNUSED
22686 ix86_encode_section_info (tree decl, rtx rtl, int first)
22687 {
22688 default_encode_section_info (decl, rtl, first);
22689
22690 if (TREE_CODE (decl) == VAR_DECL
22691 && (TREE_STATIC (decl) || DECL_EXTERNAL (decl))
22692 && ix86_in_large_data_p (decl))
22693 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FAR_ADDR;
22694 }
22695
22696 /* Worker function for REVERSE_CONDITION. */
22697
22698 enum rtx_code
22699 ix86_reverse_condition (enum rtx_code code, enum machine_mode mode)
22700 {
22701 return (mode != CCFPmode && mode != CCFPUmode
22702 ? reverse_condition (code)
22703 : reverse_condition_maybe_unordered (code));
22704 }
22705
22706 /* Output code to perform an x87 FP register move, from OPERANDS[1]
22707 to OPERANDS[0]. */
22708
22709 const char *
22710 output_387_reg_move (rtx insn, rtx *operands)
22711 {
22712 if (REG_P (operands[0]))
22713 {
22714 if (REG_P (operands[1])
22715 && find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22716 {
22717 if (REGNO (operands[0]) == FIRST_STACK_REG)
22718 return output_387_ffreep (operands, 0);
22719 return "fstp\t%y0";
22720 }
22721 if (STACK_TOP_P (operands[0]))
22722 return "fld%z1\t%y1";
22723 return "fst\t%y0";
22724 }
22725 else if (MEM_P (operands[0]))
22726 {
22727 gcc_assert (REG_P (operands[1]));
22728 if (find_regno_note (insn, REG_DEAD, REGNO (operands[1])))
22729 return "fstp%z0\t%y0";
22730 else
22731 {
22732 /* There is no non-popping store to memory for XFmode.
22733 So if we need one, follow the store with a load. */
22734 if (GET_MODE (operands[0]) == XFmode)
22735 return "fstp%z0\t%y0\n\tfld%z0\t%y0";
22736 else
22737 return "fst%z0\t%y0";
22738 }
22739 }
22740 else
22741 gcc_unreachable();
22742 }
22743
22744 /* Output code to perform a conditional jump to LABEL if the C2 flag in
22745 the FP status register is set. */
22746
22747 void
22748 ix86_emit_fp_unordered_jump (rtx label)
22749 {
22750 rtx reg = gen_reg_rtx (HImode);
22751 rtx temp;
22752
22753 emit_insn (gen_x86_fnstsw_1 (reg));
22754
22755 if (TARGET_SAHF && (TARGET_USE_SAHF || optimize_size))
22756 {
22757 emit_insn (gen_x86_sahf_1 (reg));
22758
22759 temp = gen_rtx_REG (CCmode, FLAGS_REG);
22760 temp = gen_rtx_UNORDERED (VOIDmode, temp, const0_rtx);
22761 }
22762 else
22763 {
22764 emit_insn (gen_testqi_ext_ccno_0 (reg, GEN_INT (0x04)));
22765
22766 temp = gen_rtx_REG (CCNOmode, FLAGS_REG);
22767 temp = gen_rtx_NE (VOIDmode, temp, const0_rtx);
22768 }
22769
22770 temp = gen_rtx_IF_THEN_ELSE (VOIDmode, temp,
22771 gen_rtx_LABEL_REF (VOIDmode, label),
22772 pc_rtx);
22773 temp = gen_rtx_SET (VOIDmode, pc_rtx, temp);
22774
22775 emit_jump_insn (temp);
22776 predict_jump (REG_BR_PROB_BASE * 10 / 100);
22777 }
22778
22779 /* Output code to perform a log1p XFmode calculation. */
22780
22781 void ix86_emit_i387_log1p (rtx op0, rtx op1)
22782 {
22783 rtx label1 = gen_label_rtx ();
22784 rtx label2 = gen_label_rtx ();
22785
22786 rtx tmp = gen_reg_rtx (XFmode);
22787 rtx tmp2 = gen_reg_rtx (XFmode);
22788
22789 emit_insn (gen_absxf2 (tmp, op1));
22790 emit_insn (gen_cmpxf (tmp,
22791 CONST_DOUBLE_FROM_REAL_VALUE (
22792 REAL_VALUE_ATOF ("0.29289321881345247561810596348408353", XFmode),
22793 XFmode)));
22794 emit_jump_insn (gen_bge (label1));
22795
22796 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22797 emit_insn (gen_fyl2xp1xf3_i387 (op0, op1, tmp2));
22798 emit_jump (label2);
22799
22800 emit_label (label1);
22801 emit_move_insn (tmp, CONST1_RTX (XFmode));
22802 emit_insn (gen_addxf3 (tmp, op1, tmp));
22803 emit_move_insn (tmp2, standard_80387_constant_rtx (4)); /* fldln2 */
22804 emit_insn (gen_fyl2xxf3_i387 (op0, tmp, tmp2));
22805
22806 emit_label (label2);
22807 }
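
/* Editorial sketch, not part of GCC: a scalar model of the branch
   above.  fyl2xp1 computes y * log2 (x + 1) but is only specified for
   |x| < 1 - sqrt(2)/2 ~= 0.2928932, where forming 1 + x explicitly
   would lose precision; larger inputs take the ordinary fyl2x path on
   the rounded sum 1 + x.  Loading fldln2 as y turns log2 into a
   natural logarithm.  The function name is illustrative only.  */
#if 0
#include <math.h>

static double
log1p_model (double x)
{
  const double ln2 = 0.69314718055994530942;
  if (fabs (x) < 0.29289321881345247561)  /* 1 - sqrt(2)/2 */
    return log1p (x);                     /* fyl2xp1 path: ln2 * log2 (1 + x)
                                             without forming 1 + x */
  return ln2 * log2 (1.0 + x);            /* fyl2x path on tmp = 1 + x */
}
#endif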
22808
22809 /* Output code to perform a Newton-Raphson approximation of a single precision
22810 floating point divide [http://en.wikipedia.org/wiki/N-th_root_algorithm]. */
22811
22812 void ix86_emit_swdivsf (rtx res, rtx a, rtx b, enum machine_mode mode)
22813 {
22814 rtx x0, x1, e0, e1, two;
22815
22816 x0 = gen_reg_rtx (mode);
22817 e0 = gen_reg_rtx (mode);
22818 e1 = gen_reg_rtx (mode);
22819 x1 = gen_reg_rtx (mode);
22820
22821 two = CONST_DOUBLE_FROM_REAL_VALUE (dconst2, SFmode);
22822
22823 if (VECTOR_MODE_P (mode))
22824 two = ix86_build_const_vector (SFmode, true, two);
22825
22826 two = force_reg (mode, two);
22827
22828 /* a / b = a * rcp(b) * (2.0 - b * rcp(b)) */
22829
22830 /* x0 = 1./b estimate */
22831 emit_insn (gen_rtx_SET (VOIDmode, x0,
22832 gen_rtx_UNSPEC (mode, gen_rtvec (1, b),
22833 UNSPEC_RCP)));
22834 /* e0 = x0 * b */
22835 emit_insn (gen_rtx_SET (VOIDmode, e0,
22836 gen_rtx_MULT (mode, x0, b)));
22837 /* e1 = 2. - e0 */
22838 emit_insn (gen_rtx_SET (VOIDmode, e1,
22839 gen_rtx_MINUS (mode, two, e0)));
22840 /* x1 = x0 * e1 */
22841 emit_insn (gen_rtx_SET (VOIDmode, x1,
22842 gen_rtx_MULT (mode, x0, e1)));
22843 /* res = a * x1 */
22844 emit_insn (gen_rtx_SET (VOIDmode, res,
22845 gen_rtx_MULT (mode, a, x1)));
22846 }
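
/* Editorial sketch, not part of GCC: a scalar model of the reciprocal
   refinement emitted above.  rcpss supplies roughly 12 bits of 1/b;
   one Newton-Raphson step, x1 = x0 * (2 - b * x0), roughly doubles the
   number of correct bits, which is enough for SFmode.  The exact
   division below merely stands in for the rcpss estimate; the function
   name is illustrative only.  */
#if 0
static float
nr_divf (float a, float b)
{
  float x0 = 1.0f / b;           /* stands in for the rcpss estimate */
  float e0 = x0 * b;             /* e0 = b * rcp (b) */
  float e1 = 2.0f - e0;          /* correction factor */
  float x1 = x0 * e1;            /* refined reciprocal */
  return a * x1;                 /* a / b ~= a * rcp (b) * (2 - b * rcp (b)) */
}
#endif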
22847
22848 /* Output code to perform a Newton-Raphson approximation of a
22849 single precision floating point [reciprocal] square root. */
22850
22851 void ix86_emit_swsqrtsf (rtx res, rtx a, enum machine_mode mode,
22852 bool recip)
22853 {
22854 rtx x0, e0, e1, e2, e3, three, half, zero, mask;
22855
22856 x0 = gen_reg_rtx (mode);
22857 e0 = gen_reg_rtx (mode);
22858 e1 = gen_reg_rtx (mode);
22859 e2 = gen_reg_rtx (mode);
22860 e3 = gen_reg_rtx (mode);
22861
22862 three = CONST_DOUBLE_FROM_REAL_VALUE (dconst3, SFmode);
22863 half = CONST_DOUBLE_FROM_REAL_VALUE (dconsthalf, SFmode);
22864
22865 mask = gen_reg_rtx (mode);
22866
22867 if (VECTOR_MODE_P (mode))
22868 {
22869 three = ix86_build_const_vector (SFmode, true, three);
22870 half = ix86_build_const_vector (SFmode, true, half);
22871 }
22872
22873 three = force_reg (mode, three);
22874 half = force_reg (mode, half);
22875
22876 zero = force_reg (mode, CONST0_RTX(mode));
22877
22878 /* sqrt(a) = 0.5 * a * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a))
22879 1.0 / sqrt(a) = 0.5 * rsqrtss(a) * (3.0 - a * rsqrtss(a) * rsqrtss(a)) */
22880
22881 /* Compare a to zero. */
22882 emit_insn (gen_rtx_SET (VOIDmode, mask,
22883 gen_rtx_NE (mode, a, zero)));
22884
22885 /* x0 = 1./sqrt(a) estimate */
22886 emit_insn (gen_rtx_SET (VOIDmode, x0,
22887 gen_rtx_UNSPEC (mode, gen_rtvec (1, a),
22888 UNSPEC_RSQRT)));
22889 /* Filter out infinity. */
22890 if (VECTOR_MODE_P (mode))
22891 emit_insn (gen_rtx_SET (VOIDmode, gen_lowpart (V4SFmode, x0),
22892 gen_rtx_AND (mode,
22893 gen_lowpart (V4SFmode, x0),
22894 gen_lowpart (V4SFmode, mask))));
22895 else
22896 emit_insn (gen_rtx_SET (VOIDmode, x0,
22897 gen_rtx_AND (mode, x0, mask)));
22898
22899 /* e0 = x0 * a */
22900 emit_insn (gen_rtx_SET (VOIDmode, e0,
22901 gen_rtx_MULT (mode, x0, a)));
22902 /* e1 = e0 * x0 */
22903 emit_insn (gen_rtx_SET (VOIDmode, e1,
22904 gen_rtx_MULT (mode, e0, x0)));
22905 /* e2 = 3. - e1 */
22906 emit_insn (gen_rtx_SET (VOIDmode, e2,
22907 gen_rtx_MINUS (mode, three, e1)));
22908 if (recip)
22909 /* e3 = .5 * x0 */
22910 emit_insn (gen_rtx_SET (VOIDmode, e3,
22911 gen_rtx_MULT (mode, half, x0)));
22912 else
22913 /* e3 = .5 * e0 */
22914 emit_insn (gen_rtx_SET (VOIDmode, e3,
22915 gen_rtx_MULT (mode, half, e0)));
22916 /* ret = e2 * e3 */
22917 emit_insn (gen_rtx_SET (VOIDmode, res,
22918 gen_rtx_MULT (mode, e2, e3)));
22919 }
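
/* Editorial sketch, not part of GCC: a scalar model of the refinement
   emitted above.  rsqrtss supplies roughly 12 bits of 1/sqrt(a); the
   Newton-Raphson step 0.5 * x0 * (3 - a * x0 * x0) refines it, and
   scaling e0 = a * x0 instead of x0 yields sqrt(a).  The estimate is
   zeroed when a == 0 so the sqrt variant returns 0 rather than 0 * inf.
   The library call stands in for rsqrtss; the name is illustrative.  */
#if 0
#include <math.h>

static float
nr_sqrtf (float a, int recip)
{
  float x0 = (a != 0.0f) ? 1.0f / sqrtf (a) : 0.0f;
  float e0 = x0 * a;                       /* ~ sqrt (a) */
  float e1 = e0 * x0;                      /* ~ 1 when x0 is exact */
  float e2 = 3.0f - e1;
  float e3 = recip ? 0.5f * x0 : 0.5f * e0;
  return e2 * e3;                          /* 1/sqrt(a) or sqrt(a) */
}
#endif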
22920
22921 /* Solaris implementation of TARGET_ASM_NAMED_SECTION. */
22922
22923 static void ATTRIBUTE_UNUSED
22924 i386_solaris_elf_named_section (const char *name, unsigned int flags,
22925 tree decl)
22926 {
22927 /* With Binutils 2.15, the "@unwind" marker must be specified on
22928 every occurrence of the ".eh_frame" section, not just the first
22929 one. */
22930 if (TARGET_64BIT
22931 && strcmp (name, ".eh_frame") == 0)
22932 {
22933 fprintf (asm_out_file, "\t.section\t%s,\"%s\",@unwind\n", name,
22934 flags & SECTION_WRITE ? "aw" : "a");
22935 return;
22936 }
22937 default_elf_asm_named_section (name, flags, decl);
22938 }
22939
22940 /* Return the mangling of TYPE if it is an extended fundamental type. */
22941
22942 static const char *
22943 ix86_mangle_type (const_tree type)
22944 {
22945 type = TYPE_MAIN_VARIANT (type);
22946
22947 if (TREE_CODE (type) != VOID_TYPE && TREE_CODE (type) != BOOLEAN_TYPE
22948 && TREE_CODE (type) != INTEGER_TYPE && TREE_CODE (type) != REAL_TYPE)
22949 return NULL;
22950
22951 switch (TYPE_MODE (type))
22952 {
22953 case TFmode:
22954 /* __float128 is "g". */
22955 return "g";
22956 case XFmode:
22957 /* "long double" or __float80 is "e". */
22958 return "e";
22959 default:
22960 return NULL;
22961 }
22962 }
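
/* Editorial illustration, not part of GCC: under the Itanium C++ ABI
   the codes returned above give, for example, _Z1fg as the mangling of
   void f (__float128) and _Z1fe for void f (long double).  */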
22963
22964 /* For 32-bit code we can save PIC register setup by using
22965 the hidden function __stack_chk_fail_local instead of calling
22966 __stack_chk_fail directly. 64-bit code doesn't need to set up any PIC
22967 register, so it is better to call __stack_chk_fail directly. */
22968
22969 static tree
22970 ix86_stack_protect_fail (void)
22971 {
22972 return TARGET_64BIT
22973 ? default_external_stack_protect_fail ()
22974 : default_hidden_stack_protect_fail ();
22975 }
22976
22977 /* Select a format to encode pointers in exception handling data. CODE
22978 is 0 for data, 1 for code labels, 2 for function pointers. GLOBAL is
22979 true if the symbol may be affected by dynamic relocations.
22980
22981 ??? All x86 object file formats are capable of representing this.
22982 After all, the relocation needed is the same as for the call insn.
22983 Whether or not a particular assembler allows us to enter such, I
22984 guess we'll have to see. */
22985 int
22986 asm_preferred_eh_data_format (int code, int global)
22987 {
22988 if (flag_pic)
22989 {
22990 int type = DW_EH_PE_sdata8;
22991 if (!TARGET_64BIT
22992 || ix86_cmodel == CM_SMALL_PIC
22993 || (ix86_cmodel == CM_MEDIUM_PIC && (global || code)))
22994 type = DW_EH_PE_sdata4;
22995 return (global ? DW_EH_PE_indirect : 0) | DW_EH_PE_pcrel | type;
22996 }
22997 if (ix86_cmodel == CM_SMALL
22998 || (ix86_cmodel == CM_MEDIUM && code))
22999 return DW_EH_PE_udata4;
23000 return DW_EH_PE_absptr;
23001 }
23002 \f
23003 /* Expand copysign from SIGN to the positive value ABS_VALUE,
23004 storing the result in RESULT. If MASK is non-null, it is used to
23005 mask out the sign bit. */
23006 static void
23007 ix86_sse_copysign_to_positive (rtx result, rtx abs_value, rtx sign, rtx mask)
23008 {
23009 enum machine_mode mode = GET_MODE (sign);
23010 rtx sgn = gen_reg_rtx (mode);
23011 if (mask == NULL_RTX)
23012 {
23013 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), false);
23014 if (!VECTOR_MODE_P (mode))
23015 {
23016 /* We need to generate a scalar mode mask in this case. */
23017 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
23018 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
23019 mask = gen_reg_rtx (mode);
23020 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
23021 }
23022 }
23023 else
23024 mask = gen_rtx_NOT (mode, mask);
23025 emit_insn (gen_rtx_SET (VOIDmode, sgn,
23026 gen_rtx_AND (mode, mask, sign)));
23027 emit_insn (gen_rtx_SET (VOIDmode, result,
23028 gen_rtx_IOR (mode, abs_value, sgn)));
23029 }
23030
23031 /* Expand fabs (OP0) and return a new rtx that holds the result. The
23032 mask for masking out the sign-bit is stored in *SMASK, if that is
23033 non-null. */
23034 static rtx
23035 ix86_expand_sse_fabs (rtx op0, rtx *smask)
23036 {
23037 enum machine_mode mode = GET_MODE (op0);
23038 rtx xa, mask;
23039
23040 xa = gen_reg_rtx (mode);
23041 mask = ix86_build_signbit_mask (mode, VECTOR_MODE_P (mode), true);
23042 if (!VECTOR_MODE_P (mode))
23043 {
23044 /* We need to generate a scalar mode mask in this case. */
23045 rtx tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec (1, const0_rtx));
23046 tmp = gen_rtx_VEC_SELECT (mode, mask, tmp);
23047 mask = gen_reg_rtx (mode);
23048 emit_insn (gen_rtx_SET (VOIDmode, mask, tmp));
23049 }
23050 emit_insn (gen_rtx_SET (VOIDmode, xa,
23051 gen_rtx_AND (mode, op0, mask)));
23052
23053 if (smask)
23054 *smask = mask;
23055
23056 return xa;
23057 }
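
/* Editorial sketch, not part of GCC: a scalar model of the sign-bit
   masking used by the two helpers above.  fabs is an AND that clears
   the sign bit; copysign ORs the isolated sign bit of SIGN into an
   already-positive value.  The memcpy type punning and the function
   name are illustrative only.  */
#if 0
#include <stdint.h>
#include <string.h>

static float
copysignf_model (float abs_value, float sign)
{
  uint32_t a, s;
  memcpy (&a, &abs_value, sizeof a);
  memcpy (&s, &sign, sizeof s);
  a &= 0x7fffffffu;                /* fabs: clear the sign bit */
  a |= s & 0x80000000u;            /* copy the sign bit of SIGN */
  memcpy (&abs_value, &a, sizeof a);
  return abs_value;
}
#endif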
23058
23059 /* Expands a comparison of OP0 with OP1 using comparison code CODE,
23060 swapping the operands if SWAP_OPERANDS is true. The expanded
23061 code is a forward jump to a newly created label in case the
23062 comparison is true. The generated label rtx is returned. */
23063 static rtx
23064 ix86_expand_sse_compare_and_jump (enum rtx_code code, rtx op0, rtx op1,
23065 bool swap_operands)
23066 {
23067 rtx label, tmp;
23068
23069 if (swap_operands)
23070 {
23071 tmp = op0;
23072 op0 = op1;
23073 op1 = tmp;
23074 }
23075
23076 label = gen_label_rtx ();
23077 tmp = gen_rtx_REG (CCFPUmode, FLAGS_REG);
23078 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23079 gen_rtx_COMPARE (CCFPUmode, op0, op1)));
23080 tmp = gen_rtx_fmt_ee (code, VOIDmode, tmp, const0_rtx);
23081 tmp = gen_rtx_IF_THEN_ELSE (VOIDmode, tmp,
23082 gen_rtx_LABEL_REF (VOIDmode, label), pc_rtx);
23083 tmp = emit_jump_insn (gen_rtx_SET (VOIDmode, pc_rtx, tmp));
23084 JUMP_LABEL (tmp) = label;
23085
23086 return label;
23087 }
23088
23089 /* Expand a mask generating SSE comparison instruction comparing OP0 with OP1
23090 using comparison code CODE. Operands are swapped for the comparison if
23091 SWAP_OPERANDS is true. Returns a rtx for the generated mask. */
23092 static rtx
23093 ix86_expand_sse_compare_mask (enum rtx_code code, rtx op0, rtx op1,
23094 bool swap_operands)
23095 {
23096 enum machine_mode mode = GET_MODE (op0);
23097 rtx mask = gen_reg_rtx (mode);
23098
23099 if (swap_operands)
23100 {
23101 rtx tmp = op0;
23102 op0 = op1;
23103 op1 = tmp;
23104 }
23105
23106 if (mode == DFmode)
23107 emit_insn (gen_sse2_maskcmpdf3 (mask, op0, op1,
23108 gen_rtx_fmt_ee (code, mode, op0, op1)));
23109 else
23110 emit_insn (gen_sse_maskcmpsf3 (mask, op0, op1,
23111 gen_rtx_fmt_ee (code, mode, op0, op1)));
23112
23113 return mask;
23114 }
23115
23116 /* Generate and return a rtx of mode MODE for 2**n where n is the number
23117 of bits of the mantissa of MODE, which must be one of DFmode or SFmode. */
23118 static rtx
23119 ix86_gen_TWO52 (enum machine_mode mode)
23120 {
23121 REAL_VALUE_TYPE TWO52r;
23122 rtx TWO52;
23123
23124 real_ldexp (&TWO52r, &dconst1, mode == DFmode ? 52 : 23);
23125 TWO52 = const_double_from_real_value (TWO52r, mode);
23126 TWO52 = force_reg (mode, TWO52);
23127
23128 return TWO52;
23129 }
23130
23131 /* Expand SSE sequence for computing lround from OP1 storing
23132 into OP0. */
23133 void
23134 ix86_expand_lround (rtx op0, rtx op1)
23135 {
23136 /* C code for the stuff we're doing below:
23137 tmp = op1 + copysign (nextafter (0.5, 0.0), op1)
23138 return (long)tmp;
23139 */
23140 enum machine_mode mode = GET_MODE (op1);
23141 const struct real_format *fmt;
23142 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23143 rtx adj;
23144
23145 /* load nextafter (0.5, 0.0) */
23146 fmt = REAL_MODE_FORMAT (mode);
23147 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
23148 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23149
23150 /* adj = copysign (0.5, op1) */
23151 adj = force_reg (mode, const_double_from_real_value (pred_half, mode));
23152 ix86_sse_copysign_to_positive (adj, adj, force_reg (mode, op1), NULL_RTX);
23153
23154 /* adj = op1 + adj */
23155 adj = expand_simple_binop (mode, PLUS, adj, op1, NULL_RTX, 0, OPTAB_DIRECT);
23156
23157 /* op0 = (imode)adj */
23158 expand_fix (op0, adj, 0);
23159 }
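
/* Editorial sketch, not part of GCC: a scalar model of the expansion
   above.  Adding exactly 0.5 before truncating would misround inputs
   just below 0.5 (0.5 - 2^-54 plus 0.5 rounds up to 1.0), so the
   largest double strictly below 0.5, nextafter (0.5, 0.0), is used as
   the adjustment, with the sign of the input copied onto it.  The
   function name is illustrative only.  */
#if 0
#include <math.h>

static long
lround_model (double x)
{
  double adj = nextafter (0.5, 0.0);   /* pred_half = 0.5 - 2^-54 */
  adj = copysign (adj, x);
  return (long) (x + adj);             /* truncating conversion */
}
#endif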
23160
23161 /* Expand SSE2 sequence for computing lfloor or lceil from OP1 storing
23162 into OP0. */
23163 void
23164 ix86_expand_lfloorceil (rtx op0, rtx op1, bool do_floor)
23165 {
23166 /* C code for the stuff we're doing below (for do_floor):
23167 xi = (long)op1;
23168 xi -= (double)xi > op1 ? 1 : 0;
23169 return xi;
23170 */
23171 enum machine_mode fmode = GET_MODE (op1);
23172 enum machine_mode imode = GET_MODE (op0);
23173 rtx ireg, freg, label, tmp;
23174
23175 /* reg = (long)op1 */
23176 ireg = gen_reg_rtx (imode);
23177 expand_fix (ireg, op1, 0);
23178
23179 /* freg = (double)reg */
23180 freg = gen_reg_rtx (fmode);
23181 expand_float (freg, ireg, 0);
23182
23183 /* ireg = (freg > op1) ? ireg - 1 : ireg */
23184 label = ix86_expand_sse_compare_and_jump (UNLE,
23185 freg, op1, !do_floor);
23186 tmp = expand_simple_binop (imode, do_floor ? MINUS : PLUS,
23187 ireg, const1_rtx, NULL_RTX, 0, OPTAB_DIRECT);
23188 emit_move_insn (ireg, tmp);
23189
23190 emit_label (label);
23191 LABEL_NUSES (label) = 1;
23192
23193 emit_move_insn (op0, ireg);
23194 }
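
/* Editorial sketch, not part of GCC: a scalar model of the lfloor path
   above.  The value is truncated toward zero (cvttsd2si) and then
   decremented when the truncated result ended up above the input,
   which can only happen for negative non-integral inputs; the lceil
   variant instead increments when the result is below the input.  */
#if 0
static long
lfloor_model (double x)
{
  long xi = (long) x;          /* truncate toward zero */
  if ((double) xi > x)         /* x was negative and not integral */
    xi -= 1;
  return xi;
}
#endif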
23195
23196 /* Expand rint (IEEE round to nearest) rounding OPERAND1 and storing the
23197 result in OPERAND0. */
23198 void
23199 ix86_expand_rint (rtx operand0, rtx operand1)
23200 {
23201 /* C code for the stuff we're doing below:
23202 xa = fabs (operand1);
23203 if (!isless (xa, 2**52))
23204 return operand1;
23205 xa = xa + 2**52 - 2**52;
23206 return copysign (xa, operand1);
23207 */
23208 enum machine_mode mode = GET_MODE (operand0);
23209 rtx res, xa, label, TWO52, mask;
23210
23211 res = gen_reg_rtx (mode);
23212 emit_move_insn (res, operand1);
23213
23214 /* xa = abs (operand1) */
23215 xa = ix86_expand_sse_fabs (res, &mask);
23216
23217 /* if (!isless (xa, TWO52)) goto label; */
23218 TWO52 = ix86_gen_TWO52 (mode);
23219 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23220
23221 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23222 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23223
23224 ix86_sse_copysign_to_positive (res, xa, res, mask);
23225
23226 emit_label (label);
23227 LABEL_NUSES (label) = 1;
23228
23229 emit_move_insn (operand0, res);
23230 }
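
/* Editorial sketch, not part of GCC: a scalar model of the rint
   expansion above, assuming the arithmetic is performed in double
   precision as the SSE sequence guarantees.  Adding and subtracting
   2^52 pushes the fraction bits out of any |x| < 2^52, so the FPU's
   round-to-nearest mode performs the rounding; larger magnitudes (and
   NaNs) are handled by the early return.  The sign is copied back so
   that, e.g., -0.25 rounds to -0.0 rather than +0.0.  */
#if 0
#include <math.h>

static double
rint_model (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa = fabs (x);
  if (!(xa < two52))                          /* !isless: big or NaN */
    return x;
  xa = (xa + two52) - two52;                  /* round to nearest integer */
  return copysign (xa, x);
}
#endif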
23231
23232 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23233 into OPERAND0. */
23234 void
23235 ix86_expand_floorceildf_32 (rtx operand0, rtx operand1, bool do_floor)
23236 {
23237 /* C code for the stuff we expand below.
23238 double xa = fabs (x), x2;
23239 if (!isless (xa, TWO52))
23240 return x;
23241 xa = xa + TWO52 - TWO52;
23242 x2 = copysign (xa, x);
23243 Compensate. Floor:
23244 if (x2 > x)
23245 x2 -= 1;
23246 Compensate. Ceil:
23247 if (x2 < x)
23248 x2 -= -1;
23249 return x2;
23250 */
23251 enum machine_mode mode = GET_MODE (operand0);
23252 rtx xa, TWO52, tmp, label, one, res, mask;
23253
23254 TWO52 = ix86_gen_TWO52 (mode);
23255
23256 /* Temporary for holding the result, initialized to the input
23257 operand to ease control flow. */
23258 res = gen_reg_rtx (mode);
23259 emit_move_insn (res, operand1);
23260
23261 /* xa = abs (operand1) */
23262 xa = ix86_expand_sse_fabs (res, &mask);
23263
23264 /* if (!isless (xa, TWO52)) goto label; */
23265 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23266
23267 /* xa = xa + TWO52 - TWO52; */
23268 xa = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23269 xa = expand_simple_binop (mode, MINUS, xa, TWO52, xa, 0, OPTAB_DIRECT);
23270
23271 /* xa = copysign (xa, operand1) */
23272 ix86_sse_copysign_to_positive (xa, xa, res, mask);
23273
23274 /* generate 1.0 or -1.0 */
23275 one = force_reg (mode,
23276 const_double_from_real_value (do_floor
23277 ? dconst1 : dconstm1, mode));
23278
23279 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23280 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23281 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23282 gen_rtx_AND (mode, one, tmp)));
23283 /* We always need to subtract here to preserve signed zero. */
23284 tmp = expand_simple_binop (mode, MINUS,
23285 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23286 emit_move_insn (res, tmp);
23287
23288 emit_label (label);
23289 LABEL_NUSES (label) = 1;
23290
23291 emit_move_insn (operand0, res);
23292 }
23293
23294 /* Expand SSE2 sequence for computing floor or ceil from OPERAND1 storing
23295 into OPERAND0. */
23296 void
23297 ix86_expand_floorceil (rtx operand0, rtx operand1, bool do_floor)
23298 {
23299 /* C code for the stuff we expand below.
23300 double xa = fabs (x), x2;
23301 if (!isless (xa, TWO52))
23302 return x;
23303 x2 = (double)(long)x;
23304 Compensate. Floor:
23305 if (x2 > x)
23306 x2 -= 1;
23307 Compensate. Ceil:
23308 if (x2 < x)
23309 x2 += 1;
23310 if (HONOR_SIGNED_ZEROS (mode))
23311 return copysign (x2, x);
23312 return x2;
23313 */
23314 enum machine_mode mode = GET_MODE (operand0);
23315 rtx xa, xi, TWO52, tmp, label, one, res, mask;
23316
23317 TWO52 = ix86_gen_TWO52 (mode);
23318
23319 /* Temporary for holding the result, initialized to the input
23320 operand to ease control flow. */
23321 res = gen_reg_rtx (mode);
23322 emit_move_insn (res, operand1);
23323
23324 /* xa = abs (operand1) */
23325 xa = ix86_expand_sse_fabs (res, &mask);
23326
23327 /* if (!isless (xa, TWO52)) goto label; */
23328 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23329
23330 /* xa = (double)(long)x */
23331 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23332 expand_fix (xi, res, 0);
23333 expand_float (xa, xi, 0);
23334
23335 /* generate 1.0 */
23336 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23337
23338 /* Compensate: xa = xa - (xa > operand1 ? 1 : 0) */
23339 tmp = ix86_expand_sse_compare_mask (UNGT, xa, res, !do_floor);
23340 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23341 gen_rtx_AND (mode, one, tmp)));
23342 tmp = expand_simple_binop (mode, do_floor ? MINUS : PLUS,
23343 xa, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23344 emit_move_insn (res, tmp);
23345
23346 if (HONOR_SIGNED_ZEROS (mode))
23347 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23348
23349 emit_label (label);
23350 LABEL_NUSES (label) = 1;
23351
23352 emit_move_insn (operand0, res);
23353 }
23354
23355 /* Expand SSE sequence for computing round from OPERAND1 storing
23356 into OPERAND0. This sequence works without relying on DImode truncation
23357 via cvttsd2siq, which is only available on 64-bit targets. */
23358 void
23359 ix86_expand_rounddf_32 (rtx operand0, rtx operand1)
23360 {
23361 /* C code for the stuff we expand below.
23362 double xa = fabs (x), xa2, x2;
23363 if (!isless (xa, TWO52))
23364 return x;
23365 Using the absolute value and copying back sign makes
23366 -0.0 -> -0.0 correct.
23367 xa2 = xa + TWO52 - TWO52;
23368 Compensate.
23369 dxa = xa2 - xa;
23370 if (dxa <= -0.5)
23371 xa2 += 1;
23372 else if (dxa > 0.5)
23373 xa2 -= 1;
23374 x2 = copysign (xa2, x);
23375 return x2;
23376 */
23377 enum machine_mode mode = GET_MODE (operand0);
23378 rtx xa, xa2, dxa, TWO52, tmp, label, half, mhalf, one, res, mask;
23379
23380 TWO52 = ix86_gen_TWO52 (mode);
23381
23382 /* Temporary for holding the result, initialized to the input
23383 operand to ease control flow. */
23384 res = gen_reg_rtx (mode);
23385 emit_move_insn (res, operand1);
23386
23387 /* xa = abs (operand1) */
23388 xa = ix86_expand_sse_fabs (res, &mask);
23389
23390 /* if (!isless (xa, TWO52)) goto label; */
23391 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23392
23393 /* xa2 = xa + TWO52 - TWO52; */
23394 xa2 = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23395 xa2 = expand_simple_binop (mode, MINUS, xa2, TWO52, xa2, 0, OPTAB_DIRECT);
23396
23397 /* dxa = xa2 - xa; */
23398 dxa = expand_simple_binop (mode, MINUS, xa2, xa, NULL_RTX, 0, OPTAB_DIRECT);
23399
23400 /* generate 0.5, 1.0 and -0.5 */
23401 half = force_reg (mode, const_double_from_real_value (dconsthalf, mode));
23402 one = expand_simple_binop (mode, PLUS, half, half, NULL_RTX, 0, OPTAB_DIRECT);
23403 mhalf = expand_simple_binop (mode, MINUS, half, one, NULL_RTX,
23404 0, OPTAB_DIRECT);
23405
23406 /* Compensate. */
23407 tmp = gen_reg_rtx (mode);
23408 /* xa2 = xa2 - (dxa > 0.5 ? 1 : 0) */
23409 tmp = ix86_expand_sse_compare_mask (UNGT, dxa, half, false);
23410 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23411 gen_rtx_AND (mode, one, tmp)));
23412 xa2 = expand_simple_binop (mode, MINUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23413 /* xa2 = xa2 + (dxa <= -0.5 ? 1 : 0) */
23414 tmp = ix86_expand_sse_compare_mask (UNGE, mhalf, dxa, false);
23415 emit_insn (gen_rtx_SET (VOIDmode, tmp,
23416 gen_rtx_AND (mode, one, tmp)));
23417 xa2 = expand_simple_binop (mode, PLUS, xa2, tmp, NULL_RTX, 0, OPTAB_DIRECT);
23418
23419 /* res = copysign (xa2, operand1) */
23420 ix86_sse_copysign_to_positive (res, xa2, force_reg (mode, operand1), mask);
23421
23422 emit_label (label);
23423 LABEL_NUSES (label) = 1;
23424
23425 emit_move_insn (operand0, res);
23426 }
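
/* Editorial sketch, not part of GCC: a scalar model of the expansion
   above.  The 2^52 trick rounds halfway cases to even, whereas round()
   must round them away from zero, so the difference dxa between the
   rounded and the original magnitude (exactly +-0.5 on a tie) is used
   to correct the result before the sign is copied back.  */
#if 0
#include <math.h>

static double
round_model (double x)
{
  const double two52 = 4503599627370496.0;   /* 2**52 */
  double xa, xa2, dxa;

  xa = fabs (x);
  if (!(xa < two52))
    return x;
  xa2 = (xa + two52) - two52;                /* rounds ties to even */
  dxa = xa2 - xa;
  if (dxa <= -0.5)
    xa2 += 1.0;                              /* tie was rounded down */
  else if (dxa > 0.5)
    xa2 -= 1.0;                              /* mirrors the emitted sequence */
  return copysign (xa2, x);
}
#endif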
23427
23428 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23429 into OPERAND0. */
23430 void
23431 ix86_expand_trunc (rtx operand0, rtx operand1)
23432 {
23433 /* C code for SSE variant we expand below.
23434 double xa = fabs (x), x2;
23435 if (!isless (xa, TWO52))
23436 return x;
23437 x2 = (double)(long)x;
23438 if (HONOR_SIGNED_ZEROS (mode))
23439 return copysign (x2, x);
23440 return x2;
23441 */
23442 enum machine_mode mode = GET_MODE (operand0);
23443 rtx xa, xi, TWO52, label, res, mask;
23444
23445 TWO52 = ix86_gen_TWO52 (mode);
23446
23447 /* Temporary for holding the result, initialized to the input
23448 operand to ease control flow. */
23449 res = gen_reg_rtx (mode);
23450 emit_move_insn (res, operand1);
23451
23452 /* xa = abs (operand1) */
23453 xa = ix86_expand_sse_fabs (res, &mask);
23454
23455 /* if (!isless (xa, TWO52)) goto label; */
23456 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23457
23458 /* x = (double)(long)x */
23459 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23460 expand_fix (xi, res, 0);
23461 expand_float (res, xi, 0);
23462
23463 if (HONOR_SIGNED_ZEROS (mode))
23464 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), mask);
23465
23466 emit_label (label);
23467 LABEL_NUSES (label) = 1;
23468
23469 emit_move_insn (operand0, res);
23470 }
23471
23472 /* Expand SSE sequence for computing trunc from OPERAND1 storing
23473 into OPERAND0, without relying on DImode truncation. */
23474 void
23475 ix86_expand_truncdf_32 (rtx operand0, rtx operand1)
23476 {
23477 enum machine_mode mode = GET_MODE (operand0);
23478 rtx xa, mask, TWO52, label, one, res, smask, tmp;
23479
23480 /* C code for SSE variant we expand below.
23481 double xa = fabs (x), xa2, x2;
23482 if (!isless (xa, TWO52))
23483 return x;
23484 xa2 = xa + TWO52 - TWO52;
23485 Compensate:
23486 if (xa2 > xa)
23487 xa2 -= 1.0;
23488 x2 = copysign (xa2, x);
23489 return x2;
23490 */
23491
23492 TWO52 = ix86_gen_TWO52 (mode);
23493
23494 /* Temporary for holding the result, initialized to the input
23495 operand to ease control flow. */
23496 res = gen_reg_rtx (mode);
23497 emit_move_insn (res, operand1);
23498
23499 /* xa = abs (operand1) */
23500 xa = ix86_expand_sse_fabs (res, &smask);
23501
23502 /* if (!isless (xa, TWO52)) goto label; */
23503 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23504
23505 /* res = xa + TWO52 - TWO52; */
23506 tmp = expand_simple_binop (mode, PLUS, xa, TWO52, NULL_RTX, 0, OPTAB_DIRECT);
23507 tmp = expand_simple_binop (mode, MINUS, tmp, TWO52, tmp, 0, OPTAB_DIRECT);
23508 emit_move_insn (res, tmp);
23509
23510 /* generate 1.0 */
23511 one = force_reg (mode, const_double_from_real_value (dconst1, mode));
23512
23513 /* Compensate: res = xa2 - (res > xa ? 1 : 0) */
23514 mask = ix86_expand_sse_compare_mask (UNGT, res, xa, false);
23515 emit_insn (gen_rtx_SET (VOIDmode, mask,
23516 gen_rtx_AND (mode, mask, one)));
23517 tmp = expand_simple_binop (mode, MINUS,
23518 res, mask, NULL_RTX, 0, OPTAB_DIRECT);
23519 emit_move_insn (res, tmp);
23520
23521 /* res = copysign (res, operand1) */
23522 ix86_sse_copysign_to_positive (res, res, force_reg (mode, operand1), smask);
23523
23524 emit_label (label);
23525 LABEL_NUSES (label) = 1;
23526
23527 emit_move_insn (operand0, res);
23528 }
23529
23530 /* Expand SSE sequence for computing round from OPERAND1 storing
23531 into OPERAND0. */
23532 void
23533 ix86_expand_round (rtx operand0, rtx operand1)
23534 {
23535 /* C code for the stuff we're doing below:
23536 double xa = fabs (x);
23537 if (!isless (xa, TWO52))
23538 return x;
23539 xa = (double)(long)(xa + nextafter (0.5, 0.0));
23540 return copysign (xa, x);
23541 */
23542 enum machine_mode mode = GET_MODE (operand0);
23543 rtx res, TWO52, xa, label, xi, half, mask;
23544 const struct real_format *fmt;
23545 REAL_VALUE_TYPE pred_half, half_minus_pred_half;
23546
23547 /* Temporary for holding the result, initialized to the input
23548 operand to ease control flow. */
23549 res = gen_reg_rtx (mode);
23550 emit_move_insn (res, operand1);
23551
23552 TWO52 = ix86_gen_TWO52 (mode);
23553 xa = ix86_expand_sse_fabs (res, &mask);
23554 label = ix86_expand_sse_compare_and_jump (UNLE, TWO52, xa, false);
23555
23556 /* load nextafter (0.5, 0.0) */
23557 fmt = REAL_MODE_FORMAT (mode);
23558 real_2expN (&half_minus_pred_half, -(fmt->p) - 1, mode);
23559 REAL_ARITHMETIC (pred_half, MINUS_EXPR, dconsthalf, half_minus_pred_half);
23560
23561 /* xa = xa + 0.5 */
23562 half = force_reg (mode, const_double_from_real_value (pred_half, mode));
23563 xa = expand_simple_binop (mode, PLUS, xa, half, NULL_RTX, 0, OPTAB_DIRECT);
23564
23565 /* xa = (double)(int64_t)xa */
23566 xi = gen_reg_rtx (mode == DFmode ? DImode : SImode);
23567 expand_fix (xi, xa, 0);
23568 expand_float (xa, xi, 0);
23569
23570 /* res = copysign (xa, operand1) */
23571 ix86_sse_copysign_to_positive (res, xa, force_reg (mode, operand1), mask);
23572
23573 emit_label (label);
23574 LABEL_NUSES (label) = 1;
23575
23576 emit_move_insn (operand0, res);
23577 }
23578
23579 \f
23580 /* Table of valid machine attributes. */
23581 static const struct attribute_spec ix86_attribute_table[] =
23582 {
23583 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler } */
23584 /* Stdcall attribute says callee is responsible for popping arguments
23585 if they are not variable. */
23586 { "stdcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23587 /* Fastcall attribute says callee is responsible for popping arguments
23588 if they are not variable. */
23589 { "fastcall", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23590 /* Cdecl attribute says the callee is a normal C declaration */
23591 { "cdecl", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23592 /* Regparm attribute specifies how many integer arguments are to be
23593 passed in registers. */
23594 { "regparm", 1, 1, false, true, true, ix86_handle_cconv_attribute },
23595 /* Sseregparm attribute says we are using x86_64 calling conventions
23596 for FP arguments. */
23597 { "sseregparm", 0, 0, false, true, true, ix86_handle_cconv_attribute },
23598 /* force_align_arg_pointer says this function realigns the stack at entry. */
23599 { (const char *)&ix86_force_align_arg_pointer_string, 0, 0,
23600 false, true, true, ix86_handle_cconv_attribute },
23601 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23602 { "dllimport", 0, 0, false, false, false, handle_dll_attribute },
23603 { "dllexport", 0, 0, false, false, false, handle_dll_attribute },
23604 { "shared", 0, 0, true, false, false, ix86_handle_shared_attribute },
23605 #endif
23606 { "ms_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23607 { "gcc_struct", 0, 0, false, false, false, ix86_handle_struct_attribute },
23608 #ifdef SUBTARGET_ATTRIBUTE_TABLE
23609 SUBTARGET_ATTRIBUTE_TABLE,
23610 #endif
23611 { NULL, 0, 0, false, false, false, NULL }
23612 };
23613
23614 /* Initialize the GCC target structure. */
23615 #undef TARGET_ATTRIBUTE_TABLE
23616 #define TARGET_ATTRIBUTE_TABLE ix86_attribute_table
23617 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23618 # undef TARGET_MERGE_DECL_ATTRIBUTES
23619 # define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
23620 #endif
23621
23622 #undef TARGET_COMP_TYPE_ATTRIBUTES
23623 #define TARGET_COMP_TYPE_ATTRIBUTES ix86_comp_type_attributes
23624
23625 #undef TARGET_INIT_BUILTINS
23626 #define TARGET_INIT_BUILTINS ix86_init_builtins
23627 #undef TARGET_EXPAND_BUILTIN
23628 #define TARGET_EXPAND_BUILTIN ix86_expand_builtin
23629
23630 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
23631 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
23632 ix86_builtin_vectorized_function
23633
23634 #undef TARGET_VECTORIZE_BUILTIN_CONVERSION
23635 #define TARGET_VECTORIZE_BUILTIN_CONVERSION ix86_vectorize_builtin_conversion
23636
23637 #undef TARGET_BUILTIN_RECIPROCAL
23638 #define TARGET_BUILTIN_RECIPROCAL ix86_builtin_reciprocal
23639
23640 #undef TARGET_ASM_FUNCTION_EPILOGUE
23641 #define TARGET_ASM_FUNCTION_EPILOGUE ix86_output_function_epilogue
23642
23643 #undef TARGET_ENCODE_SECTION_INFO
23644 #ifndef SUBTARGET_ENCODE_SECTION_INFO
23645 #define TARGET_ENCODE_SECTION_INFO ix86_encode_section_info
23646 #else
23647 #define TARGET_ENCODE_SECTION_INFO SUBTARGET_ENCODE_SECTION_INFO
23648 #endif
23649
23650 #undef TARGET_ASM_OPEN_PAREN
23651 #define TARGET_ASM_OPEN_PAREN ""
23652 #undef TARGET_ASM_CLOSE_PAREN
23653 #define TARGET_ASM_CLOSE_PAREN ""
23654
23655 #undef TARGET_ASM_ALIGNED_HI_OP
23656 #define TARGET_ASM_ALIGNED_HI_OP ASM_SHORT
23657 #undef TARGET_ASM_ALIGNED_SI_OP
23658 #define TARGET_ASM_ALIGNED_SI_OP ASM_LONG
23659 #ifdef ASM_QUAD
23660 #undef TARGET_ASM_ALIGNED_DI_OP
23661 #define TARGET_ASM_ALIGNED_DI_OP ASM_QUAD
23662 #endif
23663
23664 #undef TARGET_ASM_UNALIGNED_HI_OP
23665 #define TARGET_ASM_UNALIGNED_HI_OP TARGET_ASM_ALIGNED_HI_OP
23666 #undef TARGET_ASM_UNALIGNED_SI_OP
23667 #define TARGET_ASM_UNALIGNED_SI_OP TARGET_ASM_ALIGNED_SI_OP
23668 #undef TARGET_ASM_UNALIGNED_DI_OP
23669 #define TARGET_ASM_UNALIGNED_DI_OP TARGET_ASM_ALIGNED_DI_OP
23670
23671 #undef TARGET_SCHED_ADJUST_COST
23672 #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost
23673 #undef TARGET_SCHED_ISSUE_RATE
23674 #define TARGET_SCHED_ISSUE_RATE ix86_issue_rate
23675 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
23676 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
23677 ia32_multipass_dfa_lookahead
23678
23679 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
23680 #define TARGET_FUNCTION_OK_FOR_SIBCALL ix86_function_ok_for_sibcall
23681
23682 #ifdef HAVE_AS_TLS
23683 #undef TARGET_HAVE_TLS
23684 #define TARGET_HAVE_TLS true
23685 #endif
23686 #undef TARGET_CANNOT_FORCE_CONST_MEM
23687 #define TARGET_CANNOT_FORCE_CONST_MEM ix86_cannot_force_const_mem
23688 #undef TARGET_USE_BLOCKS_FOR_CONSTANT_P
23689 #define TARGET_USE_BLOCKS_FOR_CONSTANT_P hook_bool_mode_const_rtx_true
23690
23691 #undef TARGET_DELEGITIMIZE_ADDRESS
23692 #define TARGET_DELEGITIMIZE_ADDRESS ix86_delegitimize_address
23693
23694 #undef TARGET_MS_BITFIELD_LAYOUT_P
23695 #define TARGET_MS_BITFIELD_LAYOUT_P ix86_ms_bitfield_layout_p
23696
23697 #if TARGET_MACHO
23698 #undef TARGET_BINDS_LOCAL_P
23699 #define TARGET_BINDS_LOCAL_P darwin_binds_local_p
23700 #endif
23701 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
23702 #undef TARGET_BINDS_LOCAL_P
23703 #define TARGET_BINDS_LOCAL_P i386_pe_binds_local_p
23704 #endif
23705
23706 #undef TARGET_ASM_OUTPUT_MI_THUNK
23707 #define TARGET_ASM_OUTPUT_MI_THUNK x86_output_mi_thunk
23708 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
23709 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK x86_can_output_mi_thunk
23710
23711 #undef TARGET_ASM_FILE_START
23712 #define TARGET_ASM_FILE_START x86_file_start
23713
23714 #undef TARGET_DEFAULT_TARGET_FLAGS
23715 #define TARGET_DEFAULT_TARGET_FLAGS \
23716 (TARGET_DEFAULT \
23717 | TARGET_SUBTARGET_DEFAULT \
23718 | TARGET_TLS_DIRECT_SEG_REFS_DEFAULT)
23719
23720 #undef TARGET_HANDLE_OPTION
23721 #define TARGET_HANDLE_OPTION ix86_handle_option
23722
23723 #undef TARGET_RTX_COSTS
23724 #define TARGET_RTX_COSTS ix86_rtx_costs
23725 #undef TARGET_ADDRESS_COST
23726 #define TARGET_ADDRESS_COST ix86_address_cost
23727
23728 #undef TARGET_FIXED_CONDITION_CODE_REGS
23729 #define TARGET_FIXED_CONDITION_CODE_REGS ix86_fixed_condition_code_regs
23730 #undef TARGET_CC_MODES_COMPATIBLE
23731 #define TARGET_CC_MODES_COMPATIBLE ix86_cc_modes_compatible
23732
23733 #undef TARGET_MACHINE_DEPENDENT_REORG
23734 #define TARGET_MACHINE_DEPENDENT_REORG ix86_reorg
23735
23736 #undef TARGET_BUILD_BUILTIN_VA_LIST
23737 #define TARGET_BUILD_BUILTIN_VA_LIST ix86_build_builtin_va_list
23738
23739 #undef TARGET_MD_ASM_CLOBBERS
23740 #define TARGET_MD_ASM_CLOBBERS ix86_md_asm_clobbers
23741
23742 #undef TARGET_PROMOTE_PROTOTYPES
23743 #define TARGET_PROMOTE_PROTOTYPES hook_bool_const_tree_true
23744 #undef TARGET_STRUCT_VALUE_RTX
23745 #define TARGET_STRUCT_VALUE_RTX ix86_struct_value_rtx
23746 #undef TARGET_SETUP_INCOMING_VARARGS
23747 #define TARGET_SETUP_INCOMING_VARARGS ix86_setup_incoming_varargs
23748 #undef TARGET_MUST_PASS_IN_STACK
23749 #define TARGET_MUST_PASS_IN_STACK ix86_must_pass_in_stack
23750 #undef TARGET_PASS_BY_REFERENCE
23751 #define TARGET_PASS_BY_REFERENCE ix86_pass_by_reference
23752 #undef TARGET_INTERNAL_ARG_POINTER
23753 #define TARGET_INTERNAL_ARG_POINTER ix86_internal_arg_pointer
23754 #undef TARGET_DWARF_HANDLE_FRAME_UNSPEC
23755 #define TARGET_DWARF_HANDLE_FRAME_UNSPEC ix86_dwarf_handle_frame_unspec
23756 #undef TARGET_STRICT_ARGUMENT_NAMING
23757 #define TARGET_STRICT_ARGUMENT_NAMING hook_bool_CUMULATIVE_ARGS_true
23758
23759 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
23760 #define TARGET_GIMPLIFY_VA_ARG_EXPR ix86_gimplify_va_arg
23761
23762 #undef TARGET_SCALAR_MODE_SUPPORTED_P
23763 #define TARGET_SCALAR_MODE_SUPPORTED_P ix86_scalar_mode_supported_p
23764
23765 #undef TARGET_VECTOR_MODE_SUPPORTED_P
23766 #define TARGET_VECTOR_MODE_SUPPORTED_P ix86_vector_mode_supported_p
23767
23768 #undef TARGET_C_MODE_FOR_SUFFIX
23769 #define TARGET_C_MODE_FOR_SUFFIX ix86_c_mode_for_suffix
23770
23771 #ifdef HAVE_AS_TLS
23772 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
23773 #define TARGET_ASM_OUTPUT_DWARF_DTPREL i386_output_dwarf_dtprel
23774 #endif
23775
23776 #ifdef SUBTARGET_INSERT_ATTRIBUTES
23777 #undef TARGET_INSERT_ATTRIBUTES
23778 #define TARGET_INSERT_ATTRIBUTES SUBTARGET_INSERT_ATTRIBUTES
23779 #endif
23780
23781 #undef TARGET_MANGLE_TYPE
23782 #define TARGET_MANGLE_TYPE ix86_mangle_type
23783
23784 #undef TARGET_STACK_PROTECT_FAIL
23785 #define TARGET_STACK_PROTECT_FAIL ix86_stack_protect_fail
23786
23787 #undef TARGET_FUNCTION_VALUE
23788 #define TARGET_FUNCTION_VALUE ix86_function_value
23789
23790 struct gcc_target targetm = TARGET_INITIALIZER;
23791 \f
23792 #include "gt-i386.h"